xref: /linux/tools/perf/util/arm-spe.c (revision a85ac2dae6bf8050deaf9839e4c0328756b48720)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Arm Statistical Profiling Extensions (SPE) support
4  * Copyright (c) 2017-2018, Arm Ltd.
5  */
6 
7 #include <byteswap.h>
8 #include <endian.h>
9 #include <errno.h>
10 #include <inttypes.h>
11 #include <linux/bitops.h>
12 #include <linux/kernel.h>
13 #include <linux/log2.h>
14 #include <linux/types.h>
15 #include <linux/zalloc.h>
16 #include <stdlib.h>
17 #include <unistd.h>
18 
19 #include "auxtrace.h"
20 #include "color.h"
21 #include "debug.h"
22 #include "evlist.h"
23 #include "evsel.h"
24 #include "machine.h"
25 #include "session.h"
26 #include "symbol.h"
27 #include "thread.h"
28 #include "thread-stack.h"
29 #include "tsc.h"
30 #include "tool.h"
31 #include "util/synthetic-events.h"
32 
33 #include "arm-spe.h"
34 #include "arm-spe-decoder/arm-spe-decoder.h"
35 #include "arm-spe-decoder/arm-spe-pkt-decoder.h"
36 
37 #include "../../arch/arm64/include/asm/cputype.h"
38 #define MAX_TIMESTAMP (~0ULL)
39 
40 #define is_ldst_op(op)		(!!((op) & ARM_SPE_OP_LDST))
41 
42 struct arm_spe {
43 	struct auxtrace			auxtrace;
44 	struct auxtrace_queues		queues;
45 	struct auxtrace_heap		heap;
46 	struct itrace_synth_opts        synth_opts;
47 	u32				auxtrace_type;
48 	struct perf_session		*session;
49 	struct machine			*machine;
50 	u32				pmu_type;
51 
52 	struct perf_tsc_conversion	tc;
53 
54 	u8				timeless_decoding;
55 	u8				data_queued;
56 
57 	u64				sample_type;
58 	u8				sample_flc;
59 	u8				sample_llc;
60 	u8				sample_tlb;
61 	u8				sample_branch;
62 	u8				sample_remote_access;
63 	u8				sample_memory;
64 	u8				sample_instructions;
65 
66 	u64				l1d_miss_id;
67 	u64				l1d_access_id;
68 	u64				llc_miss_id;
69 	u64				llc_access_id;
70 	u64				tlb_miss_id;
71 	u64				tlb_access_id;
72 	u64				branch_id;
73 	u64				remote_access_id;
74 	u64				memory_id;
75 	u64				instructions_id;
76 
77 	u64				kernel_start;
78 
79 	unsigned long			num_events;
80 	u8				use_ctx_pkt_for_pid;
81 
82 	u64				**metadata;
83 	u64				metadata_ver;
84 	u64				metadata_nr_cpu;
85 	bool				is_homogeneous;
86 };
87 
88 struct arm_spe_queue {
89 	struct arm_spe			*spe;
90 	unsigned int			queue_nr;
91 	struct auxtrace_buffer		*buffer;
92 	struct auxtrace_buffer		*old_buffer;
93 	union perf_event		*event_buf;
94 	bool				on_heap;
95 	bool				done;
96 	pid_t				pid;
97 	pid_t				tid;
98 	int				cpu;
99 	struct arm_spe_decoder		*decoder;
100 	u64				time;
101 	u64				timestamp;
102 	struct thread			*thread;
103 	u64				sample_count;
104 	u32				flags;
105 	struct branch_stack		*last_branch;
106 };
107 
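/*
 * Map a set of MIDR ranges to the callback that decodes the IMPDEF
 * data source field for those CPUs.
 */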
108 struct data_source_handle {
109 	const struct midr_range *midr_ranges;
110 	void (*ds_synth)(const struct arm_spe_record *record,
111 			 union perf_mem_data_src *data_src);
112 };
113 
114 #define DS(range, func)					\
115 	{						\
116 		.midr_ranges = range,			\
117 		.ds_synth = arm_spe__synth_##func,	\
118 	}
119 
120 static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
121 			 unsigned char *buf, size_t len)
122 {
123 	struct arm_spe_pkt packet;
124 	size_t pos = 0;
125 	int ret, pkt_len, i;
126 	char desc[ARM_SPE_PKT_DESC_MAX];
127 	const char *color = PERF_COLOR_BLUE;
128 
129 	color_fprintf(stdout, color,
130 		      ". ... ARM SPE data: size %#zx bytes\n",
131 		      len);
132 
133 	while (len) {
134 		ret = arm_spe_get_packet(buf, len, &packet);
135 		if (ret > 0)
136 			pkt_len = ret;
137 		else
138 			pkt_len = 1;
139 		printf(".");
140 		color_fprintf(stdout, color, "  %08zx: ", pos);
141 		for (i = 0; i < pkt_len; i++)
142 			color_fprintf(stdout, color, " %02x", buf[i]);
143 		for (; i < 16; i++)
144 			color_fprintf(stdout, color, "   ");
145 		if (ret > 0) {
146 			ret = arm_spe_pkt_desc(&packet, desc,
147 					       ARM_SPE_PKT_DESC_MAX);
148 			if (!ret)
149 				color_fprintf(stdout, color, " %s\n", desc);
150 		} else {
151 			color_fprintf(stdout, color, " Bad packet!\n");
152 		}
153 		pos += pkt_len;
154 		buf += pkt_len;
155 		len -= pkt_len;
156 	}
157 }
158 
159 static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
160 			       size_t len)
161 {
162 	printf(".\n");
163 	arm_spe_dump(spe, buf, len);
164 }
165 
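/*
 * Decoder callback: fetch the next auxtrace buffer for this queue and
 * hand its data to the decoder.
 */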
166 static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
167 {
168 	struct arm_spe_queue *speq = data;
169 	struct auxtrace_buffer *buffer = speq->buffer;
170 	struct auxtrace_buffer *old_buffer = speq->old_buffer;
171 	struct auxtrace_queue *queue;
172 
173 	queue = &speq->spe->queues.queue_array[speq->queue_nr];
174 
175 	buffer = auxtrace_buffer__next(queue, buffer);
176 	/* If no more data, drop the previous auxtrace_buffer and return */
177 	if (!buffer) {
178 		if (old_buffer)
179 			auxtrace_buffer__drop_data(old_buffer);
180 		b->len = 0;
181 		return 0;
182 	}
183 
184 	speq->buffer = buffer;
185 
186 	/* If the aux_buffer doesn't have data associated, try to load it */
187 	if (!buffer->data) {
188 		/* get the file desc associated with the perf data file */
189 		int fd = perf_data__fd(speq->spe->session->data);
190 
191 		buffer->data = auxtrace_buffer__get_data(buffer, fd);
192 		if (!buffer->data)
193 			return -ENOMEM;
194 	}
195 
196 	b->len = buffer->size;
197 	b->buf = buffer->data;
198 
199 	if (b->len) {
200 		if (old_buffer)
201 			auxtrace_buffer__drop_data(old_buffer);
202 		speq->old_buffer = buffer;
203 	} else {
204 		auxtrace_buffer__drop_data(buffer);
205 		return arm_spe_get_trace(b, data);
206 	}
207 
208 	return 0;
209 }
210 
211 static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
212 		unsigned int queue_nr)
213 {
214 	struct arm_spe_params params = { .get_trace = 0, };
215 	struct arm_spe_queue *speq;
216 
217 	speq = zalloc(sizeof(*speq));
218 	if (!speq)
219 		return NULL;
220 
221 	speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
222 	if (!speq->event_buf)
223 		goto out_free;
224 
225 	speq->spe = spe;
226 	speq->queue_nr = queue_nr;
227 	speq->pid = -1;
228 	speq->tid = -1;
229 	speq->cpu = -1;
230 
231 	/* params set */
232 	params.get_trace = arm_spe_get_trace;
233 	params.data = speq;
234 
235 	if (spe->synth_opts.last_branch) {
236 		size_t sz = sizeof(struct branch_stack);
237 
238 		/* Allocate up to two entries for PBT + TGT */
239 		sz += sizeof(struct branch_entry) *
240 			min(spe->synth_opts.last_branch_sz, 2U);
241 		speq->last_branch = zalloc(sz);
242 		if (!speq->last_branch)
243 			goto out_free;
244 	}
245 
246 	/* create new decoder */
247 	speq->decoder = arm_spe_decoder_new(&params);
248 	if (!speq->decoder)
249 		goto out_free;
250 
251 	return speq;
252 
253 out_free:
254 	zfree(&speq->event_buf);
255 	zfree(&speq->last_branch);
256 	free(speq);
257 
258 	return NULL;
259 }
260 
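/*
 * Classify a sampled IP as kernel or user space, based on the kernel
 * start address.
 */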
261 static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
262 {
263 	return ip >= spe->kernel_start ?
264 		PERF_RECORD_MISC_KERNEL :
265 		PERF_RECORD_MISC_USER;
266 }
267 
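/*
 * Refresh the queue's pid/tid/cpu from the tid that the machine tracks
 * for this CPU (set by context switch events or CONTEXT packets).
 */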
268 static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
269 				    struct auxtrace_queue *queue)
270 {
271 	struct arm_spe_queue *speq = queue->priv;
272 	pid_t tid;
273 
274 	tid = machine__get_current_tid(spe->machine, speq->cpu);
275 	if (tid != -1) {
276 		speq->tid = tid;
277 		thread__zput(speq->thread);
278 	} else
279 		speq->tid = queue->tid;
280 
281 	if ((!speq->thread) && (speq->tid != -1)) {
282 		speq->thread = machine__find_thread(spe->machine, -1,
283 						    speq->tid);
284 	}
285 
286 	if (speq->thread) {
287 		speq->pid = thread__pid(speq->thread);
288 		if (queue->cpu == -1)
289 			speq->cpu = thread__cpu(speq->thread);
290 	}
291 }
292 
293 static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid)
294 {
295 	struct arm_spe *spe = speq->spe;
296 	int err = machine__set_current_tid(spe->machine, speq->cpu, -1, tid);
297 
298 	if (err)
299 		return err;
300 
301 	arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]);
302 
303 	return 0;
304 }
305 
306 static u64 *arm_spe__get_metadata_by_cpu(struct arm_spe *spe, u64 cpu)
307 {
308 	u64 i;
309 
310 	if (!spe->metadata)
311 		return NULL;
312 
313 	for (i = 0; i < spe->metadata_nr_cpu; i++)
314 		if (spe->metadata[i][ARM_SPE_CPU] == cpu)
315 			return spe->metadata[i];
316 
317 	return NULL;
318 }
319 
320 static struct simd_flags arm_spe__synth_simd_flags(const struct arm_spe_record *record)
321 {
322 	struct simd_flags simd_flags = {};
323 
324 	if ((record->op & ARM_SPE_OP_LDST) && (record->op & ARM_SPE_OP_SVE_LDST))
325 		simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;
326 
327 	if ((record->op & ARM_SPE_OP_OTHER) && (record->op & ARM_SPE_OP_SVE_OTHER))
328 		simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;
329 
330 	if (record->type & ARM_SPE_SVE_PARTIAL_PRED)
331 		simd_flags.pred |= SIMD_OP_FLAGS_PRED_PARTIAL;
332 
333 	if (record->type & ARM_SPE_SVE_EMPTY_PRED)
334 		simd_flags.pred |= SIMD_OP_FLAGS_PRED_EMPTY;
335 
336 	return simd_flags;
337 }
338 
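/* Fill in the sample fields common to all synthesized SPE samples. */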
339 static void arm_spe_prep_sample(struct arm_spe *spe,
340 				struct arm_spe_queue *speq,
341 				union perf_event *event,
342 				struct perf_sample *sample)
343 {
344 	struct arm_spe_record *record = &speq->decoder->record;
345 
346 	if (!spe->timeless_decoding)
347 		sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);
348 
349 	sample->ip = record->from_ip;
350 	sample->cpumode = arm_spe_cpumode(spe, sample->ip);
351 	sample->pid = speq->pid;
352 	sample->tid = speq->tid;
353 	sample->period = spe->synth_opts.period;
354 	sample->cpu = speq->cpu;
355 	sample->simd_flags = arm_spe__synth_simd_flags(record);
356 
357 	event->sample.header.type = PERF_RECORD_SAMPLE;
358 	event->sample.header.misc = sample->cpumode;
359 	event->sample.header.size = sizeof(struct perf_event_header);
360 }
361 
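/*
 * Build a branch stack with at most two entries: the sampled branch
 * (from/to) and, when available, the previous branch target (PBT).
 */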
362 static void arm_spe__prep_branch_stack(struct arm_spe_queue *speq)
363 {
364 	struct arm_spe *spe = speq->spe;
365 	struct arm_spe_record *record = &speq->decoder->record;
366 	struct branch_stack *bstack = speq->last_branch;
367 	struct branch_flags *bs_flags;
368 	unsigned int last_branch_sz = spe->synth_opts.last_branch_sz;
369 	bool have_tgt = !!(speq->flags & PERF_IP_FLAG_BRANCH);
370 	bool have_pbt = last_branch_sz >= (have_tgt + 1U) && record->prev_br_tgt;
371 	size_t sz = sizeof(struct branch_stack) +
372 		    sizeof(struct branch_entry) * min(last_branch_sz, 2U) /* PBT + TGT */;
373 	int i = 0;
374 
375 	/* Clean up branch stack */
376 	memset(bstack, 0x0, sz);
377 
378 	if (!have_tgt && !have_pbt)
379 		return;
380 
381 	if (have_tgt) {
382 		bstack->entries[i].from = record->from_ip;
383 		bstack->entries[i].to = record->to_ip;
384 
385 		bs_flags = &bstack->entries[i].flags;
386 		bs_flags->value = 0;
387 
388 		if (record->op & ARM_SPE_OP_BR_CR_BL) {
389 			if (record->op & ARM_SPE_OP_BR_COND)
390 				bs_flags->type |= PERF_BR_COND_CALL;
391 			else
392 				bs_flags->type |= PERF_BR_CALL;
393 		/*
394 		 * Indirect branch instruction without link (e.g. BR),
395 		 * treat this case as a function return.
396 		 */
397 		} else if (record->op & ARM_SPE_OP_BR_CR_RET ||
398 			   record->op & ARM_SPE_OP_BR_INDIRECT) {
399 			if (record->op & ARM_SPE_OP_BR_COND)
400 				bs_flags->type |= PERF_BR_COND_RET;
401 			else
402 				bs_flags->type |= PERF_BR_RET;
403 		} else if (record->op & ARM_SPE_OP_BR_CR_NON_BL_RET) {
404 			if (record->op & ARM_SPE_OP_BR_COND)
405 				bs_flags->type |= PERF_BR_COND;
406 			else
407 				bs_flags->type |= PERF_BR_UNCOND;
408 		} else {
409 			if (record->op & ARM_SPE_OP_BR_COND)
410 				bs_flags->type |= PERF_BR_COND;
411 			else
412 				bs_flags->type |= PERF_BR_UNKNOWN;
413 		}
414 
415 		if (record->type & ARM_SPE_BRANCH_MISS) {
416 			bs_flags->mispred = 1;
417 			bs_flags->predicted = 0;
418 		} else {
419 			bs_flags->mispred = 0;
420 			bs_flags->predicted = 1;
421 		}
422 
423 		if (record->type & ARM_SPE_BRANCH_NOT_TAKEN)
424 			bs_flags->not_taken = 1;
425 
426 		if (record->type & ARM_SPE_IN_TXN)
427 			bs_flags->in_tx = 1;
428 
429 		bs_flags->cycles = min(record->latency, 0xFFFFU);
430 		i++;
431 	}
432 
433 	if (have_pbt) {
434 		bs_flags = &bstack->entries[i].flags;
435 		bs_flags->type |= PERF_BR_UNKNOWN;
436 		bstack->entries[i].to = record->prev_br_tgt;
437 		i++;
438 	}
439 
440 	bstack->nr = i;
441 	bstack->hw_idx = -1ULL;
442 }
443 
444 static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type)
445 {
446 	event->header.size = perf_event__sample_event_size(sample, type, 0);
447 	return perf_event__synthesize_sample(event, type, 0, sample);
448 }
449 
450 static inline int
451 arm_spe_deliver_synth_event(struct arm_spe *spe,
452 			    struct arm_spe_queue *speq __maybe_unused,
453 			    union perf_event *event,
454 			    struct perf_sample *sample)
455 {
456 	int ret;
457 
458 	if (spe->synth_opts.inject) {
459 		ret = arm_spe__inject_event(event, sample, spe->sample_type);
460 		if (ret)
461 			return ret;
462 	}
463 
464 	ret = perf_session__deliver_synth_event(spe->session, event, sample);
465 	if (ret)
466 		pr_err("ARM SPE: failed to deliver event, error %d\n", ret);
467 
468 	return ret;
469 }
470 
471 static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
472 				     u64 spe_events_id, u64 data_src)
473 {
474 	struct arm_spe *spe = speq->spe;
475 	struct arm_spe_record *record = &speq->decoder->record;
476 	union perf_event *event = speq->event_buf;
477 	struct perf_sample sample;
478 	int ret;
479 
480 	perf_sample__init(&sample, /*all=*/true);
481 	arm_spe_prep_sample(spe, speq, event, &sample);
482 
483 	sample.id = spe_events_id;
484 	sample.stream_id = spe_events_id;
485 	sample.addr = record->virt_addr;
486 	sample.phys_addr = record->phys_addr;
487 	sample.data_src = data_src;
488 	sample.weight = record->latency;
489 
490 	ret = arm_spe_deliver_synth_event(spe, speq, event, &sample);
491 	perf_sample__exit(&sample);
492 	return ret;
493 }
494 
495 static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
496 					u64 spe_events_id)
497 {
498 	struct arm_spe *spe = speq->spe;
499 	struct arm_spe_record *record = &speq->decoder->record;
500 	union perf_event *event = speq->event_buf;
501 	struct perf_sample sample;
502 	int ret;
503 
504 	perf_sample__init(&sample, /*all=*/true);
505 	arm_spe_prep_sample(spe, speq, event, &sample);
506 
507 	sample.id = spe_events_id;
508 	sample.stream_id = spe_events_id;
509 	sample.addr = record->to_ip;
510 	sample.weight = record->latency;
511 	sample.flags = speq->flags;
512 	sample.branch_stack = speq->last_branch;
513 
514 	ret = arm_spe_deliver_synth_event(spe, speq, event, &sample);
515 	perf_sample__exit(&sample);
516 	return ret;
517 }
518 
519 static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
520 					     u64 spe_events_id, u64 data_src)
521 {
522 	struct arm_spe *spe = speq->spe;
523 	struct arm_spe_record *record = &speq->decoder->record;
524 	union perf_event *event = speq->event_buf;
525 	struct perf_sample sample;
526 	int ret;
527 
528 	perf_sample__init(&sample, /*all=*/true);
529 	arm_spe_prep_sample(spe, speq, event, &sample);
530 
531 	sample.id = spe_events_id;
532 	sample.stream_id = spe_events_id;
533 	sample.addr = record->to_ip;
534 	sample.phys_addr = record->phys_addr;
535 	sample.data_src = data_src;
536 	sample.weight = record->latency;
537 	sample.flags = speq->flags;
538 	sample.branch_stack = speq->last_branch;
539 
540 	ret = arm_spe_deliver_synth_event(spe, speq, event, &sample);
541 	perf_sample__exit(&sample);
542 	return ret;
543 }
544 
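/* CPUs known to use the common (Neoverse/Cortex) data source encoding. */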
545 static const struct midr_range common_ds_encoding_cpus[] = {
546 	MIDR_ALL_VERSIONS(MIDR_CORTEX_A720),
547 	MIDR_ALL_VERSIONS(MIDR_CORTEX_A725),
548 	MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C),
549 	MIDR_ALL_VERSIONS(MIDR_CORTEX_X3),
550 	MIDR_ALL_VERSIONS(MIDR_CORTEX_X925),
551 	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
552 	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
553 	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
554 	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2),
555 	{},
556 };
557 
558 static const struct midr_range ampereone_ds_encoding_cpus[] = {
559 	MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
560 	{},
561 };
562 
563 static const struct midr_range hisi_hip_ds_encoding_cpus[] = {
564 	MIDR_ALL_VERSIONS(MIDR_HISI_HIP12),
565 	{},
566 };
567 
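/* Derive the perf branch flags for the record from its operation and event type. */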
568 static void arm_spe__sample_flags(struct arm_spe_queue *speq)
569 {
570 	const struct arm_spe_record *record = &speq->decoder->record;
571 
572 	speq->flags = 0;
573 	if (record->op & ARM_SPE_OP_BRANCH_ERET) {
574 		speq->flags = PERF_IP_FLAG_BRANCH;
575 
576 		if (record->type & ARM_SPE_BRANCH_MISS)
577 			speq->flags |= PERF_IP_FLAG_BRANCH_MISS;
578 
579 		if (record->type & ARM_SPE_BRANCH_NOT_TAKEN)
580 			speq->flags |= PERF_IP_FLAG_NOT_TAKEN;
581 
582 		if (record->type & ARM_SPE_IN_TXN)
583 			speq->flags |= PERF_IP_FLAG_IN_TX;
584 
585 		if (record->op & ARM_SPE_OP_BR_COND)
586 			speq->flags |= PERF_IP_FLAG_CONDITIONAL;
587 
588 		if (record->op & ARM_SPE_OP_BR_CR_BL)
589 			speq->flags |= PERF_IP_FLAG_CALL;
590 		else if (record->op & ARM_SPE_OP_BR_CR_RET)
591 			speq->flags |= PERF_IP_FLAG_RETURN;
592 		/*
593 		 * Indirect branch instruction without link (e.g. BR),
594 		 * treat it as a function return.
595 		 */
596 		else if (record->op & ARM_SPE_OP_BR_INDIRECT)
597 			speq->flags |= PERF_IP_FLAG_RETURN;
598 	}
599 }
600 
601 static void arm_spe__synth_data_source_common(const struct arm_spe_record *record,
602 					      union perf_mem_data_src *data_src)
603 {
604 	/*
605 	 * Even though four levels of cache hierarchy are possible, no known
606 	 * production Neoverse systems currently include more than three levels,
607 	 * so for the time being we assume three exist. If a production system
608 	 * is built with four then this function would have to be changed to
609 	 * detect the number of levels for reporting.
610 	 */
611 
612 	/*
613 	 * We have no data on the hit level or data source for stores in the
614 	 * Neoverse SPE records.
615 	 */
616 	if (record->op & ARM_SPE_OP_ST) {
617 		data_src->mem_lvl = PERF_MEM_LVL_NA;
618 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
619 		data_src->mem_snoop = PERF_MEM_SNOOP_NA;
620 		return;
621 	}
622 
623 	switch (record->source) {
624 	case ARM_SPE_COMMON_DS_L1D:
625 		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
626 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
627 		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
628 		break;
629 	case ARM_SPE_COMMON_DS_L2:
630 		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
631 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
632 		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
633 		break;
634 	case ARM_SPE_COMMON_DS_PEER_CORE:
635 		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
636 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
637 		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
638 		break;
639 	/*
640 	 * We don't know if this hit in L1 or L2, but we do know it was a
641 	 * cache-to-cache transfer, so set SNOOPX_PEER.
642 	 */
643 	case ARM_SPE_COMMON_DS_LOCAL_CLUSTER:
644 	case ARM_SPE_COMMON_DS_PEER_CLUSTER:
645 		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
646 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
647 		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
648 		break;
649 	/*
650 	 * System cache is assumed to be L3
651 	 */
652 	case ARM_SPE_COMMON_DS_SYS_CACHE:
653 		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
654 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
655 		data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
656 		break;
657 	/*
658 	 * We don't know what level it hit in, except it came from the other
659 	 * socket
660 	 */
661 	case ARM_SPE_COMMON_DS_REMOTE:
662 		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1;
663 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
664 		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
665 		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
666 		break;
667 	case ARM_SPE_COMMON_DS_DRAM:
668 		data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
669 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
670 		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
671 		break;
672 	default:
673 		break;
674 	}
675 }
676 
677 /*
678  * Source is IMPDEF. Here we convert the source encoding used on AmpereOne
679  * cores to the common (Neoverse, Cortex) one to avoid duplicating the decoding code.
680  */
681 static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record,
682 						 union perf_mem_data_src *data_src)
683 {
684 	struct arm_spe_record common_record;
685 
686 	switch (record->source) {
687 	case ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE:
688 		common_record.source = ARM_SPE_COMMON_DS_PEER_CORE;
689 		break;
690 	case ARM_SPE_AMPEREONE_SLC:
691 		common_record.source = ARM_SPE_COMMON_DS_SYS_CACHE;
692 		break;
693 	case ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE:
694 		common_record.source = ARM_SPE_COMMON_DS_REMOTE;
695 		break;
696 	case ARM_SPE_AMPEREONE_DDR:
697 		common_record.source = ARM_SPE_COMMON_DS_DRAM;
698 		break;
699 	case ARM_SPE_AMPEREONE_L1D:
700 		common_record.source = ARM_SPE_COMMON_DS_L1D;
701 		break;
702 	case ARM_SPE_AMPEREONE_L2D:
703 		common_record.source = ARM_SPE_COMMON_DS_L2;
704 		break;
705 	default:
706 		pr_warning_once("AmpereOne: Unknown data source (0x%x)\n",
707 				record->source);
708 		return;
709 	}
710 
711 	common_record.op = record->op;
712 	arm_spe__synth_data_source_common(&common_record, data_src);
713 }
714 
715 static void arm_spe__synth_data_source_hisi_hip(const struct arm_spe_record *record,
716 						union perf_mem_data_src *data_src)
717 {
718 	/* Use common synthesis method to handle store operations */
719 	if (record->op & ARM_SPE_OP_ST) {
720 		arm_spe__synth_data_source_common(record, data_src);
721 		return;
722 	}
723 
724 	switch (record->source) {
725 	case ARM_SPE_HISI_HIP_PEER_CPU:
726 		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
727 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
728 		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
729 		break;
730 	case ARM_SPE_HISI_HIP_PEER_CPU_HITM:
731 		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
732 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
733 		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
734 		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
735 		break;
736 	case ARM_SPE_HISI_HIP_L3:
737 		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
738 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
739 		data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
740 		break;
741 	case ARM_SPE_HISI_HIP_L3_HITM:
742 		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
743 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
744 		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
745 		break;
746 	case ARM_SPE_HISI_HIP_PEER_CLUSTER:
747 		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
748 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
749 		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
750 		break;
751 	case ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM:
752 		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
753 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
754 		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
755 		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
756 		break;
757 	case ARM_SPE_HISI_HIP_REMOTE_SOCKET:
758 		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
759 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
760 		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
761 		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
762 		break;
763 	case ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM:
764 		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
765 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
766 		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
767 		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
768 		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
769 		break;
770 	case ARM_SPE_HISI_HIP_LOCAL_MEM:
771 		data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
772 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
773 		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
774 		break;
775 	case ARM_SPE_HISI_HIP_REMOTE_MEM:
776 		data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT;
777 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
778 		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
779 		break;
780 	case ARM_SPE_HISI_HIP_NC_DEV:
781 		data_src->mem_lvl = PERF_MEM_LVL_IO | PERF_MEM_LVL_HIT;
782 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO;
783 		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
784 		break;
785 	case ARM_SPE_HISI_HIP_L2:
786 		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
787 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
788 		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
789 		break;
790 	case ARM_SPE_HISI_HIP_L2_HITM:
791 		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
792 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
793 		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
794 		break;
795 	case ARM_SPE_HISI_HIP_L1:
796 		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
797 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
798 		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
799 		break;
800 	default:
801 		break;
802 	}
803 }
804 
805 static const struct data_source_handle data_source_handles[] = {
806 	DS(common_ds_encoding_cpus, data_source_common),
807 	DS(ampereone_ds_encoding_cpus, data_source_ampereone),
808 	DS(hisi_hip_ds_encoding_cpus, data_source_hisi_hip),
809 };
810 
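/*
 * Fallback when the data source field cannot be decoded for this CPU:
 * infer the memory level from the SPE event type bits alone.
 */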
811 static void arm_spe__synth_memory_level(const struct arm_spe_record *record,
812 					union perf_mem_data_src *data_src)
813 {
814 	if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
815 		data_src->mem_lvl = PERF_MEM_LVL_L3;
816 
817 		if (record->type & ARM_SPE_LLC_MISS)
818 			data_src->mem_lvl |= PERF_MEM_LVL_MISS;
819 		else
820 			data_src->mem_lvl |= PERF_MEM_LVL_HIT;
821 	} else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
822 		data_src->mem_lvl = PERF_MEM_LVL_L1;
823 
824 		if (record->type & ARM_SPE_L1D_MISS)
825 			data_src->mem_lvl |= PERF_MEM_LVL_MISS;
826 		else
827 			data_src->mem_lvl |= PERF_MEM_LVL_HIT;
828 	}
829 
830 	if (record->type & ARM_SPE_REMOTE_ACCESS)
831 		data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
832 }
833 
834 static bool arm_spe__synth_ds(struct arm_spe_queue *speq,
835 			      const struct arm_spe_record *record,
836 			      union perf_mem_data_src *data_src)
837 {
838 	struct arm_spe *spe = speq->spe;
839 	u64 *metadata = NULL;
840 	u64 midr;
841 	unsigned int i;
842 
843 	/* Metadata version 1 assumes all CPUs are the same (old behavior) */
844 	if (spe->metadata_ver == 1) {
845 		const char *cpuid;
846 
847 		pr_warning_once("Old SPE metadata, re-record to improve decode accuracy\n");
848 		cpuid = perf_env__cpuid(perf_session__env(spe->session));
849 		midr = strtol(cpuid, NULL, 16);
850 	} else {
851 		/* CPU ID is -1 for per-thread mode */
852 		if (speq->cpu < 0) {
853 			/*
854 			 * On a heterogeneous system, since the CPU ID is -1,
855 			 * we cannot confirm the data source packet is supported.
856 			 */
857 			if (!spe->is_homogeneous)
858 				return false;
859 
860 			/* On a homogeneous system, simply use CPU0's metadata */
861 			if (spe->metadata)
862 				metadata = spe->metadata[0];
863 		} else {
864 			metadata = arm_spe__get_metadata_by_cpu(spe, speq->cpu);
865 		}
866 
867 		if (!metadata)
868 			return false;
869 
870 		midr = metadata[ARM_SPE_CPU_MIDR];
871 	}
872 
873 	for (i = 0; i < ARRAY_SIZE(data_source_handles); i++) {
874 		if (is_midr_in_range_list(midr, data_source_handles[i].midr_ranges)) {
875 			data_source_handles[i].ds_synth(record, data_src);
876 			return true;
877 		}
878 	}
879 
880 	return false;
881 }
882 
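/*
 * Build a perf_mem_data_src descriptor for a load/store record;
 * returns 0 when the record is not a memory operation.
 */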
883 static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq,
884 				      const struct arm_spe_record *record)
885 {
886 	union perf_mem_data_src	data_src = { .mem_op = PERF_MEM_OP_NA };
887 
888 	/* Only synthesize data source for LDST operations */
889 	if (!is_ldst_op(record->op))
890 		return 0;
891 
892 	if (record->op & ARM_SPE_OP_LD)
893 		data_src.mem_op = PERF_MEM_OP_LOAD;
894 	else if (record->op & ARM_SPE_OP_ST)
895 		data_src.mem_op = PERF_MEM_OP_STORE;
896 	else
897 		return 0;
898 
899 	if (!arm_spe__synth_ds(speq, record, &data_src))
900 		arm_spe__synth_memory_level(record, &data_src);
901 
902 	if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
903 		data_src.mem_dtlb = PERF_MEM_TLB_WK;
904 
905 		if (record->type & ARM_SPE_TLB_MISS)
906 			data_src.mem_dtlb |= PERF_MEM_TLB_MISS;
907 		else
908 			data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
909 	}
910 
911 	return data_src.val;
912 }
913 
914 static int arm_spe_sample(struct arm_spe_queue *speq)
915 {
916 	const struct arm_spe_record *record = &speq->decoder->record;
917 	struct arm_spe *spe = speq->spe;
918 	u64 data_src;
919 	int err;
920 
921 	/*
922 	 * Discard all samples until period is reached
923 	 */
924 	speq->sample_count++;
925 	if (speq->sample_count < spe->synth_opts.period)
926 		return 0;
927 	speq->sample_count = 0;
928 
929 	arm_spe__sample_flags(speq);
930 	data_src = arm_spe__synth_data_source(speq, record);
931 
932 	if (spe->sample_flc) {
933 		if (record->type & ARM_SPE_L1D_MISS) {
934 			err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id,
935 							data_src);
936 			if (err)
937 				return err;
938 		}
939 
940 		if (record->type & ARM_SPE_L1D_ACCESS) {
941 			err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id,
942 							data_src);
943 			if (err)
944 				return err;
945 		}
946 	}
947 
948 	if (spe->sample_llc) {
949 		if (record->type & ARM_SPE_LLC_MISS) {
950 			err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
951 							data_src);
952 			if (err)
953 				return err;
954 		}
955 
956 		if (record->type & ARM_SPE_LLC_ACCESS) {
957 			err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
958 							data_src);
959 			if (err)
960 				return err;
961 		}
962 	}
963 
964 	if (spe->sample_tlb) {
965 		if (record->type & ARM_SPE_TLB_MISS) {
966 			err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
967 							data_src);
968 			if (err)
969 				return err;
970 		}
971 
972 		if (record->type & ARM_SPE_TLB_ACCESS) {
973 			err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
974 							data_src);
975 			if (err)
976 				return err;
977 		}
978 	}
979 
980 	if (spe->synth_opts.last_branch &&
981 	    (spe->sample_branch || spe->sample_instructions))
982 		arm_spe__prep_branch_stack(speq);
983 
984 	if (spe->sample_branch && (record->op & ARM_SPE_OP_BRANCH_ERET)) {
985 		err = arm_spe__synth_branch_sample(speq, spe->branch_id);
986 		if (err)
987 			return err;
988 	}
989 
990 	if (spe->sample_remote_access &&
991 	    (record->type & ARM_SPE_REMOTE_ACCESS)) {
992 		err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
993 						data_src);
994 		if (err)
995 			return err;
996 	}
997 
998 	/*
999 	 * When data_src is zero the record is not a memory operation, so skip
1000 	 * synthesizing a memory sample for this case.
1001 	 */
1002 	if (spe->sample_memory && is_ldst_op(record->op)) {
1003 		err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
1004 		if (err)
1005 			return err;
1006 	}
1007 
1008 	if (spe->sample_instructions) {
1009 		err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
1010 		if (err)
1011 			return err;
1012 	}
1013 
1014 	return 0;
1015 }
1016 
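/*
 * Decode records for one queue, synthesizing samples, until the queue's
 * timestamp passes *timestamp (timed mode) or the trace data runs out.
 */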
1017 static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
1018 {
1019 	struct arm_spe *spe = speq->spe;
1020 	struct arm_spe_record *record;
1021 	int ret;
1022 
1023 	if (!spe->kernel_start)
1024 		spe->kernel_start = machine__kernel_start(spe->machine);
1025 
1026 	while (1) {
1027 		/*
1028 		 * The usual logic is to decode the packets first and then
1029 		 * synthesize a sample based on the record; but here the flow is
1030 		 * reversed: arm_spe_sample() is called to synthesize samples
1031 		 * prior to arm_spe_decode().
1032 		 *
1033 		 * Two reasons for this code logic:
1034 		 * 1. When the queue is set up in arm_spe__setup_queue(), it
1035 		 * has already decoded trace data and generated a record, but
1036 		 * that record is left pending until execution reaches here, so
1037 		 * it is correct to synthesize a sample for the pending record.
1038 		 * 2. After decoding trace data, the record timestamp needs to
1039 		 * be compared with the coming perf event; if the record
1040 		 * timestamp is later than the perf event, bail out and push the
1041 		 * record onto the auxtrace heap, so synthesizing its sample is
1042 		 * deferred until this function runs again; this correlates
1043 		 * samples between Arm SPE trace data and other perf events
1044 		 * with correct time ordering.
1045 		 */
1046 
1047 		/*
1048 		 * Update pid/tid info.
1049 		 */
1050 		record = &speq->decoder->record;
1051 		if (!spe->timeless_decoding && record->context_id != (u64)-1) {
1052 			ret = arm_spe_set_tid(speq, record->context_id);
1053 			if (ret)
1054 				return ret;
1055 
1056 			spe->use_ctx_pkt_for_pid = true;
1057 		}
1058 
1059 		ret = arm_spe_sample(speq);
1060 		if (ret)
1061 			return ret;
1062 
1063 		ret = arm_spe_decode(speq->decoder);
1064 		if (!ret) {
1065 			pr_debug("No data or all data has been processed.\n");
1066 			return 1;
1067 		}
1068 
1069 		/*
1070 		 * An error was detected while decoding the SPE trace data;
1071 		 * continue with the next trace data to find more records.
1072 		 */
1073 		if (ret < 0)
1074 			continue;
1075 
1076 		record = &speq->decoder->record;
1077 
1078 		/* Update timestamp for the last record */
1079 		if (record->timestamp > speq->timestamp)
1080 			speq->timestamp = record->timestamp;
1081 
1082 		/*
1083 		 * If the timestamp of the queue is later than the timestamp of
1084 		 * the coming perf event, bail out to allow the perf event to
1085 		 * be processed first.
1086 		 */
1087 		if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
1088 			*timestamp = speq->timestamp;
1089 			return 0;
1090 		}
1091 	}
1092 
1093 	return 0;
1094 }
1095 
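/*
 * Allocate the decode queue. For timed decoding, decode the first
 * record so the queue can be ordered on the auxtrace heap by timestamp.
 */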
1096 static int arm_spe__setup_queue(struct arm_spe *spe,
1097 			       struct auxtrace_queue *queue,
1098 			       unsigned int queue_nr)
1099 {
1100 	struct arm_spe_queue *speq = queue->priv;
1101 	struct arm_spe_record *record;
1102 
1103 	if (list_empty(&queue->head) || speq)
1104 		return 0;
1105 
1106 	speq = arm_spe__alloc_queue(spe, queue_nr);
1107 
1108 	if (!speq)
1109 		return -ENOMEM;
1110 
1111 	queue->priv = speq;
1112 
1113 	if (queue->cpu != -1)
1114 		speq->cpu = queue->cpu;
1115 
1116 	if (!speq->on_heap) {
1117 		int ret;
1118 
1119 		if (spe->timeless_decoding)
1120 			return 0;
1121 
1122 retry:
1123 		ret = arm_spe_decode(speq->decoder);
1124 
1125 		if (!ret)
1126 			return 0;
1127 
1128 		if (ret < 0)
1129 			goto retry;
1130 
1131 		record = &speq->decoder->record;
1132 
1133 		speq->timestamp = record->timestamp;
1134 		ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
1135 		if (ret)
1136 			return ret;
1137 		speq->on_heap = true;
1138 	}
1139 
1140 	return 0;
1141 }
1142 
1143 static int arm_spe__setup_queues(struct arm_spe *spe)
1144 {
1145 	unsigned int i;
1146 	int ret;
1147 
1148 	for (i = 0; i < spe->queues.nr_queues; i++) {
1149 		ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
1150 		if (ret)
1151 			return ret;
1152 	}
1153 
1154 	return 0;
1155 }
1156 
1157 static int arm_spe__update_queues(struct arm_spe *spe)
1158 {
1159 	if (spe->queues.new_data) {
1160 		spe->queues.new_data = false;
1161 		return arm_spe__setup_queues(spe);
1162 	}
1163 
1164 	return 0;
1165 }
1166 
1167 static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
1168 {
1169 	struct evsel *evsel;
1170 	struct evlist *evlist = spe->session->evlist;
1171 	bool timeless_decoding = true;
1172 
1173 	/*
1174 	 * Cycle through the list of events and check whether any of them
1175 	 * has the time bit set; if so, decoding is not timeless.
1176 	 */
1177 	evlist__for_each_entry(evlist, evsel) {
1178 		if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
1179 			timeless_decoding = false;
1180 	}
1181 
1182 	return timeless_decoding;
1183 }
1184 
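/*
 * Process queued trace data in timestamp order, up to (but not
 * including) the given timestamp, using the auxtrace heap.
 */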
1185 static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
1186 {
1187 	unsigned int queue_nr;
1188 	u64 ts;
1189 	int ret;
1190 
1191 	while (1) {
1192 		struct auxtrace_queue *queue;
1193 		struct arm_spe_queue *speq;
1194 
1195 		if (!spe->heap.heap_cnt)
1196 			return 0;
1197 
1198 		if (spe->heap.heap_array[0].ordinal >= timestamp)
1199 			return 0;
1200 
1201 		queue_nr = spe->heap.heap_array[0].queue_nr;
1202 		queue = &spe->queues.queue_array[queue_nr];
1203 		speq = queue->priv;
1204 
1205 		auxtrace_heap__pop(&spe->heap);
1206 
1207 		if (spe->heap.heap_cnt) {
1208 			ts = spe->heap.heap_array[0].ordinal + 1;
1209 			if (ts > timestamp)
1210 				ts = timestamp;
1211 		} else {
1212 			ts = timestamp;
1213 		}
1214 
1215 		/*
1216 		 * A previous context-switch event has set pid/tid in the machine's context, so
1217 		 * here we need to update the pid/tid in the thread and SPE queue.
1218 		 */
1219 		if (!spe->use_ctx_pkt_for_pid)
1220 			arm_spe_set_pid_tid_cpu(spe, queue);
1221 
1222 		ret = arm_spe_run_decoder(speq, &ts);
1223 		if (ret < 0) {
1224 			auxtrace_heap__add(&spe->heap, queue_nr, ts);
1225 			return ret;
1226 		}
1227 
1228 		if (!ret) {
1229 			ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
1230 			if (ret < 0)
1231 				return ret;
1232 		} else {
1233 			speq->on_heap = false;
1234 		}
1235 	}
1236 
1237 	return 0;
1238 }
1239 
1240 static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
1241 					    u64 time_)
1242 {
1243 	struct auxtrace_queues *queues = &spe->queues;
1244 	unsigned int i;
1245 	u64 ts = 0;
1246 
1247 	for (i = 0; i < queues->nr_queues; i++) {
1248 		struct auxtrace_queue *queue = &spe->queues.queue_array[i];
1249 		struct arm_spe_queue *speq = queue->priv;
1250 
1251 		if (speq && (tid == -1 || speq->tid == tid)) {
1252 			speq->time = time_;
1253 			arm_spe_set_pid_tid_cpu(spe, queue);
1254 			arm_spe_run_decoder(speq, &ts);
1255 		}
1256 	}
1257 	return 0;
1258 }
1259 
1260 static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event,
1261 				  struct perf_sample *sample)
1262 {
1263 	pid_t pid, tid;
1264 	int cpu;
1265 
1266 	if (!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT))
1267 		return 0;
1268 
1269 	pid = event->context_switch.next_prev_pid;
1270 	tid = event->context_switch.next_prev_tid;
1271 	cpu = sample->cpu;
1272 
1273 	if (tid == -1)
1274 		pr_warning("context_switch event has no tid\n");
1275 
1276 	return machine__set_current_tid(spe->machine, cpu, pid, tid);
1277 }
1278 
1279 static int arm_spe_process_event(struct perf_session *session,
1280 				 union perf_event *event,
1281 				 struct perf_sample *sample,
1282 				 const struct perf_tool *tool)
1283 {
1284 	int err = 0;
1285 	u64 timestamp;
1286 	struct arm_spe *spe = container_of(session->auxtrace,
1287 			struct arm_spe, auxtrace);
1288 
1289 	if (dump_trace)
1290 		return 0;
1291 
1292 	if (!tool->ordered_events) {
1293 		pr_err("SPE trace requires ordered events\n");
1294 		return -EINVAL;
1295 	}
1296 
1297 	if (sample->time && (sample->time != (u64) -1))
1298 		timestamp = perf_time_to_tsc(sample->time, &spe->tc);
1299 	else
1300 		timestamp = 0;
1301 
1302 	if (timestamp || spe->timeless_decoding) {
1303 		err = arm_spe__update_queues(spe);
1304 		if (err)
1305 			return err;
1306 	}
1307 
1308 	if (spe->timeless_decoding) {
1309 		if (event->header.type == PERF_RECORD_EXIT) {
1310 			err = arm_spe_process_timeless_queues(spe,
1311 					event->fork.tid,
1312 					sample->time);
1313 		}
1314 	} else if (timestamp) {
1315 		err = arm_spe_process_queues(spe, timestamp);
1316 		if (err)
1317 			return err;
1318 
1319 		if (!spe->use_ctx_pkt_for_pid &&
1320 		    (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE ||
1321 		    event->header.type == PERF_RECORD_SWITCH))
1322 			err = arm_spe_context_switch(spe, event, sample);
1323 	}
1324 
1325 	return err;
1326 }
1327 
1328 static int arm_spe_process_auxtrace_event(struct perf_session *session,
1329 					  union perf_event *event,
1330 					  const struct perf_tool *tool __maybe_unused)
1331 {
1332 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
1333 					     auxtrace);
1334 
1335 	if (!spe->data_queued) {
1336 		struct auxtrace_buffer *buffer;
1337 		off_t data_offset;
1338 		int fd = perf_data__fd(session->data);
1339 		int err;
1340 
1341 		if (perf_data__is_pipe(session->data)) {
1342 			data_offset = 0;
1343 		} else {
1344 			data_offset = lseek(fd, 0, SEEK_CUR);
1345 			if (data_offset == -1)
1346 				return -errno;
1347 		}
1348 
1349 		err = auxtrace_queues__add_event(&spe->queues, session, event,
1350 				data_offset, &buffer);
1351 		if (err)
1352 			return err;
1353 
1354 		/* Dump here now we have copied a piped trace out of the pipe */
1355 		/* Dump here now that we have copied a piped trace out of the pipe */
1356 			if (auxtrace_buffer__get_data(buffer, fd)) {
1357 				arm_spe_dump_event(spe, buffer->data,
1358 						buffer->size);
1359 				auxtrace_buffer__put_data(buffer);
1360 			}
1361 		}
1362 	}
1363 
1364 	return 0;
1365 }
1366 
1367 static int arm_spe_flush(struct perf_session *session __maybe_unused,
1368 			 const struct perf_tool *tool __maybe_unused)
1369 {
1370 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
1371 			auxtrace);
1372 	int ret;
1373 
1374 	if (dump_trace)
1375 		return 0;
1376 
1377 	if (!tool->ordered_events)
1378 		return -EINVAL;
1379 
1380 	ret = arm_spe__update_queues(spe);
1381 	if (ret < 0)
1382 		return ret;
1383 
1384 	if (spe->timeless_decoding)
1385 		return arm_spe_process_timeless_queues(spe, -1,
1386 				MAX_TIMESTAMP - 1);
1387 
1388 	ret = arm_spe_process_queues(spe, MAX_TIMESTAMP);
1389 	if (ret)
1390 		return ret;
1391 
1392 	if (!spe->use_ctx_pkt_for_pid)
1393 		ui__warning("Arm SPE CONTEXT packets not found in the traces.\n"
1394 			    "Matching of TIDs to SPE events could be inaccurate.\n");
1395 
1396 	return 0;
1397 }
1398 
1399 static u64 *arm_spe__alloc_per_cpu_metadata(u64 *buf, int per_cpu_size)
1400 {
1401 	u64 *metadata;
1402 
1403 	metadata = zalloc(per_cpu_size);
1404 	if (!metadata)
1405 		return NULL;
1406 
1407 	memcpy(metadata, buf, per_cpu_size);
1408 	return metadata;
1409 }
1410 
1411 static void arm_spe__free_metadata(u64 **metadata, int nr_cpu)
1412 {
1413 	int i;
1414 
1415 	for (i = 0; i < nr_cpu; i++)
1416 		zfree(&metadata[i]);
1417 	free(metadata);
1418 }
1419 
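/*
 * Parse the auxtrace info metadata: either the legacy flat layout
 * (version 1) or a header followed by one parameter block per CPU.
 */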
1420 static u64 **arm_spe__alloc_metadata(struct perf_record_auxtrace_info *info,
1421 				     u64 *ver, int *nr_cpu)
1422 {
1423 	u64 *ptr = (u64 *)info->priv;
1424 	u64 metadata_size;
1425 	u64 **metadata = NULL;
1426 	int hdr_sz, per_cpu_sz, i;
1427 
1428 	metadata_size = info->header.size -
1429 		sizeof(struct perf_record_auxtrace_info);
1430 
1431 	/* Metadata version 1 */
1432 	if (metadata_size == ARM_SPE_AUXTRACE_V1_PRIV_SIZE) {
1433 		*ver = 1;
1434 		*nr_cpu = 0;
1435 		/* No per CPU metadata */
1436 		return NULL;
1437 	}
1438 
1439 	*ver = ptr[ARM_SPE_HEADER_VERSION];
1440 	hdr_sz = ptr[ARM_SPE_HEADER_SIZE];
1441 	*nr_cpu = ptr[ARM_SPE_CPUS_NUM];
1442 
1443 	metadata = calloc(*nr_cpu, sizeof(*metadata));
1444 	if (!metadata)
1445 		return NULL;
1446 
1447 	/* Locate the start address of per CPU metadata */
1448 	ptr += hdr_sz;
1449 	per_cpu_sz = (metadata_size - (hdr_sz * sizeof(u64))) / (*nr_cpu);
1450 
1451 	for (i = 0; i < *nr_cpu; i++) {
1452 		metadata[i] = arm_spe__alloc_per_cpu_metadata(ptr, per_cpu_sz);
1453 		if (!metadata[i])
1454 			goto err_per_cpu_metadata;
1455 
1456 		ptr += per_cpu_sz / sizeof(u64);
1457 	}
1458 
1459 	return metadata;
1460 
1461 err_per_cpu_metadata:
1462 	arm_spe__free_metadata(metadata, *nr_cpu);
1463 	return NULL;
1464 }
1465 
1466 static void arm_spe_free_queue(void *priv)
1467 {
1468 	struct arm_spe_queue *speq = priv;
1469 
1470 	if (!speq)
1471 		return;
1472 	thread__zput(speq->thread);
1473 	arm_spe_decoder_free(speq->decoder);
1474 	zfree(&speq->event_buf);
1475 	zfree(&speq->last_branch);
1476 	free(speq);
1477 }
1478 
1479 static void arm_spe_free_events(struct perf_session *session)
1480 {
1481 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
1482 					     auxtrace);
1483 	struct auxtrace_queues *queues = &spe->queues;
1484 	unsigned int i;
1485 
1486 	for (i = 0; i < queues->nr_queues; i++) {
1487 		arm_spe_free_queue(queues->queue_array[i].priv);
1488 		queues->queue_array[i].priv = NULL;
1489 	}
1490 	auxtrace_queues__free(queues);
1491 }
1492 
1493 static void arm_spe_free(struct perf_session *session)
1494 {
1495 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
1496 					     auxtrace);
1497 
1498 	auxtrace_heap__free(&spe->heap);
1499 	arm_spe_free_events(session);
1500 	session->auxtrace = NULL;
1501 	arm_spe__free_metadata(spe->metadata, spe->metadata_nr_cpu);
1502 	free(spe);
1503 }
1504 
1505 static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
1506 				      struct evsel *evsel)
1507 {
1508 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);
1509 
1510 	return evsel->core.attr.type == spe->pmu_type;
1511 }
1512 
1513 static const char * const metadata_hdr_v1_fmts[] = {
1514 	[ARM_SPE_PMU_TYPE]		= "  PMU Type           :%"PRId64"\n",
1515 	[ARM_SPE_PER_CPU_MMAPS]		= "  Per CPU mmaps      :%"PRId64"\n",
1516 };
1517 
1518 static const char * const metadata_hdr_fmts[] = {
1519 	[ARM_SPE_HEADER_VERSION]	= "  Header version     :%"PRId64"\n",
1520 	[ARM_SPE_HEADER_SIZE]		= "  Header size        :%"PRId64"\n",
1521 	[ARM_SPE_PMU_TYPE_V2]		= "  PMU type v2        :%"PRId64"\n",
1522 	[ARM_SPE_CPUS_NUM]		= "  CPU number         :%"PRId64"\n",
1523 };
1524 
1525 static const char * const metadata_per_cpu_fmts[] = {
1526 	[ARM_SPE_MAGIC]			= "    Magic            :0x%"PRIx64"\n",
1527 	[ARM_SPE_CPU]			= "    CPU #            :%"PRId64"\n",
1528 	[ARM_SPE_CPU_NR_PARAMS]		= "    Num of params    :%"PRId64"\n",
1529 	[ARM_SPE_CPU_MIDR]		= "    MIDR             :0x%"PRIx64"\n",
1530 	[ARM_SPE_CPU_PMU_TYPE]		= "    PMU Type         :%"PRId64"\n",
1531 	[ARM_SPE_CAP_MIN_IVAL]		= "    Min Interval     :%"PRId64"\n",
1532 };
1533 
1534 static void arm_spe_print_info(struct arm_spe *spe, __u64 *arr)
1535 {
1536 	unsigned int i, cpu, hdr_size, cpu_num, cpu_size;
1537 	const char * const *hdr_fmts;
1538 
1539 	if (!dump_trace)
1540 		return;
1541 
1542 	if (spe->metadata_ver == 1) {
1543 		cpu_num = 0;
1544 		hdr_size = ARM_SPE_AUXTRACE_V1_PRIV_MAX;
1545 		hdr_fmts = metadata_hdr_v1_fmts;
1546 	} else {
1547 		cpu_num = arr[ARM_SPE_CPUS_NUM];
1548 		hdr_size = arr[ARM_SPE_HEADER_SIZE];
1549 		hdr_fmts = metadata_hdr_fmts;
1550 	}
1551 
1552 	for (i = 0; i < hdr_size; i++)
1553 		fprintf(stdout, hdr_fmts[i], arr[i]);
1554 
1555 	arr += hdr_size;
1556 	for (cpu = 0; cpu < cpu_num; cpu++) {
1557 		/*
1558 		 * The parameters from ARM_SPE_MAGIC to ARM_SPE_CPU_NR_PARAMS
1559 		 * are fixed. The number of the following parameters is given
1560 		 * by the field 'ARM_SPE_CPU_NR_PARAMS'.
1561 		 */
1562 		cpu_size = (ARM_SPE_CPU_NR_PARAMS + 1) + arr[ARM_SPE_CPU_NR_PARAMS];
1563 		for (i = 0; i < cpu_size; i++)
1564 			fprintf(stdout, metadata_per_cpu_fmts[i], arr[i]);
1565 		arr += cpu_size;
1566 	}
1567 }
1568 
1569 static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
1570 				    const char *name)
1571 {
1572 	struct evsel *evsel;
1573 
1574 	evlist__for_each_entry(evlist, evsel) {
1575 		if (evsel->core.id && evsel->core.id[0] == id) {
1576 			if (evsel->name)
1577 				zfree(&evsel->name);
1578 			evsel->name = strdup(name);
1579 			break;
1580 		}
1581 	}
1582 }
1583 
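/*
 * Create the synthetic event attributes and ids for each sample type
 * requested via --itrace.
 */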
1584 static int
1585 arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
1586 {
1587 	struct evlist *evlist = session->evlist;
1588 	struct evsel *evsel;
1589 	struct perf_event_attr attr;
1590 	bool found = false;
1591 	u64 id;
1592 	int err;
1593 
1594 	evlist__for_each_entry(evlist, evsel) {
1595 		if (evsel->core.attr.type == spe->pmu_type) {
1596 			found = true;
1597 			break;
1598 		}
1599 	}
1600 
1601 	if (!found) {
1602 		pr_debug("No selected events with SPE trace data\n");
1603 		return 0;
1604 	}
1605 
1606 	memset(&attr, 0, sizeof(struct perf_event_attr));
1607 	attr.size = sizeof(struct perf_event_attr);
1608 	attr.type = PERF_TYPE_HARDWARE;
1609 	attr.sample_type = evsel->core.attr.sample_type &
1610 				(PERF_SAMPLE_MASK | PERF_SAMPLE_PHYS_ADDR);
1611 	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1612 			    PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC |
1613 			    PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR;
1614 	if (spe->timeless_decoding)
1615 		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
1616 	else
1617 		attr.sample_type |= PERF_SAMPLE_TIME;
1618 
1619 	spe->sample_type = attr.sample_type;
1620 
1621 	attr.exclude_user = evsel->core.attr.exclude_user;
1622 	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
1623 	attr.exclude_hv = evsel->core.attr.exclude_hv;
1624 	attr.exclude_host = evsel->core.attr.exclude_host;
1625 	attr.exclude_guest = evsel->core.attr.exclude_guest;
1626 	attr.sample_id_all = evsel->core.attr.sample_id_all;
1627 	attr.read_format = evsel->core.attr.read_format;
1628 	attr.sample_period = spe->synth_opts.period;
1629 
1630 	/* create new id val to be a fixed offset from evsel id */
1631 	id = evsel->core.id[0] + 1000000000;
1632 
1633 	if (!id)
1634 		id = 1;
1635 
1636 	if (spe->synth_opts.flc) {
1637 		spe->sample_flc = true;
1638 
1639 		/* Level 1 data cache miss */
1640 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1641 		if (err)
1642 			return err;
1643 		spe->l1d_miss_id = id;
1644 		arm_spe_set_event_name(evlist, id, "l1d-miss");
1645 		id += 1;
1646 
1647 		/* Level 1 data cache access */
1648 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1649 		if (err)
1650 			return err;
1651 		spe->l1d_access_id = id;
1652 		arm_spe_set_event_name(evlist, id, "l1d-access");
1653 		id += 1;
1654 	}
1655 
1656 	if (spe->synth_opts.llc) {
1657 		spe->sample_llc = true;
1658 
1659 		/* Last level cache miss */
1660 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1661 		if (err)
1662 			return err;
1663 		spe->llc_miss_id = id;
1664 		arm_spe_set_event_name(evlist, id, "llc-miss");
1665 		id += 1;
1666 
1667 		/* Last level cache access */
1668 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1669 		if (err)
1670 			return err;
1671 		spe->llc_access_id = id;
1672 		arm_spe_set_event_name(evlist, id, "llc-access");
1673 		id += 1;
1674 	}
1675 
1676 	if (spe->synth_opts.tlb) {
1677 		spe->sample_tlb = true;
1678 
1679 		/* TLB miss */
1680 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1681 		if (err)
1682 			return err;
1683 		spe->tlb_miss_id = id;
1684 		arm_spe_set_event_name(evlist, id, "tlb-miss");
1685 		id += 1;
1686 
1687 		/* TLB access */
1688 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1689 		if (err)
1690 			return err;
1691 		spe->tlb_access_id = id;
1692 		arm_spe_set_event_name(evlist, id, "tlb-access");
1693 		id += 1;
1694 	}
1695 
1696 	if (spe->synth_opts.last_branch) {
1697 		if (spe->synth_opts.last_branch_sz > 2)
1698 			pr_debug("Arm SPE supports only two bstack entries (PBT+TGT).\n");
1699 
1700 		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
1701 		/*
1702 		 * We don't use the hardware index, but the sample generation
1703 		 * code uses the new format branch_stack with this field,
1704 		 * so the event attributes must indicate that it's present.
1705 		 */
1706 		attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
1707 	}
1708 
1709 	if (spe->synth_opts.branches) {
1710 		spe->sample_branch = true;
1711 
1712 		/* Branch */
1713 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1714 		if (err)
1715 			return err;
1716 		spe->branch_id = id;
1717 		arm_spe_set_event_name(evlist, id, "branch");
1718 		id += 1;
1719 	}
1720 
1721 	if (spe->synth_opts.remote_access) {
1722 		spe->sample_remote_access = true;
1723 
1724 		/* Remote access */
1725 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1726 		if (err)
1727 			return err;
1728 		spe->remote_access_id = id;
1729 		arm_spe_set_event_name(evlist, id, "remote-access");
1730 		id += 1;
1731 	}
1732 
1733 	if (spe->synth_opts.mem) {
1734 		spe->sample_memory = true;
1735 
1736 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1737 		if (err)
1738 			return err;
1739 		spe->memory_id = id;
1740 		arm_spe_set_event_name(evlist, id, "memory");
1741 		id += 1;
1742 	}
1743 
1744 	if (spe->synth_opts.instructions) {
1745 		spe->sample_instructions = true;
1746 		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1747 
1748 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1749 		if (err)
1750 			return err;
1751 		spe->instructions_id = id;
1752 		arm_spe_set_event_name(evlist, id, "instructions");
1753 	}
1754 
1755 	return 0;
1756 }
1757 
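/* True when every CPU in the metadata reports the same MIDR. */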
1758 static bool arm_spe__is_homogeneous(u64 **metadata, int nr_cpu)
1759 {
1760 	u64 midr;
1761 	int i;
1762 
1763 	if (!nr_cpu)
1764 		return false;
1765 
1766 	for (i = 0; i < nr_cpu; i++) {
1767 		if (!metadata[i])
1768 			return false;
1769 
1770 		if (i == 0) {
1771 			midr = metadata[i][ARM_SPE_CPU_MIDR];
1772 			continue;
1773 		}
1774 
1775 		if (midr != metadata[i][ARM_SPE_CPU_MIDR])
1776 			return false;
1777 	}
1778 
1779 	return true;
1780 }
1781 
1782 int arm_spe_process_auxtrace_info(union perf_event *event,
1783 				  struct perf_session *session)
1784 {
1785 	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
1786 	size_t min_sz = ARM_SPE_AUXTRACE_V1_PRIV_SIZE;
1787 	struct perf_record_time_conv *tc = &session->time_conv;
1788 	struct arm_spe *spe;
1789 	u64 **metadata = NULL;
1790 	u64 metadata_ver;
1791 	int nr_cpu, err;
1792 
1793 	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
1794 					min_sz)
1795 		return -EINVAL;
1796 
1797 	metadata = arm_spe__alloc_metadata(auxtrace_info, &metadata_ver,
1798 					   &nr_cpu);
1799 	if (!metadata && metadata_ver != 1) {
1800 		pr_err("Failed to parse Arm SPE metadata.\n");
1801 		return -EINVAL;
1802 	}
1803 
1804 	spe = zalloc(sizeof(struct arm_spe));
1805 	if (!spe) {
1806 		err = -ENOMEM;
1807 		goto err_free_metadata;
1808 	}
1809 
1810 	err = auxtrace_queues__init(&spe->queues);
1811 	if (err)
1812 		goto err_free;
1813 
1814 	spe->session = session;
1815 	spe->machine = &session->machines.host; /* No kvm support */
1816 	spe->auxtrace_type = auxtrace_info->type;
1817 	if (metadata_ver == 1)
1818 		spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
1819 	else
1820 		spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE_V2];
1821 	spe->metadata = metadata;
1822 	spe->metadata_ver = metadata_ver;
1823 	spe->metadata_nr_cpu = nr_cpu;
1824 	spe->is_homogeneous = arm_spe__is_homogeneous(metadata, nr_cpu);
1825 
1826 	spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);
1827 
1828 	/*
1829 	 * The synthesized event PERF_RECORD_TIME_CONV has already been handled
1830 	 * and the parameters for the hardware clock are stored in the session
1831 	 * context.  Pass these parameters to the struct perf_tsc_conversion
1832 	 * in "spe->tc", which is used for later conversion between the clock
1833 	 * counter and the timestamp.
1834 	 *
1835 	 * For backward compatibility, copy the fields starting from
1836 	 * "time_cycles" only if they are contained in the event.
1837 	 */
1838 	spe->tc.time_shift = tc->time_shift;
1839 	spe->tc.time_mult = tc->time_mult;
1840 	spe->tc.time_zero = tc->time_zero;
1841 
1842 	if (event_contains(*tc, time_cycles)) {
1843 		spe->tc.time_cycles = tc->time_cycles;
1844 		spe->tc.time_mask = tc->time_mask;
1845 		spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
1846 		spe->tc.cap_user_time_short = tc->cap_user_time_short;
1847 	}
1848 
1849 	spe->auxtrace.process_event = arm_spe_process_event;
1850 	spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
1851 	spe->auxtrace.flush_events = arm_spe_flush;
1852 	spe->auxtrace.free_events = arm_spe_free_events;
1853 	spe->auxtrace.free = arm_spe_free;
1854 	spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
1855 	session->auxtrace = &spe->auxtrace;
1856 
1857 	arm_spe_print_info(spe, &auxtrace_info->priv[0]);
1858 
1859 	if (dump_trace)
1860 		return 0;
1861 
1862 	if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
1863 		spe->synth_opts = *session->itrace_synth_opts;
1864 	} else {
1865 		itrace_synth_opts__set_default(&spe->synth_opts, false);
1866 		/* Default nanoseconds period not supported */
1867 		spe->synth_opts.period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS;
1868 		spe->synth_opts.period = 1;
1869 	}
1870 
1871 	if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) {
1872 		ui__error("You must only use i (instructions) --itrace period with Arm SPE. e.g --itrace=i1i\n");
1873 		err = -EINVAL;
1874 		goto err_free_queues;
1875 	}
1876 	if (spe->synth_opts.period > 1)
1877 		ui__warning("Arm SPE has a hardware-based sampling period.\n\n"
1878 			    "--itrace periods > 1i downsample by an interval of n SPE samples rather than n instructions.\n");
1879 
1880 	err = arm_spe_synth_events(spe, session);
1881 	if (err)
1882 		goto err_free_queues;
1883 
1884 	err = auxtrace_queues__process_index(&spe->queues, session);
1885 	if (err)
1886 		goto err_free_queues;
1887 
1888 	if (spe->queues.populated)
1889 		spe->data_queued = true;
1890 
1891 	return 0;
1892 
1893 err_free_queues:
1894 	auxtrace_queues__free(&spe->queues);
1895 	session->auxtrace = NULL;
1896 err_free:
1897 	free(spe);
1898 err_free_metadata:
1899 	arm_spe__free_metadata(metadata, nr_cpu);
1900 	return err;
1901 }
1902