1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Arm Statistical Profiling Extensions (SPE) support
4  * Copyright (c) 2017-2018, Arm Ltd.
5  */
6 
7 #include <byteswap.h>
8 #include <endian.h>
9 #include <errno.h>
10 #include <inttypes.h>
11 #include <linux/bitops.h>
12 #include <linux/kernel.h>
13 #include <linux/log2.h>
14 #include <linux/types.h>
15 #include <linux/zalloc.h>
16 #include <stdlib.h>
17 #include <unistd.h>
18 
19 #include "auxtrace.h"
20 #include "color.h"
21 #include "debug.h"
22 #include "evlist.h"
23 #include "evsel.h"
24 #include "machine.h"
25 #include "session.h"
26 #include "symbol.h"
27 #include "thread.h"
28 #include "thread-stack.h"
29 #include "tsc.h"
30 #include "tool.h"
31 #include "util/synthetic-events.h"
32 
33 #include "arm-spe.h"
34 #include "arm-spe-decoder/arm-spe-decoder.h"
35 #include "arm-spe-decoder/arm-spe-pkt-decoder.h"
36 
37 #include "../../arch/arm64/include/asm/cputype.h"
38 #define MAX_TIMESTAMP (~0ULL)
39 
40 #define is_ldst_op(op)		(!!((op) & ARM_SPE_OP_LDST))
41 
42 #define ARM_SPE_CACHE_EVENT(lvl) \
43 	(ARM_SPE_##lvl##_ACCESS | ARM_SPE_##lvl##_MISS)
44 
45 #define arm_spe_is_cache_level(type, lvl) \
46 	((type) & ARM_SPE_CACHE_EVENT(lvl))
47 
48 #define arm_spe_is_cache_hit(type, lvl) \
49 	(((type) & ARM_SPE_CACHE_EVENT(lvl)) == ARM_SPE_##lvl##_ACCESS)
50 
51 #define arm_spe_is_cache_miss(type, lvl) \
52 	((type) & ARM_SPE_##lvl##_MISS)
53 
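/*
 * Note on the macros above: arm_spe_is_cache_hit(type, L1D), for instance,
 * expands to ((type) & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) ==
 * ARM_SPE_L1D_ACCESS, i.e. the access bit is set while the miss bit is
 * clear; arm_spe_is_cache_miss() tests only the miss bit.
 */
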
54 struct arm_spe {
55 	struct auxtrace			auxtrace;
56 	struct auxtrace_queues		queues;
57 	struct auxtrace_heap		heap;
58 	struct itrace_synth_opts        synth_opts;
59 	u32				auxtrace_type;
60 	struct perf_session		*session;
61 	struct machine			*machine;
62 	u32				pmu_type;
63 
64 	struct perf_tsc_conversion	tc;
65 
66 	u8				timeless_decoding;
67 	u8				data_queued;
68 
69 	u64				sample_type;
70 	u8				sample_flc;
71 	u8				sample_llc;
72 	u8				sample_tlb;
73 	u8				sample_branch;
74 	u8				sample_remote_access;
75 	u8				sample_memory;
76 	u8				sample_instructions;
77 
78 	u64				l1d_miss_id;
79 	u64				l1d_access_id;
80 	u64				llc_miss_id;
81 	u64				llc_access_id;
82 	u64				tlb_miss_id;
83 	u64				tlb_access_id;
84 	u64				branch_id;
85 	u64				remote_access_id;
86 	u64				memory_id;
87 	u64				instructions_id;
88 
89 	u64				kernel_start;
90 
91 	unsigned long			num_events;
92 	u8				use_ctx_pkt_for_pid;
93 
94 	u64				**metadata;
95 	u64				metadata_ver;
96 	u64				metadata_nr_cpu;
97 	bool				is_homogeneous;
98 };
99 
100 struct arm_spe_queue {
101 	struct arm_spe			*spe;
102 	unsigned int			queue_nr;
103 	struct auxtrace_buffer		*buffer;
104 	struct auxtrace_buffer		*old_buffer;
105 	union perf_event		*event_buf;
106 	bool				on_heap;
107 	bool				done;
108 	pid_t				pid;
109 	pid_t				tid;
110 	int				cpu;
111 	struct arm_spe_decoder		*decoder;
112 	u64				time;
113 	u64				timestamp;
114 	struct thread			*thread;
115 	u64				sample_count;
116 	u32				flags;
117 	struct branch_stack		*last_branch;
118 };
119 
120 struct data_source_handle {
121 	const struct midr_range *midr_ranges;
122 	void (*ds_synth)(const struct arm_spe_record *record,
123 			 union perf_mem_data_src *data_src);
124 };
125 
126 #define DS(range, func)					\
127 	{						\
128 		.midr_ranges = range,			\
129 		.ds_synth = arm_spe__synth_##func,	\
130 	}
131 
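/*
 * For illustration: DS(common_ds_encoding_cpus, data_source_common) expands
 * to { .midr_ranges = common_ds_encoding_cpus,
 *      .ds_synth = arm_spe__synth_data_source_common, };
 * see the data_source_handles[] table further down.
 */
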
132 static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
133 			 unsigned char *buf, size_t len)
134 {
135 	struct arm_spe_pkt packet;
136 	size_t pos = 0;
137 	int ret, pkt_len, i;
138 	char desc[ARM_SPE_PKT_DESC_MAX];
139 	const char *color = PERF_COLOR_BLUE;
140 
141 	color_fprintf(stdout, color,
142 		      ". ... ARM SPE data: size %#zx bytes\n",
143 		      len);
144 
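	/*
	 * Each dump line below prints a '.' continuation marker, the offset
	 * into the buffer, the raw packet bytes padded to 16 columns, and
	 * the decoded description; on a decode failure it prints
	 * "Bad packet!" and skips a single byte (pkt_len = 1).
	 */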
145 	while (len) {
146 		ret = arm_spe_get_packet(buf, len, &packet);
147 		if (ret > 0)
148 			pkt_len = ret;
149 		else
150 			pkt_len = 1;
151 		printf(".");
152 		color_fprintf(stdout, color, "  %08zx: ", pos);
153 		for (i = 0; i < pkt_len; i++)
154 			color_fprintf(stdout, color, " %02x", buf[i]);
155 		for (; i < 16; i++)
156 			color_fprintf(stdout, color, "   ");
157 		if (ret > 0) {
158 			ret = arm_spe_pkt_desc(&packet, desc,
159 					       ARM_SPE_PKT_DESC_MAX);
160 			if (!ret)
161 				color_fprintf(stdout, color, " %s\n", desc);
162 		} else {
163 			color_fprintf(stdout, color, " Bad packet!\n");
164 		}
165 		pos += pkt_len;
166 		buf += pkt_len;
167 		len -= pkt_len;
168 	}
169 }
170 
171 static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
172 			       size_t len)
173 {
174 	printf(".\n");
175 	arm_spe_dump(spe, buf, len);
176 }
177 
178 static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
179 {
180 	struct arm_spe_queue *speq = data;
181 	struct auxtrace_buffer *buffer = speq->buffer;
182 	struct auxtrace_buffer *old_buffer = speq->old_buffer;
183 	struct auxtrace_queue *queue;
184 
185 	queue = &speq->spe->queues.queue_array[speq->queue_nr];
186 
187 	buffer = auxtrace_buffer__next(queue, buffer);
188 	/* If no more data, drop the previous auxtrace_buffer and return */
189 	if (!buffer) {
190 		if (old_buffer)
191 			auxtrace_buffer__drop_data(old_buffer);
192 		b->len = 0;
193 		return 0;
194 	}
195 
196 	speq->buffer = buffer;
197 
198 	/* If the aux_buffer doesn't have data associated, try to load it */
199 	if (!buffer->data) {
200 		/* get the file desc associated with the perf data file */
201 		int fd = perf_data__fd(speq->spe->session->data);
202 
203 		buffer->data = auxtrace_buffer__get_data(buffer, fd);
204 		if (!buffer->data)
205 			return -ENOMEM;
206 	}
207 
208 	b->len = buffer->size;
209 	b->buf = buffer->data;
210 
211 	if (b->len) {
212 		if (old_buffer)
213 			auxtrace_buffer__drop_data(old_buffer);
214 		speq->old_buffer = buffer;
215 	} else {
216 		auxtrace_buffer__drop_data(buffer);
217 		return arm_spe_get_trace(b, data);
218 	}
219 
220 	return 0;
221 }
222 
223 static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
224 		unsigned int queue_nr)
225 {
226 	struct arm_spe_params params = { .get_trace = 0, };
227 	struct arm_spe_queue *speq;
228 
229 	speq = zalloc(sizeof(*speq));
230 	if (!speq)
231 		return NULL;
232 
233 	speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
234 	if (!speq->event_buf)
235 		goto out_free;
236 
237 	speq->spe = spe;
238 	speq->queue_nr = queue_nr;
239 	speq->pid = -1;
240 	speq->tid = -1;
241 	speq->cpu = -1;
242 
243 	/* Set up the decoder callback parameters */
244 	params.get_trace = arm_spe_get_trace;
245 	params.data = speq;
246 
247 	if (spe->synth_opts.last_branch) {
248 		size_t sz = sizeof(struct branch_stack);
249 
250 		/* Allocate up to two entries for PBT + TGT */
251 		sz += sizeof(struct branch_entry) *
252 			min(spe->synth_opts.last_branch_sz, 2U);
253 		speq->last_branch = zalloc(sz);
254 		if (!speq->last_branch)
255 			goto out_free;
256 	}
257 
258 	/* create new decoder */
259 	speq->decoder = arm_spe_decoder_new(&params);
260 	if (!speq->decoder)
261 		goto out_free;
262 
263 	return speq;
264 
265 out_free:
266 	zfree(&speq->event_buf);
267 	zfree(&speq->last_branch);
268 	free(speq);
269 
270 	return NULL;
271 }
272 
273 static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
274 {
275 	return ip >= spe->kernel_start ?
276 		PERF_RECORD_MISC_KERNEL :
277 		PERF_RECORD_MISC_USER;
278 }
279 
280 static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
281 				    struct auxtrace_queue *queue)
282 {
283 	struct arm_spe_queue *speq = queue->priv;
284 	pid_t tid;
285 
286 	tid = machine__get_current_tid(spe->machine, speq->cpu);
287 	if (tid != -1) {
288 		speq->tid = tid;
289 		thread__zput(speq->thread);
290 	} else
291 		speq->tid = queue->tid;
292 
293 	if ((!speq->thread) && (speq->tid != -1)) {
294 		speq->thread = machine__find_thread(spe->machine, -1,
295 						    speq->tid);
296 	}
297 
298 	if (speq->thread) {
299 		speq->pid = thread__pid(speq->thread);
300 		if (queue->cpu == -1)
301 			speq->cpu = thread__cpu(speq->thread);
302 	}
303 }
304 
305 static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid)
306 {
307 	struct arm_spe *spe = speq->spe;
308 	int err = machine__set_current_tid(spe->machine, speq->cpu, -1, tid);
309 
310 	if (err)
311 		return err;
312 
313 	arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]);
314 
315 	return 0;
316 }
317 
318 static u64 *arm_spe__get_metadata_by_cpu(struct arm_spe *spe, int cpu)
319 {
320 	u64 i;
321 
322 	if (!spe->metadata)
323 		return NULL;
324 
325 	/* CPU ID is -1 for per-thread mode */
326 	if (cpu < 0) {
327 		/*
328 		 * On a heterogeneous system the CPU ID is -1, so we cannot
329 		 * confirm whether the data source packet is supported.
330 		 */
331 		if (!spe->is_homogeneous)
332 			return NULL;
333 
334 		/* In a homogeneous system, simply use CPU0's metadata */
335 		return spe->metadata[0];
336 	}
337 
338 	for (i = 0; i < spe->metadata_nr_cpu; i++)
339 		if (spe->metadata[i][ARM_SPE_CPU] == (u64)cpu)
340 			return spe->metadata[i];
341 
342 	return NULL;
343 }
344 
345 static struct simd_flags arm_spe__synth_simd_flags(const struct arm_spe_record *record)
346 {
347 	struct simd_flags simd_flags = {};
348 
349 	if (record->op & ARM_SPE_OP_SVE)
350 		simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;
351 
352 	if (record->type & ARM_SPE_SVE_PARTIAL_PRED)
353 		simd_flags.pred |= SIMD_OP_FLAGS_PRED_PARTIAL;
354 
355 	if (record->type & ARM_SPE_SVE_EMPTY_PRED)
356 		simd_flags.pred |= SIMD_OP_FLAGS_PRED_EMPTY;
357 
358 	return simd_flags;
359 }
360 
361 static void arm_spe_prep_sample(struct arm_spe *spe,
362 				struct arm_spe_queue *speq,
363 				union perf_event *event,
364 				struct perf_sample *sample)
365 {
366 	struct arm_spe_record *record = &speq->decoder->record;
367 
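	/*
	 * Convert the SPE timestamp (a generic timer counter value) into
	 * perf time using the clock parameters that
	 * arm_spe_process_auxtrace_info() copied from the
	 * PERF_RECORD_TIME_CONV event into spe->tc.
	 */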
368 	if (!spe->timeless_decoding)
369 		sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);
370 
371 	sample->ip = record->from_ip;
372 	sample->cpumode = arm_spe_cpumode(spe, sample->ip);
373 	sample->pid = speq->pid;
374 	sample->tid = speq->tid;
375 	sample->period = spe->synth_opts.period;
376 	sample->cpu = speq->cpu;
377 	sample->simd_flags = arm_spe__synth_simd_flags(record);
378 
379 	event->sample.header.type = PERF_RECORD_SAMPLE;
380 	event->sample.header.misc = sample->cpumode;
381 	event->sample.header.size = sizeof(struct perf_event_header);
382 }
383 
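/*
 * Synthesize a branch stack of at most two entries from a single SPE record:
 * the first entry describes the sampled branch itself (source, target and
 * flags) when the record is a branch, and the following entry carries the
 * previous branch target (PBT), for which only the target address is known.
 */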
384 static void arm_spe__prep_branch_stack(struct arm_spe_queue *speq)
385 {
386 	struct arm_spe *spe = speq->spe;
387 	struct arm_spe_record *record = &speq->decoder->record;
388 	struct branch_stack *bstack = speq->last_branch;
389 	struct branch_flags *bs_flags;
390 	unsigned int last_branch_sz = spe->synth_opts.last_branch_sz;
391 	bool have_tgt = !!(speq->flags & PERF_IP_FLAG_BRANCH);
392 	bool have_pbt = last_branch_sz >= (have_tgt + 1U) && record->prev_br_tgt;
393 	size_t sz = sizeof(struct branch_stack) +
394 		    sizeof(struct branch_entry) * min(last_branch_sz, 2U) /* PBT + TGT */;
395 	int i = 0;
396 
397 	/* Clean up branch stack */
398 	memset(bstack, 0x0, sz);
399 
400 	if (!have_tgt && !have_pbt)
401 		return;
402 
403 	if (have_tgt) {
404 		bstack->entries[i].from = record->from_ip;
405 		bstack->entries[i].to = record->to_ip;
406 
407 		bs_flags = &bstack->entries[i].flags;
408 		bs_flags->value = 0;
409 
410 		if (record->op & ARM_SPE_OP_BR_CR_BL) {
411 			if (record->op & ARM_SPE_OP_BR_COND)
412 				bs_flags->type |= PERF_BR_COND_CALL;
413 			else
414 				bs_flags->type |= PERF_BR_CALL;
415 		/*
416 		 * Indirect branch instruction without link (e.g. BR),
417 		 * treat this case as a function return.
418 		 */
419 		} else if (record->op & ARM_SPE_OP_BR_CR_RET ||
420 			   record->op & ARM_SPE_OP_BR_INDIRECT) {
421 			if (record->op & ARM_SPE_OP_BR_COND)
422 				bs_flags->type |= PERF_BR_COND_RET;
423 			else
424 				bs_flags->type |= PERF_BR_RET;
425 		} else if (record->op & ARM_SPE_OP_BR_CR_NON_BL_RET) {
426 			if (record->op & ARM_SPE_OP_BR_COND)
427 				bs_flags->type |= PERF_BR_COND;
428 			else
429 				bs_flags->type |= PERF_BR_UNCOND;
430 		} else {
431 			if (record->op & ARM_SPE_OP_BR_COND)
432 				bs_flags->type |= PERF_BR_COND;
433 			else
434 				bs_flags->type |= PERF_BR_UNKNOWN;
435 		}
436 
437 		if (record->type & ARM_SPE_BRANCH_MISS) {
438 			bs_flags->mispred = 1;
439 			bs_flags->predicted = 0;
440 		} else {
441 			bs_flags->mispred = 0;
442 			bs_flags->predicted = 1;
443 		}
444 
445 		if (record->type & ARM_SPE_BRANCH_NOT_TAKEN)
446 			bs_flags->not_taken = 1;
447 
448 		if (record->type & ARM_SPE_IN_TXN)
449 			bs_flags->in_tx = 1;
450 
451 		bs_flags->cycles = min(record->latency, 0xFFFFU);
452 		i++;
453 	}
454 
455 	if (have_pbt) {
456 		bs_flags = &bstack->entries[i].flags;
457 		bs_flags->type |= PERF_BR_UNKNOWN;
458 		bstack->entries[i].to = record->prev_br_tgt;
459 		i++;
460 	}
461 
462 	bstack->nr = i;
463 	bstack->hw_idx = -1ULL;
464 }
465 
466 static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type)
467 {
468 	event->header.size = perf_event__sample_event_size(sample, type, 0);
469 	return perf_event__synthesize_sample(event, type, 0, sample);
470 }
471 
472 static inline int
473 arm_spe_deliver_synth_event(struct arm_spe *spe,
474 			    struct arm_spe_queue *speq __maybe_unused,
475 			    union perf_event *event,
476 			    struct perf_sample *sample)
477 {
478 	int ret;
479 
480 	if (spe->synth_opts.inject) {
481 		ret = arm_spe__inject_event(event, sample, spe->sample_type);
482 		if (ret)
483 			return ret;
484 	}
485 
486 	ret = perf_session__deliver_synth_event(spe->session, event, sample);
487 	if (ret)
488 		pr_err("ARM SPE: failed to deliver event, error %d\n", ret);
489 
490 	return ret;
491 }
492 
493 static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
494 				     u64 spe_events_id,
495 				     union perf_mem_data_src data_src)
496 {
497 	struct arm_spe *spe = speq->spe;
498 	struct arm_spe_record *record = &speq->decoder->record;
499 	union perf_event *event = speq->event_buf;
500 	struct perf_sample sample;
501 	int ret;
502 
503 	perf_sample__init(&sample, /*all=*/true);
504 	arm_spe_prep_sample(spe, speq, event, &sample);
505 
506 	sample.id = spe_events_id;
507 	sample.stream_id = spe_events_id;
508 	sample.addr = record->virt_addr;
509 	sample.phys_addr = record->phys_addr;
510 	sample.data_src = data_src.val;
511 	sample.weight = record->latency;
512 
513 	ret = arm_spe_deliver_synth_event(spe, speq, event, &sample);
514 	perf_sample__exit(&sample);
515 	return ret;
516 }
517 
518 static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
519 					u64 spe_events_id)
520 {
521 	struct arm_spe *spe = speq->spe;
522 	struct arm_spe_record *record = &speq->decoder->record;
523 	union perf_event *event = speq->event_buf;
524 	struct perf_sample sample;
525 	int ret;
526 
527 	perf_sample__init(&sample, /*all=*/true);
528 	arm_spe_prep_sample(spe, speq, event, &sample);
529 
530 	sample.id = spe_events_id;
531 	sample.stream_id = spe_events_id;
532 	sample.addr = record->to_ip;
533 	sample.weight = record->latency;
534 	sample.flags = speq->flags;
535 	sample.branch_stack = speq->last_branch;
536 
537 	ret = arm_spe_deliver_synth_event(spe, speq, event, &sample);
538 	perf_sample__exit(&sample);
539 	return ret;
540 }
541 
542 static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
543 					     u64 spe_events_id,
544 					     union perf_mem_data_src data_src)
545 {
546 	struct arm_spe *spe = speq->spe;
547 	struct arm_spe_record *record = &speq->decoder->record;
548 	union perf_event *event = speq->event_buf;
549 	struct perf_sample sample;
550 	int ret;
551 
552 	perf_sample__init(&sample, /*all=*/true);
553 	arm_spe_prep_sample(spe, speq, event, &sample);
554 
555 	sample.id = spe_events_id;
556 	sample.stream_id = spe_events_id;
557 	sample.addr = record->to_ip;
558 	sample.phys_addr = record->phys_addr;
559 	sample.data_src = data_src.val;
560 	sample.weight = record->latency;
561 	sample.flags = speq->flags;
562 	sample.branch_stack = speq->last_branch;
563 
564 	ret = arm_spe_deliver_synth_event(spe, speq, event, &sample);
565 	perf_sample__exit(&sample);
566 	return ret;
567 }
568 
569 static const struct midr_range common_ds_encoding_cpus[] = {
570 	MIDR_ALL_VERSIONS(MIDR_CORTEX_A720),
571 	MIDR_ALL_VERSIONS(MIDR_CORTEX_A720AE),
572 	MIDR_ALL_VERSIONS(MIDR_CORTEX_A725),
573 	MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C),
574 	MIDR_ALL_VERSIONS(MIDR_CORTEX_X3),
575 	MIDR_ALL_VERSIONS(MIDR_CORTEX_X925),
576 	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
577 	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
578 	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
579 	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2),
580 	{},
581 };
582 
583 static const struct midr_range ampereone_ds_encoding_cpus[] = {
584 	MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
585 	{},
586 };
587 
588 static const struct midr_range hisi_hip_ds_encoding_cpus[] = {
589 	MIDR_ALL_VERSIONS(MIDR_HISI_HIP12),
590 	{},
591 };
592 
593 static void arm_spe__sample_flags(struct arm_spe_queue *speq)
594 {
595 	const struct arm_spe_record *record = &speq->decoder->record;
596 
597 	speq->flags = 0;
598 	if (record->op & ARM_SPE_OP_BRANCH_ERET) {
599 		speq->flags = PERF_IP_FLAG_BRANCH;
600 
601 		if (record->type & ARM_SPE_BRANCH_MISS)
602 			speq->flags |= PERF_IP_FLAG_BRANCH_MISS;
603 
604 		if (record->type & ARM_SPE_BRANCH_NOT_TAKEN)
605 			speq->flags |= PERF_IP_FLAG_NOT_TAKEN;
606 
607 		if (record->type & ARM_SPE_IN_TXN)
608 			speq->flags |= PERF_IP_FLAG_IN_TX;
609 
610 		if (record->op & ARM_SPE_OP_BR_COND)
611 			speq->flags |= PERF_IP_FLAG_CONDITIONAL;
612 
613 		if (record->op & ARM_SPE_OP_BR_CR_BL)
614 			speq->flags |= PERF_IP_FLAG_CALL;
615 		else if (record->op & ARM_SPE_OP_BR_CR_RET)
616 			speq->flags |= PERF_IP_FLAG_RETURN;
617 		/*
618 		 * Indirect branch instruction without link (e.g. BR),
619 		 * treat it as a function return.
620 		 */
621 		else if (record->op & ARM_SPE_OP_BR_INDIRECT)
622 			speq->flags |= PERF_IP_FLAG_RETURN;
623 	}
624 }
625 
626 static void arm_spe__synth_data_source_common(const struct arm_spe_record *record,
627 					      union perf_mem_data_src *data_src)
628 {
629 	/*
630 	 * Even though four levels of cache hierarchy are possible, no known
631 	 * production Neoverse systems currently include more than three levels
632 	 * so for the time being we assume three exist. If a production system
633 	 * is built with four, then this function would have to be changed to
634 	 * detect the number of levels for reporting.
635 	 */
636 
637 	/*
638 	 * We have no data on the hit level or data source for stores in the
639 	 * Neoverse SPE records.
640 	 */
641 	if (record->op & ARM_SPE_OP_ST) {
642 		data_src->mem_lvl = PERF_MEM_LVL_NA;
643 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
644 		data_src->mem_snoop = PERF_MEM_SNOOP_NA;
645 		return;
646 	}
647 
648 	switch (record->source) {
649 	case ARM_SPE_COMMON_DS_L1D:
650 		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
651 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
652 		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
653 		break;
654 	case ARM_SPE_COMMON_DS_L2:
655 		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
656 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
657 		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
658 		break;
659 	case ARM_SPE_COMMON_DS_PEER_CORE:
660 		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
661 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
662 		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
663 		break;
664 	/*
665 	 * We don't know if this is L1, L2 but we do know it was a cache-2-cache
666 	 * transfer, so set SNOOPX_PEER
667 	 */
668 	case ARM_SPE_COMMON_DS_LOCAL_CLUSTER:
669 	case ARM_SPE_COMMON_DS_PEER_CLUSTER:
670 		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
671 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
672 		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
673 		break;
674 	/*
675 	 * System cache is assumed to be L3
676 	 */
677 	case ARM_SPE_COMMON_DS_SYS_CACHE:
678 		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
679 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
680 		data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
681 		break;
682 	/*
683 	 * We don't know what level it hit in, except it came from the other
684 	 * socket
685 	 */
686 	case ARM_SPE_COMMON_DS_REMOTE:
687 		data_src->mem_lvl = PERF_MEM_LVL_NA;
688 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
689 		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
690 		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
691 		break;
692 	case ARM_SPE_COMMON_DS_DRAM:
693 		data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
694 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
695 		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
696 		break;
697 	default:
698 		break;
699 	}
700 }
701 
702 /*
703  * Source is IMPDEF. Here we convert the source encoding used on AmpereOne cores
704  * to the common one (Neoverse, Cortex) to avoid duplicating the decoding logic.
705  */
706 static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record,
707 						 union perf_mem_data_src *data_src)
708 {
709 	struct arm_spe_record common_record;
710 
711 	switch (record->source) {
712 	case ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE:
713 		common_record.source = ARM_SPE_COMMON_DS_PEER_CORE;
714 		break;
715 	case ARM_SPE_AMPEREONE_SLC:
716 		common_record.source = ARM_SPE_COMMON_DS_SYS_CACHE;
717 		break;
718 	case ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE:
719 		common_record.source = ARM_SPE_COMMON_DS_REMOTE;
720 		break;
721 	case ARM_SPE_AMPEREONE_DDR:
722 		common_record.source = ARM_SPE_COMMON_DS_DRAM;
723 		break;
724 	case ARM_SPE_AMPEREONE_L1D:
725 		common_record.source = ARM_SPE_COMMON_DS_L1D;
726 		break;
727 	case ARM_SPE_AMPEREONE_L2D:
728 		common_record.source = ARM_SPE_COMMON_DS_L2;
729 		break;
730 	default:
731 		pr_warning_once("AmpereOne: Unknown data source (0x%x)\n",
732 				record->source);
733 		return;
734 	}
735 
736 	common_record.op = record->op;
737 	arm_spe__synth_data_source_common(&common_record, data_src);
738 }
739 
740 static void arm_spe__synth_data_source_hisi_hip(const struct arm_spe_record *record,
741 						union perf_mem_data_src *data_src)
742 {
743 	/* Use common synthesis method to handle store operations */
744 	if (record->op & ARM_SPE_OP_ST) {
745 		arm_spe__synth_data_source_common(record, data_src);
746 		return;
747 	}
748 
749 	switch (record->source) {
750 	case ARM_SPE_HISI_HIP_PEER_CPU:
751 		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
752 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
753 		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
754 		break;
755 	case ARM_SPE_HISI_HIP_PEER_CPU_HITM:
756 		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
757 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
758 		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
759 		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
760 		break;
761 	case ARM_SPE_HISI_HIP_L3:
762 		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
763 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
764 		data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
765 		break;
766 	case ARM_SPE_HISI_HIP_L3_HITM:
767 		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
768 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
769 		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
770 		break;
771 	case ARM_SPE_HISI_HIP_PEER_CLUSTER:
772 		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
773 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
774 		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
775 		break;
776 	case ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM:
777 		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
778 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
779 		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
780 		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
781 		break;
782 	case ARM_SPE_HISI_HIP_REMOTE_SOCKET:
783 		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
784 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
785 		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
786 		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
787 		break;
788 	case ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM:
789 		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
790 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
791 		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
792 		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
793 		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
794 		break;
795 	case ARM_SPE_HISI_HIP_LOCAL_MEM:
796 		data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
797 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
798 		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
799 		break;
800 	case ARM_SPE_HISI_HIP_REMOTE_MEM:
801 		data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT;
802 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
803 		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
804 		break;
805 	case ARM_SPE_HISI_HIP_NC_DEV:
806 		data_src->mem_lvl = PERF_MEM_LVL_IO | PERF_MEM_LVL_HIT;
807 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO;
808 		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
809 		break;
810 	case ARM_SPE_HISI_HIP_L2:
811 		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
812 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
813 		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
814 		break;
815 	case ARM_SPE_HISI_HIP_L2_HITM:
816 		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
817 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
818 		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
819 		break;
820 	case ARM_SPE_HISI_HIP_L1:
821 		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
822 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
823 		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
824 		break;
825 	default:
826 		break;
827 	}
828 }
829 
830 static const struct data_source_handle data_source_handles[] = {
831 	DS(common_ds_encoding_cpus, data_source_common),
832 	DS(ampereone_ds_encoding_cpus, data_source_ampereone),
833 	DS(hisi_hip_ds_encoding_cpus, data_source_hisi_hip),
834 };
835 
836 static void arm_spe__synth_ld_memory_level(const struct arm_spe_record *record,
837 					   union perf_mem_data_src *data_src)
838 {
839 	/*
840 	 * To find a cache hit, search in ascending order from the lower level
841 	 * caches to the higher level caches. This reflects the best scenario
842 	 * for a cache hit.
843 	 */
844 	if (arm_spe_is_cache_hit(record->type, L1D)) {
845 		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
846 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
847 	} else if (record->type & ARM_SPE_RECENTLY_FETCHED) {
848 		data_src->mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT;
849 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_LFB;
850 	} else if (arm_spe_is_cache_hit(record->type, L2D)) {
851 		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
852 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
853 	} else if (arm_spe_is_cache_hit(record->type, LLC)) {
854 		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
855 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
856 	/*
857 	 * To find a cache miss, search in descending order from the higher
858 	 * level cache to the lower level cache. This represents the worst
859 	 * scenario for a cache miss.
860 	 */
861 	} else if (arm_spe_is_cache_miss(record->type, LLC)) {
862 		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_MISS;
863 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
864 	} else if (arm_spe_is_cache_miss(record->type, L2D)) {
865 		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_MISS;
866 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
867 	} else if (arm_spe_is_cache_miss(record->type, L1D)) {
868 		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
869 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
870 	}
871 }
872 
873 static void arm_spe__synth_st_memory_level(const struct arm_spe_record *record,
874 					   union perf_mem_data_src *data_src)
875 {
876 	/* Record the greatest level info for a store operation. */
877 	if (arm_spe_is_cache_level(record->type, LLC)) {
878 		data_src->mem_lvl = PERF_MEM_LVL_L3;
879 		data_src->mem_lvl |= arm_spe_is_cache_miss(record->type, LLC) ?
880 				     PERF_MEM_LVL_MISS : PERF_MEM_LVL_HIT;
881 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
882 	} else if (arm_spe_is_cache_level(record->type, L2D)) {
883 		data_src->mem_lvl = PERF_MEM_LVL_L2;
884 		data_src->mem_lvl |= arm_spe_is_cache_miss(record->type, L2D) ?
885 				     PERF_MEM_LVL_MISS : PERF_MEM_LVL_HIT;
886 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
887 	} else if (arm_spe_is_cache_level(record->type, L1D)) {
888 		data_src->mem_lvl = PERF_MEM_LVL_L1;
889 		data_src->mem_lvl |= arm_spe_is_cache_miss(record->type, L1D) ?
890 				     PERF_MEM_LVL_MISS : PERF_MEM_LVL_HIT;
891 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
892 	}
893 }
894 
895 static void arm_spe__synth_memory_level(struct arm_spe_queue *speq,
896 					const struct arm_spe_record *record,
897 					union perf_mem_data_src *data_src)
898 {
899 	struct arm_spe *spe = speq->spe;
900 
901 	/*
902 	 * The data source packet contains more info for cache levels for
903 	 * peer snooping. So respect the memory level if it has been set by
904 	 * data source parsing.
905 	 */
906 	if (!data_src->mem_lvl) {
907 		if (data_src->mem_op == PERF_MEM_OP_LOAD)
908 			arm_spe__synth_ld_memory_level(record, data_src);
909 		if (data_src->mem_op == PERF_MEM_OP_STORE)
910 			arm_spe__synth_st_memory_level(record, data_src);
911 	}
912 
913 	if (!data_src->mem_lvl) {
914 		data_src->mem_lvl = PERF_MEM_LVL_NA;
915 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
916 	}
917 
918 	/*
919 	 * If 'mem_snoop' has been set from the data source packet, skip
920 	 * setting it here.
921 	 */
922 	if (!data_src->mem_snoop) {
923 		if (record->type & ARM_SPE_DATA_SNOOPED) {
924 			if (record->type & ARM_SPE_HITM)
925 				data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
926 			else
927 				data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
928 		} else {
929 			u64 *metadata =
930 				arm_spe__get_metadata_by_cpu(spe, speq->cpu);
931 
932 			/*
933 			 * Set NA ("Not available") mode if there is no metadata or the
934 			 * SNOOPED event is not supported.
935 			 */
936 			if (!metadata ||
937 			    !(metadata[ARM_SPE_CAP_EVENT_FILTER] & ARM_SPE_DATA_SNOOPED))
938 				data_src->mem_snoop = PERF_MEM_SNOOP_NA;
939 			else
940 				data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
941 		}
942 	}
943 
944 	if (!data_src->mem_remote) {
945 		if (record->type & ARM_SPE_REMOTE_ACCESS)
946 			data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
947 	}
948 }
949 
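/*
 * Pick the data source decoder matching this CPU's MIDR. With version 1
 * metadata the MIDR comes from the session's cpuid string and all CPUs are
 * assumed identical; with newer metadata it is read from the per-CPU records.
 */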
950 static void arm_spe__synth_ds(struct arm_spe_queue *speq,
951 			      const struct arm_spe_record *record,
952 			      union perf_mem_data_src *data_src)
953 {
954 	struct arm_spe *spe = speq->spe;
955 	u64 *metadata = NULL;
956 	u64 midr;
957 	unsigned int i;
958 
959 	/* Metadata version 1 assumes all CPUs are the same (old behavior) */
960 	if (spe->metadata_ver == 1) {
961 		const char *cpuid;
962 
963 		pr_warning_once("Old SPE metadata, re-record to improve decode accuracy\n");
964 		cpuid = perf_env__cpuid(perf_session__env(spe->session));
965 		midr = strtol(cpuid, NULL, 16);
966 	} else {
967 		metadata = arm_spe__get_metadata_by_cpu(spe, speq->cpu);
968 		if (!metadata)
969 			return;
970 
971 		midr = metadata[ARM_SPE_CPU_MIDR];
972 	}
973 
974 	for (i = 0; i < ARRAY_SIZE(data_source_handles); i++) {
975 		if (is_midr_in_range_list(midr, data_source_handles[i].midr_ranges)) {
976 			return data_source_handles[i].ds_synth(record, data_src);
977 		}
978 	}
979 
980 	return;
981 }
982 
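/*
 * Compose the perf_mem_data_src descriptor for a load/store record: the
 * memory operation, the hierarchy level and snoop information (taken from
 * the data source packet on CPUs that support it, otherwise derived from
 * the event type bits), and the DTLB access/miss flags.
 */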
983 static union perf_mem_data_src
984 arm_spe__synth_data_source(struct arm_spe_queue *speq,
985 			   const struct arm_spe_record *record)
986 {
987 	union perf_mem_data_src	data_src = {};
988 
989 	/* Only synthesize data source for LDST operations */
990 	if (!is_ldst_op(record->op))
991 		return data_src;
992 
993 	if (record->op & ARM_SPE_OP_LD)
994 		data_src.mem_op = PERF_MEM_OP_LOAD;
995 	else if (record->op & ARM_SPE_OP_ST)
996 		data_src.mem_op = PERF_MEM_OP_STORE;
997 	else
998 		return data_src;
999 
1000 	arm_spe__synth_ds(speq, record, &data_src);
1001 	arm_spe__synth_memory_level(speq, record, &data_src);
1002 
1003 	if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
1004 		data_src.mem_dtlb = PERF_MEM_TLB_WK;
1005 
1006 		if (record->type & ARM_SPE_TLB_MISS)
1007 			data_src.mem_dtlb |= PERF_MEM_TLB_MISS;
1008 		else
1009 			data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
1010 	}
1011 
1012 	return data_src;
1013 }
1014 
1015 static int arm_spe_sample(struct arm_spe_queue *speq)
1016 {
1017 	const struct arm_spe_record *record = &speq->decoder->record;
1018 	struct arm_spe *spe = speq->spe;
1019 	union perf_mem_data_src data_src;
1020 	int err;
1021 
1022 	/*
1023 	 * Discard records until the sampling period (a count of SPE records) is reached
1024 	 */
1025 	speq->sample_count++;
1026 	if (speq->sample_count < spe->synth_opts.period)
1027 		return 0;
1028 	speq->sample_count = 0;
1029 
1030 	arm_spe__sample_flags(speq);
1031 	data_src = arm_spe__synth_data_source(speq, record);
1032 
1033 	if (spe->sample_flc) {
1034 		if (record->type & ARM_SPE_L1D_MISS) {
1035 			err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id,
1036 							data_src);
1037 			if (err)
1038 				return err;
1039 		}
1040 
1041 		if (record->type & ARM_SPE_L1D_ACCESS) {
1042 			err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id,
1043 							data_src);
1044 			if (err)
1045 				return err;
1046 		}
1047 	}
1048 
1049 	if (spe->sample_llc) {
1050 		if (record->type & ARM_SPE_LLC_MISS) {
1051 			err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
1052 							data_src);
1053 			if (err)
1054 				return err;
1055 		}
1056 
1057 		if (record->type & ARM_SPE_LLC_ACCESS) {
1058 			err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
1059 							data_src);
1060 			if (err)
1061 				return err;
1062 		}
1063 	}
1064 
1065 	if (spe->sample_tlb) {
1066 		if (record->type & ARM_SPE_TLB_MISS) {
1067 			err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
1068 							data_src);
1069 			if (err)
1070 				return err;
1071 		}
1072 
1073 		if (record->type & ARM_SPE_TLB_ACCESS) {
1074 			err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
1075 							data_src);
1076 			if (err)
1077 				return err;
1078 		}
1079 	}
1080 
1081 	if (spe->synth_opts.last_branch &&
1082 	    (spe->sample_branch || spe->sample_instructions))
1083 		arm_spe__prep_branch_stack(speq);
1084 
1085 	if (spe->sample_branch && (record->op & ARM_SPE_OP_BRANCH_ERET)) {
1086 		err = arm_spe__synth_branch_sample(speq, spe->branch_id);
1087 		if (err)
1088 			return err;
1089 	}
1090 
1091 	if (spe->sample_remote_access &&
1092 	    (record->type & ARM_SPE_REMOTE_ACCESS)) {
1093 		err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
1094 						data_src);
1095 		if (err)
1096 			return err;
1097 	}
1098 
1099 	/*
1100 	 * When data_src is zero, the record is not a memory operation, so
1101 	 * skip synthesizing a memory sample in this case.
1102 	 */
1103 	if (spe->sample_memory && is_ldst_op(record->op)) {
1104 		err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
1105 		if (err)
1106 			return err;
1107 	}
1108 
1109 	if (spe->sample_instructions) {
1110 		err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
1111 		if (err)
1112 			return err;
1113 	}
1114 
1115 	return 0;
1116 }
1117 
1118 static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
1119 {
1120 	struct arm_spe *spe = speq->spe;
1121 	struct arm_spe_record *record;
1122 	int ret;
1123 
1124 	if (!spe->kernel_start)
1125 		spe->kernel_start = machine__kernel_start(spe->machine);
1126 
1127 	while (1) {
1128 		/*
1129 		 * The usual logic is first to decode the packets, and then
1130 		 * synthesize a sample based on the record; but here the flow
1131 		 * is reversed: arm_spe_sample() is called to synthesize a
1132 		 * sample prior to arm_spe_decode().
1133 		 *
1134 		 * There are two reasons for this:
1135 		 * 1. When the queue was set up in arm_spe__setup_queue(), the
1136 		 * trace data was already decoded and a record generated, but
1137 		 * no sample was synthesized for it; that leftover record gets
1138 		 * its sample synthesized here.
1139 		 * 2. After decoding trace data, the record timestamp must be
1140 		 * compared with the timestamp of the coming perf event; if the
1141 		 * record is later, bail out and push the record onto the
1142 		 * auxtrace heap, so that synthesizing its sample is deferred
1143 		 * until the next time this point is reached. This correlates
1144 		 * samples between Arm SPE trace data and other perf events
1145 		 * with correct time ordering.
1146 		 */
1147 
1148 		/*
1149 		 * Update pid/tid info.
1150 		 */
1151 		record = &speq->decoder->record;
1152 		if (!spe->timeless_decoding && record->context_id != (u64)-1) {
1153 			ret = arm_spe_set_tid(speq, record->context_id);
1154 			if (ret)
1155 				return ret;
1156 
1157 			spe->use_ctx_pkt_for_pid = true;
1158 		}
1159 
1160 		ret = arm_spe_sample(speq);
1161 		if (ret)
1162 			return ret;
1163 
1164 		ret = arm_spe_decode(speq->decoder);
1165 		if (!ret) {
1166 			pr_debug("No data or all data has been processed.\n");
1167 			return 1;
1168 		}
1169 
1170 		/*
1171 		 * An error was detected while decoding SPE trace data; continue
1172 		 * with the next trace data to find more records.
1173 		 */
1174 		if (ret < 0)
1175 			continue;
1176 
1177 		record = &speq->decoder->record;
1178 
1179 		/* Update timestamp for the last record */
1180 		if (record->timestamp > speq->timestamp)
1181 			speq->timestamp = record->timestamp;
1182 
1183 		/*
1184 		 * If the timestamp of the queue is later than the timestamp of
1185 		 * the coming perf event, bail out to allow the perf event to
1186 		 * be processed first.
1187 		 */
1188 		if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
1189 			*timestamp = speq->timestamp;
1190 			return 0;
1191 		}
1192 	}
1193 
1194 	return 0;
1195 }
1196 
1197 static int arm_spe__setup_queue(struct arm_spe *spe,
1198 			       struct auxtrace_queue *queue,
1199 			       unsigned int queue_nr)
1200 {
1201 	struct arm_spe_queue *speq = queue->priv;
1202 	struct arm_spe_record *record;
1203 
1204 	if (list_empty(&queue->head) || speq)
1205 		return 0;
1206 
1207 	speq = arm_spe__alloc_queue(spe, queue_nr);
1208 
1209 	if (!speq)
1210 		return -ENOMEM;
1211 
1212 	queue->priv = speq;
1213 
1214 	if (queue->cpu != -1)
1215 		speq->cpu = queue->cpu;
1216 
1217 	if (!speq->on_heap) {
1218 		int ret;
1219 
1220 		if (spe->timeless_decoding)
1221 			return 0;
1222 
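		/*
		 * Decode one record ahead so its timestamp can order this
		 * queue on the auxtrace heap; decode errors are retried with
		 * the next record. The record itself is synthesized later in
		 * arm_spe_run_decoder() (see the comment there).
		 */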
1223 retry:
1224 		ret = arm_spe_decode(speq->decoder);
1225 
1226 		if (!ret)
1227 			return 0;
1228 
1229 		if (ret < 0)
1230 			goto retry;
1231 
1232 		record = &speq->decoder->record;
1233 
1234 		speq->timestamp = record->timestamp;
1235 		ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
1236 		if (ret)
1237 			return ret;
1238 		speq->on_heap = true;
1239 	}
1240 
1241 	return 0;
1242 }
1243 
1244 static int arm_spe__setup_queues(struct arm_spe *spe)
1245 {
1246 	unsigned int i;
1247 	int ret;
1248 
1249 	for (i = 0; i < spe->queues.nr_queues; i++) {
1250 		ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
1251 		if (ret)
1252 			return ret;
1253 	}
1254 
1255 	return 0;
1256 }
1257 
1258 static int arm_spe__update_queues(struct arm_spe *spe)
1259 {
1260 	if (spe->queues.new_data) {
1261 		spe->queues.new_data = false;
1262 		return arm_spe__setup_queues(spe);
1263 	}
1264 
1265 	return 0;
1266 }
1267 
1268 static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
1269 {
1270 	struct evsel *evsel;
1271 	struct evlist *evlist = spe->session->evlist;
1272 	bool timeless_decoding = true;
1273 
1274 	/*
1275 	 * Loop through the list of events and check whether any has the
1276 	 * time bit set; if one does, decoding cannot be timeless.
1277 	 */
1278 	evlist__for_each_entry(evlist, evsel) {
1279 		if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
1280 			timeless_decoding = false;
1281 	}
1282 
1283 	return timeless_decoding;
1284 }
1285 
1286 static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
1287 {
1288 	unsigned int queue_nr;
1289 	u64 ts;
1290 	int ret;
1291 
1292 	while (1) {
1293 		struct auxtrace_queue *queue;
1294 		struct arm_spe_queue *speq;
1295 
1296 		if (!spe->heap.heap_cnt)
1297 			return 0;
1298 
1299 		if (spe->heap.heap_array[0].ordinal >= timestamp)
1300 			return 0;
1301 
1302 		queue_nr = spe->heap.heap_array[0].queue_nr;
1303 		queue = &spe->queues.queue_array[queue_nr];
1304 		speq = queue->priv;
1305 
1306 		auxtrace_heap__pop(&spe->heap);
1307 
1308 		if (spe->heap.heap_cnt) {
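		/*
		 * Limit decoding of this queue to just past the timestamp of
		 * the next queue on the heap (or the requested timestamp, if
		 * sooner), so that records from all queues are processed in
		 * global time order.
		 */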
1309 			ts = spe->heap.heap_array[0].ordinal + 1;
1310 			if (ts > timestamp)
1311 				ts = timestamp;
1312 		} else {
1313 			ts = timestamp;
1314 		}
1315 
1316 		/*
1317 		 * A previous context-switch event has set pid/tid in the machine's context, so
1318 		 * here we need to update the pid/tid in the thread and SPE queue.
1319 		 */
1320 		if (!spe->use_ctx_pkt_for_pid)
1321 			arm_spe_set_pid_tid_cpu(spe, queue);
1322 
1323 		ret = arm_spe_run_decoder(speq, &ts);
1324 		if (ret < 0) {
1325 			auxtrace_heap__add(&spe->heap, queue_nr, ts);
1326 			return ret;
1327 		}
1328 
1329 		if (!ret) {
1330 			ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
1331 			if (ret < 0)
1332 				return ret;
1333 		} else {
1334 			speq->on_heap = false;
1335 		}
1336 	}
1337 
1338 	return 0;
1339 }
1340 
1341 static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
1342 					    u64 time_)
1343 {
1344 	struct auxtrace_queues *queues = &spe->queues;
1345 	unsigned int i;
1346 	u64 ts = 0;
1347 
1348 	for (i = 0; i < queues->nr_queues; i++) {
1349 		struct auxtrace_queue *queue = &spe->queues.queue_array[i];
1350 		struct arm_spe_queue *speq = queue->priv;
1351 
1352 		if (speq && (tid == -1 || speq->tid == tid)) {
1353 			speq->time = time_;
1354 			arm_spe_set_pid_tid_cpu(spe, queue);
1355 			arm_spe_run_decoder(speq, &ts);
1356 		}
1357 	}
1358 	return 0;
1359 }
1360 
1361 static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event,
1362 				  struct perf_sample *sample)
1363 {
1364 	pid_t pid, tid;
1365 	int cpu;
1366 
1367 	if (!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT))
1368 		return 0;
1369 
1370 	pid = event->context_switch.next_prev_pid;
1371 	tid = event->context_switch.next_prev_tid;
1372 	cpu = sample->cpu;
1373 
1374 	if (tid == -1)
1375 		pr_warning("context_switch event has no tid\n");
1376 
1377 	return machine__set_current_tid(spe->machine, cpu, pid, tid);
1378 }
1379 
1380 static int arm_spe_process_event(struct perf_session *session,
1381 				 union perf_event *event,
1382 				 struct perf_sample *sample,
1383 				 const struct perf_tool *tool)
1384 {
1385 	int err = 0;
1386 	u64 timestamp;
1387 	struct arm_spe *spe = container_of(session->auxtrace,
1388 			struct arm_spe, auxtrace);
1389 
1390 	if (dump_trace)
1391 		return 0;
1392 
1393 	if (!tool->ordered_events) {
1394 		pr_err("SPE trace requires ordered events\n");
1395 		return -EINVAL;
1396 	}
1397 
1398 	if (sample->time && (sample->time != (u64) -1))
1399 		timestamp = perf_time_to_tsc(sample->time, &spe->tc);
1400 	else
1401 		timestamp = 0;
1402 
1403 	if (timestamp || spe->timeless_decoding) {
1404 		err = arm_spe__update_queues(spe);
1405 		if (err)
1406 			return err;
1407 	}
1408 
1409 	if (spe->timeless_decoding) {
1410 		if (event->header.type == PERF_RECORD_EXIT) {
1411 			err = arm_spe_process_timeless_queues(spe,
1412 					event->fork.tid,
1413 					sample->time);
1414 		}
1415 	} else if (timestamp) {
1416 		err = arm_spe_process_queues(spe, timestamp);
1417 		if (err)
1418 			return err;
1419 
1420 		if (!spe->use_ctx_pkt_for_pid &&
1421 		    (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE ||
1422 		    event->header.type == PERF_RECORD_SWITCH))
1423 			err = arm_spe_context_switch(spe, event, sample);
1424 	}
1425 
1426 	return err;
1427 }
1428 
1429 static int arm_spe_process_auxtrace_event(struct perf_session *session,
1430 					  union perf_event *event,
1431 					  const struct perf_tool *tool __maybe_unused)
1432 {
1433 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
1434 					     auxtrace);
1435 
1436 	if (!spe->data_queued) {
1437 		struct auxtrace_buffer *buffer;
1438 		off_t data_offset;
1439 		int fd = perf_data__fd(session->data);
1440 		int err;
1441 
1442 		if (perf_data__is_pipe(session->data)) {
1443 			data_offset = 0;
1444 		} else {
1445 			data_offset = lseek(fd, 0, SEEK_CUR);
1446 			if (data_offset == -1)
1447 				return -errno;
1448 		}
1449 
1450 		err = auxtrace_queues__add_event(&spe->queues, session, event,
1451 				data_offset, &buffer);
1452 		if (err)
1453 			return err;
1454 
1455 		/* Dump here now that we have copied a piped trace out of the pipe */
1456 		if (dump_trace) {
1457 			if (auxtrace_buffer__get_data(buffer, fd)) {
1458 				arm_spe_dump_event(spe, buffer->data,
1459 						buffer->size);
1460 				auxtrace_buffer__put_data(buffer);
1461 			}
1462 		}
1463 	}
1464 
1465 	return 0;
1466 }
1467 
1468 static int arm_spe_flush(struct perf_session *session,
1469 			 const struct perf_tool *tool)
1470 {
1471 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
1472 			auxtrace);
1473 	int ret;
1474 
1475 	if (dump_trace)
1476 		return 0;
1477 
1478 	if (!tool->ordered_events)
1479 		return -EINVAL;
1480 
1481 	ret = arm_spe__update_queues(spe);
1482 	if (ret < 0)
1483 		return ret;
1484 
1485 	if (spe->timeless_decoding)
1486 		return arm_spe_process_timeless_queues(spe, -1,
1487 				MAX_TIMESTAMP - 1);
1488 
1489 	ret = arm_spe_process_queues(spe, MAX_TIMESTAMP);
1490 	if (ret)
1491 		return ret;
1492 
1493 	if (!spe->use_ctx_pkt_for_pid)
1494 		ui__warning("Arm SPE CONTEXT packets not found in the traces.\n"
1495 			    "Matching of TIDs to SPE events could be inaccurate.\n");
1496 
1497 	return 0;
1498 }
1499 
1500 static u64 *arm_spe__alloc_per_cpu_metadata(u64 *buf, int per_cpu_size)
1501 {
1502 	u64 *metadata;
1503 
1504 	metadata = zalloc(per_cpu_size);
1505 	if (!metadata)
1506 		return NULL;
1507 
1508 	memcpy(metadata, buf, per_cpu_size);
1509 	return metadata;
1510 }
1511 
1512 static void arm_spe__free_metadata(u64 **metadata, int nr_cpu)
1513 {
1514 	int i;
1515 
1516 	for (i = 0; i < nr_cpu; i++)
1517 		zfree(&metadata[i]);
1518 	free(metadata);
1519 }
1520 
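/*
 * Metadata layout, version 2 onwards: a header of ARM_SPE_HEADER_SIZE u64
 * words (version, header size, PMU type, number of CPUs, ...) followed by
 * one equally sized block of per-CPU parameters for each CPU. Version 1
 * carries only ARM_SPE_AUXTRACE_V1_PRIV_SIZE bytes and no per-CPU blocks.
 */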
1521 static u64 **arm_spe__alloc_metadata(struct perf_record_auxtrace_info *info,
1522 				     u64 *ver, int *nr_cpu)
1523 {
1524 	u64 *ptr = (u64 *)info->priv;
1525 	u64 metadata_size;
1526 	u64 **metadata = NULL;
1527 	int hdr_sz, per_cpu_sz, i;
1528 
1529 	metadata_size = info->header.size -
1530 		sizeof(struct perf_record_auxtrace_info);
1531 
1532 	/* Metadata version 1 */
1533 	if (metadata_size == ARM_SPE_AUXTRACE_V1_PRIV_SIZE) {
1534 		*ver = 1;
1535 		*nr_cpu = 0;
1536 		/* No per CPU metadata */
1537 		return NULL;
1538 	}
1539 
1540 	*ver = ptr[ARM_SPE_HEADER_VERSION];
1541 	hdr_sz = ptr[ARM_SPE_HEADER_SIZE];
1542 	*nr_cpu = ptr[ARM_SPE_CPUS_NUM];
1543 
1544 	metadata = calloc(*nr_cpu, sizeof(*metadata));
1545 	if (!metadata)
1546 		return NULL;
1547 
1548 	/* Locate the start address of per CPU metadata */
1549 	ptr += hdr_sz;
1550 	per_cpu_sz = (metadata_size - (hdr_sz * sizeof(u64))) / (*nr_cpu);
1551 
1552 	for (i = 0; i < *nr_cpu; i++) {
1553 		metadata[i] = arm_spe__alloc_per_cpu_metadata(ptr, per_cpu_sz);
1554 		if (!metadata[i])
1555 			goto err_per_cpu_metadata;
1556 
1557 		ptr += per_cpu_sz / sizeof(u64);
1558 	}
1559 
1560 	return metadata;
1561 
1562 err_per_cpu_metadata:
1563 	arm_spe__free_metadata(metadata, *nr_cpu);
1564 	return NULL;
1565 }
1566 
1567 static void arm_spe_free_queue(void *priv)
1568 {
1569 	struct arm_spe_queue *speq = priv;
1570 
1571 	if (!speq)
1572 		return;
1573 	thread__zput(speq->thread);
1574 	arm_spe_decoder_free(speq->decoder);
1575 	zfree(&speq->event_buf);
1576 	zfree(&speq->last_branch);
1577 	free(speq);
1578 }
1579 
1580 static void arm_spe_free_events(struct perf_session *session)
1581 {
1582 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
1583 					     auxtrace);
1584 	struct auxtrace_queues *queues = &spe->queues;
1585 	unsigned int i;
1586 
1587 	for (i = 0; i < queues->nr_queues; i++) {
1588 		arm_spe_free_queue(queues->queue_array[i].priv);
1589 		queues->queue_array[i].priv = NULL;
1590 	}
1591 	auxtrace_queues__free(queues);
1592 }
1593 
1594 static void arm_spe_free(struct perf_session *session)
1595 {
1596 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
1597 					     auxtrace);
1598 
1599 	auxtrace_heap__free(&spe->heap);
1600 	arm_spe_free_events(session);
1601 	session->auxtrace = NULL;
1602 	arm_spe__free_metadata(spe->metadata, spe->metadata_nr_cpu);
1603 	free(spe);
1604 }
1605 
1606 static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
1607 				      struct evsel *evsel)
1608 {
1609 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);
1610 
1611 	return evsel->core.attr.type == spe->pmu_type;
1612 }
1613 
1614 static const char * const metadata_hdr_v1_fmts[] = {
1615 	[ARM_SPE_PMU_TYPE]		= "  PMU Type           :%"PRId64"\n",
1616 	[ARM_SPE_PER_CPU_MMAPS]		= "  Per CPU mmaps      :%"PRId64"\n",
1617 };
1618 
1619 static const char * const metadata_hdr_fmts[] = {
1620 	[ARM_SPE_HEADER_VERSION]	= "  Header version     :%"PRId64"\n",
1621 	[ARM_SPE_HEADER_SIZE]		= "  Header size        :%"PRId64"\n",
1622 	[ARM_SPE_PMU_TYPE_V2]		= "  PMU type v2        :%"PRId64"\n",
1623 	[ARM_SPE_CPUS_NUM]		= "  CPU number         :%"PRId64"\n",
1624 };
1625 
1626 static const char * const metadata_per_cpu_fmts[] = {
1627 	[ARM_SPE_MAGIC]			= "    Magic            :0x%"PRIx64"\n",
1628 	[ARM_SPE_CPU]			= "    CPU #            :%"PRId64"\n",
1629 	[ARM_SPE_CPU_NR_PARAMS]		= "    Num of params    :%"PRId64"\n",
1630 	[ARM_SPE_CPU_MIDR]		= "    MIDR             :0x%"PRIx64"\n",
1631 	[ARM_SPE_CPU_PMU_TYPE]		= "    PMU Type         :%"PRId64"\n",
1632 	[ARM_SPE_CAP_MIN_IVAL]		= "    Min Interval     :%"PRId64"\n",
1633 	[ARM_SPE_CAP_EVENT_FILTER]	= "    Event Filter     :0x%"PRIx64"\n",
1634 };
1635 
1636 static void arm_spe_print_info(struct arm_spe *spe, __u64 *arr)
1637 {
1638 	unsigned int i, cpu, hdr_size, cpu_num, cpu_size;
1639 	const char * const *hdr_fmts;
1640 
1641 	if (!dump_trace)
1642 		return;
1643 
1644 	if (spe->metadata_ver == 1) {
1645 		cpu_num = 0;
1646 		hdr_size = ARM_SPE_AUXTRACE_V1_PRIV_MAX;
1647 		hdr_fmts = metadata_hdr_v1_fmts;
1648 	} else {
1649 		cpu_num = arr[ARM_SPE_CPUS_NUM];
1650 		hdr_size = arr[ARM_SPE_HEADER_SIZE];
1651 		hdr_fmts = metadata_hdr_fmts;
1652 	}
1653 
1654 	for (i = 0; i < hdr_size; i++)
1655 		fprintf(stdout, hdr_fmts[i], arr[i]);
1656 
1657 	arr += hdr_size;
1658 	for (cpu = 0; cpu < cpu_num; cpu++) {
1659 		/*
1660 		 * The parameters from ARM_SPE_MAGIC to ARM_SPE_CPU_NR_PARAMS
1661 		 * are fixed. The number of subsequent parameters is given by
1662 		 * the field 'ARM_SPE_CPU_NR_PARAMS'.
1663 		 */
1664 		cpu_size = (ARM_SPE_CPU_NR_PARAMS + 1) + arr[ARM_SPE_CPU_NR_PARAMS];
1665 		for (i = 0; i < cpu_size; i++)
1666 			fprintf(stdout, metadata_per_cpu_fmts[i], arr[i]);
1667 		arr += cpu_size;
1668 	}
1669 }
1670 
1671 static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
1672 				    const char *name)
1673 {
1674 	struct evsel *evsel;
1675 
1676 	evlist__for_each_entry(evlist, evsel) {
1677 		if (evsel->core.id && evsel->core.id[0] == id) {
1678 			if (evsel->name)
1679 				zfree(&evsel->name);
1680 			evsel->name = strdup(name);
1681 			break;
1682 		}
1683 	}
1684 }
1685 
1686 static int
1687 arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
1688 {
1689 	struct evlist *evlist = session->evlist;
1690 	struct evsel *evsel;
1691 	struct perf_event_attr attr;
1692 	bool found = false;
1693 	u64 id;
1694 	int err;
1695 
1696 	evlist__for_each_entry(evlist, evsel) {
1697 		if (evsel->core.attr.type == spe->pmu_type) {
1698 			found = true;
1699 			break;
1700 		}
1701 	}
1702 
1703 	if (!found) {
1704 		pr_debug("No selected events with SPE trace data\n");
1705 		return 0;
1706 	}
1707 
1708 	memset(&attr, 0, sizeof(struct perf_event_attr));
1709 	attr.size = sizeof(struct perf_event_attr);
1710 	attr.type = PERF_TYPE_HARDWARE;
1711 	attr.sample_type = evsel->core.attr.sample_type &
1712 				(PERF_SAMPLE_MASK | PERF_SAMPLE_PHYS_ADDR);
1713 	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1714 			    PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC |
1715 			    PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR;
1716 	if (spe->timeless_decoding)
1717 		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
1718 	else
1719 		attr.sample_type |= PERF_SAMPLE_TIME;
1720 
1721 	spe->sample_type = attr.sample_type;
1722 
1723 	attr.exclude_user = evsel->core.attr.exclude_user;
1724 	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
1725 	attr.exclude_hv = evsel->core.attr.exclude_hv;
1726 	attr.exclude_host = evsel->core.attr.exclude_host;
1727 	attr.exclude_guest = evsel->core.attr.exclude_guest;
1728 	attr.sample_id_all = evsel->core.attr.sample_id_all;
1729 	attr.read_format = evsel->core.attr.read_format;
1730 	attr.sample_period = spe->synth_opts.period;
1731 
1732 	/* create new id val to be a fixed offset from evsel id */
1733 	id = auxtrace_synth_id_range_start(evsel);
1734 
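	/*
	 * Each sample type enabled below registers one synthetic event with
	 * the next consecutive id; arm_spe_set_event_name() then renames the
	 * matching evsel so that reports show e.g. "l1d-miss" rather than a
	 * raw id.
	 */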
1735 	if (spe->synth_opts.flc) {
1736 		spe->sample_flc = true;
1737 
1738 		/* Level 1 data cache miss */
1739 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1740 		if (err)
1741 			return err;
1742 		spe->l1d_miss_id = id;
1743 		arm_spe_set_event_name(evlist, id, "l1d-miss");
1744 		id += 1;
1745 
1746 		/* Level 1 data cache access */
1747 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1748 		if (err)
1749 			return err;
1750 		spe->l1d_access_id = id;
1751 		arm_spe_set_event_name(evlist, id, "l1d-access");
1752 		id += 1;
1753 	}
1754 
1755 	if (spe->synth_opts.llc) {
1756 		spe->sample_llc = true;
1757 
1758 		/* Last level cache miss */
1759 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1760 		if (err)
1761 			return err;
1762 		spe->llc_miss_id = id;
1763 		arm_spe_set_event_name(evlist, id, "llc-miss");
1764 		id += 1;
1765 
1766 		/* Last level cache access */
1767 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1768 		if (err)
1769 			return err;
1770 		spe->llc_access_id = id;
1771 		arm_spe_set_event_name(evlist, id, "llc-access");
1772 		id += 1;
1773 	}
1774 
1775 	if (spe->synth_opts.tlb) {
1776 		spe->sample_tlb = true;
1777 
1778 		/* TLB miss */
1779 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1780 		if (err)
1781 			return err;
1782 		spe->tlb_miss_id = id;
1783 		arm_spe_set_event_name(evlist, id, "tlb-miss");
1784 		id += 1;
1785 
1786 		/* TLB access */
1787 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1788 		if (err)
1789 			return err;
1790 		spe->tlb_access_id = id;
1791 		arm_spe_set_event_name(evlist, id, "tlb-access");
1792 		id += 1;
1793 	}
1794 
1795 	if (spe->synth_opts.last_branch) {
1796 		if (spe->synth_opts.last_branch_sz > 2)
1797 			pr_debug("Arm SPE supports only two bstack entries (PBT+TGT).\n");
1798 
1799 		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
1800 		/*
1801 		 * We don't use the hardware index, but the sample generation
1802 		 * code uses the new format branch_stack with this field,
1803 		 * so the event attributes must indicate that it's present.
1804 		 */
1805 		attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
1806 	}
1807 
1808 	if (spe->synth_opts.branches) {
1809 		spe->sample_branch = true;
1810 
1811 		/* Branch */
1812 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1813 		if (err)
1814 			return err;
1815 		spe->branch_id = id;
1816 		arm_spe_set_event_name(evlist, id, "branch");
1817 		id += 1;
1818 	}
1819 
1820 	if (spe->synth_opts.remote_access) {
1821 		spe->sample_remote_access = true;
1822 
1823 		/* Remote access */
1824 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1825 		if (err)
1826 			return err;
1827 		spe->remote_access_id = id;
1828 		arm_spe_set_event_name(evlist, id, "remote-access");
1829 		id += 1;
1830 	}
1831 
1832 	if (spe->synth_opts.mem) {
1833 		spe->sample_memory = true;
1834 
1835 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1836 		if (err)
1837 			return err;
1838 		spe->memory_id = id;
1839 		arm_spe_set_event_name(evlist, id, "memory");
1840 		id += 1;
1841 	}
1842 
1843 	if (spe->synth_opts.instructions) {
1844 		spe->sample_instructions = true;
1845 		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1846 
1847 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1848 		if (err)
1849 			return err;
1850 		spe->instructions_id = id;
1851 		arm_spe_set_event_name(evlist, id, "instructions");
1852 	}
1853 
1854 	return 0;
1855 }
1856 
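/*
 * A system is considered homogeneous when every CPU reports the same MIDR;
 * this lets per-thread traces (CPU == -1) fall back to CPU0's metadata, see
 * arm_spe__get_metadata_by_cpu().
 */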
1857 static bool arm_spe__is_homogeneous(u64 **metadata, int nr_cpu)
1858 {
1859 	u64 midr;
1860 	int i;
1861 
1862 	if (!nr_cpu)
1863 		return false;
1864 
1865 	for (i = 0; i < nr_cpu; i++) {
1866 		if (!metadata[i])
1867 			return false;
1868 
1869 		if (i == 0) {
1870 			midr = metadata[i][ARM_SPE_CPU_MIDR];
1871 			continue;
1872 		}
1873 
1874 		if (midr != metadata[i][ARM_SPE_CPU_MIDR])
1875 			return false;
1876 	}
1877 
1878 	return true;
1879 }
1880 
1881 int arm_spe_process_auxtrace_info(union perf_event *event,
1882 				  struct perf_session *session)
1883 {
1884 	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
1885 	size_t min_sz = ARM_SPE_AUXTRACE_V1_PRIV_SIZE;
1886 	struct perf_record_time_conv *tc = &session->time_conv;
1887 	struct arm_spe *spe;
1888 	u64 **metadata = NULL;
1889 	u64 metadata_ver;
1890 	int nr_cpu, err;
1891 
1892 	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
1893 					min_sz)
1894 		return -EINVAL;
1895 
1896 	metadata = arm_spe__alloc_metadata(auxtrace_info, &metadata_ver,
1897 					   &nr_cpu);
1898 	if (!metadata && metadata_ver != 1) {
1899 		pr_err("Failed to parse Arm SPE metadata.\n");
1900 		return -EINVAL;
1901 	}
1902 
1903 	spe = zalloc(sizeof(struct arm_spe));
1904 	if (!spe) {
1905 		err = -ENOMEM;
1906 		goto err_free_metadata;
1907 	}
1908 
1909 	err = auxtrace_queues__init(&spe->queues);
1910 	if (err)
1911 		goto err_free;
1912 
1913 	spe->session = session;
1914 	spe->machine = &session->machines.host; /* No kvm support */
1915 	spe->auxtrace_type = auxtrace_info->type;
1916 	if (metadata_ver == 1)
1917 		spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
1918 	else
1919 		spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE_V2];
1920 	spe->metadata = metadata;
1921 	spe->metadata_ver = metadata_ver;
1922 	spe->metadata_nr_cpu = nr_cpu;
1923 	spe->is_homogeneous = arm_spe__is_homogeneous(metadata, nr_cpu);
1924 
1925 	spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);
1926 
1927 	/*
1928 	 * The synthesized event PERF_RECORD_TIME_CONV has already been
1929 	 * handled and the hardware clock parameters are stored in the
1930 	 * session context.  Pass these parameters to the struct
1931 	 * perf_tsc_conversion in "spe->tc", which is used later to convert
1932 	 * between the clock counter and timestamps.
1933 	 *
1934 	 * For backward compatibility, copy the fields starting from
1935 	 * "time_cycles" only if they are contained in the event.
1936 	 */
1937 	spe->tc.time_shift = tc->time_shift;
1938 	spe->tc.time_mult = tc->time_mult;
1939 	spe->tc.time_zero = tc->time_zero;
1940 
1941 	if (event_contains(*tc, time_cycles)) {
1942 		spe->tc.time_cycles = tc->time_cycles;
1943 		spe->tc.time_mask = tc->time_mask;
1944 		spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
1945 		spe->tc.cap_user_time_short = tc->cap_user_time_short;
1946 	}
1947 
1948 	spe->auxtrace.process_event = arm_spe_process_event;
1949 	spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
1950 	spe->auxtrace.flush_events = arm_spe_flush;
1951 	spe->auxtrace.free_events = arm_spe_free_events;
1952 	spe->auxtrace.free = arm_spe_free;
1953 	spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
1954 	session->auxtrace = &spe->auxtrace;
1955 
1956 	arm_spe_print_info(spe, &auxtrace_info->priv[0]);
1957 
1958 	if (dump_trace)
1959 		return 0;
1960 
1961 	if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
1962 		spe->synth_opts = *session->itrace_synth_opts;
1963 	} else {
1964 		itrace_synth_opts__set_default(&spe->synth_opts, false);
1965 		/* Default nanoseconds period not supported */
1966 		spe->synth_opts.period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS;
1967 		spe->synth_opts.period = 1;
1968 	}
1969 
1970 	if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) {
1971 		ui__error("You must only use an i (instructions) --itrace period with Arm SPE, e.g. --itrace=i1i\n");
1972 		err = -EINVAL;
1973 		goto err_free_queues;
1974 	}
1975 	if (spe->synth_opts.period > 1)
1976 		ui__warning("Arm SPE has a hardware-based sampling period.\n\n"
1977 			    "--itrace periods > 1i downsample by an interval of n SPE samples rather than n instructions.\n");
1978 
1979 	err = arm_spe_synth_events(spe, session);
1980 	if (err)
1981 		goto err_free_queues;
1982 
1983 	err = auxtrace_queues__process_index(&spe->queues, session);
1984 	if (err)
1985 		goto err_free_queues;
1986 
1987 	if (spe->queues.populated)
1988 		spe->data_queued = true;
1989 
1990 	return 0;
1991 
1992 err_free_queues:
1993 	auxtrace_queues__free(&spe->queues);
1994 	session->auxtrace = NULL;
1995 err_free:
1996 	free(spe);
1997 err_free_metadata:
1998 	arm_spe__free_metadata(metadata, nr_cpu);
1999 	return err;
2000 }
2001