xref: /linux/tools/perf/util/arm-spe.c (revision c34e9ab9a612ee8b18273398ef75c207b01f516d)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Arm Statistical Profiling Extensions (SPE) support
4  * Copyright (c) 2017-2018, Arm Ltd.
5  */
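
/*
 * Overview (illustrative): raw SPE trace recorded via the arm_spe PMU
 * (e.g. "perf record -e arm_spe_0// -- <workload>") is decoded here at
 * report time, and memory, branch and instruction samples are
 * synthesized from it according to the requested --itrace options.
 */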
6 
7 #include <byteswap.h>
8 #include <endian.h>
9 #include <errno.h>
10 #include <inttypes.h>
11 #include <linux/bitops.h>
12 #include <linux/kernel.h>
13 #include <linux/log2.h>
14 #include <linux/types.h>
15 #include <linux/zalloc.h>
16 #include <stdlib.h>
17 #include <unistd.h>
18 
19 #include "auxtrace.h"
20 #include "color.h"
21 #include "debug.h"
22 #include "evlist.h"
23 #include "evsel.h"
24 #include "machine.h"
25 #include "session.h"
26 #include "symbol.h"
27 #include "thread.h"
28 #include "thread-stack.h"
29 #include "tsc.h"
30 #include "tool.h"
31 #include "util/synthetic-events.h"
32 
33 #include "arm-spe.h"
34 #include "arm-spe-decoder/arm-spe-decoder.h"
35 #include "arm-spe-decoder/arm-spe-pkt-decoder.h"
36 
37 #include "../../arch/arm64/include/asm/cputype.h"
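
/* Sentinel timestamp used at flush time to drain all remaining queued trace data */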
38 #define MAX_TIMESTAMP (~0ULL)
39 
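/*
 * Per-session decoding state.  The sample_* flags mirror the requested
 * --itrace synthesis options and the *_id fields hold the sample IDs of
 * the corresponding synthesized events; metadata holds the per-CPU
 * records parsed from the PERF_RECORD_AUXTRACE_INFO event.
 */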
40 struct arm_spe {
41 	struct auxtrace			auxtrace;
42 	struct auxtrace_queues		queues;
43 	struct auxtrace_heap		heap;
44 	struct itrace_synth_opts        synth_opts;
45 	u32				auxtrace_type;
46 	struct perf_session		*session;
47 	struct machine			*machine;
48 	u32				pmu_type;
49 
50 	struct perf_tsc_conversion	tc;
51 
52 	u8				timeless_decoding;
53 	u8				data_queued;
54 
55 	u64				sample_type;
56 	u8				sample_flc;
57 	u8				sample_llc;
58 	u8				sample_tlb;
59 	u8				sample_branch;
60 	u8				sample_remote_access;
61 	u8				sample_memory;
62 	u8				sample_instructions;
63 	u64				instructions_sample_period;
64 
65 	u64				l1d_miss_id;
66 	u64				l1d_access_id;
67 	u64				llc_miss_id;
68 	u64				llc_access_id;
69 	u64				tlb_miss_id;
70 	u64				tlb_access_id;
71 	u64				branch_id;
72 	u64				remote_access_id;
73 	u64				memory_id;
74 	u64				instructions_id;
75 
76 	u64				kernel_start;
77 
78 	unsigned long			num_events;
79 	u8				use_ctx_pkt_for_pid;
80 
81 	u64				**metadata;
82 	u64				metadata_ver;
83 	u64				metadata_nr_cpu;
84 	bool				is_homogeneous;
85 };
86 
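/*
 * Per-queue decoding state; auxtrace queues are typically per CPU, or
 * per thread when the trace was recorded in per-thread mode.
 */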
87 struct arm_spe_queue {
88 	struct arm_spe			*spe;
89 	unsigned int			queue_nr;
90 	struct auxtrace_buffer		*buffer;
91 	struct auxtrace_buffer		*old_buffer;
92 	union perf_event		*event_buf;
93 	bool				on_heap;
94 	bool				done;
95 	pid_t				pid;
96 	pid_t				tid;
97 	int				cpu;
98 	struct arm_spe_decoder		*decoder;
99 	u64				time;
100 	u64				timestamp;
101 	struct thread			*thread;
102 	u64				period_instructions;
103 	u32				flags;
104 };
105 
106 static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
107 			 unsigned char *buf, size_t len)
108 {
109 	struct arm_spe_pkt packet;
110 	size_t pos = 0;
111 	int ret, pkt_len, i;
112 	char desc[ARM_SPE_PKT_DESC_MAX];
113 	const char *color = PERF_COLOR_BLUE;
114 
115 	color_fprintf(stdout, color,
116 		      ". ... ARM SPE data: size %#zx bytes\n",
117 		      len);
118 
119 	while (len) {
120 		ret = arm_spe_get_packet(buf, len, &packet);
121 		if (ret > 0)
122 			pkt_len = ret;
123 		else
124 			pkt_len = 1;
125 		printf(".");
126 		color_fprintf(stdout, color, "  %08zx: ", pos);
127 		for (i = 0; i < pkt_len; i++)
128 			color_fprintf(stdout, color, " %02x", buf[i]);
129 		for (; i < 16; i++)
130 			color_fprintf(stdout, color, "   ");
131 		if (ret > 0) {
132 			ret = arm_spe_pkt_desc(&packet, desc,
133 					       ARM_SPE_PKT_DESC_MAX);
134 			if (!ret)
135 				color_fprintf(stdout, color, " %s\n", desc);
136 		} else {
137 			color_fprintf(stdout, color, " Bad packet!\n");
138 		}
139 		pos += pkt_len;
140 		buf += pkt_len;
141 		len -= pkt_len;
142 	}
143 }
144 
145 static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
146 			       size_t len)
147 {
148 	printf(".\n");
149 	arm_spe_dump(spe, buf, len);
150 }
151 
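/*
 * Decoder callback: feed the decoder the next auxtrace buffer for this
 * queue, loading the buffer data from the perf data file on demand.
 */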
152 static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
153 {
154 	struct arm_spe_queue *speq = data;
155 	struct auxtrace_buffer *buffer = speq->buffer;
156 	struct auxtrace_buffer *old_buffer = speq->old_buffer;
157 	struct auxtrace_queue *queue;
158 
159 	queue = &speq->spe->queues.queue_array[speq->queue_nr];
160 
161 	buffer = auxtrace_buffer__next(queue, buffer);
162 	/* If no more data, drop the previous auxtrace_buffer and return */
163 	if (!buffer) {
164 		if (old_buffer)
165 			auxtrace_buffer__drop_data(old_buffer);
166 		b->len = 0;
167 		return 0;
168 	}
169 
170 	speq->buffer = buffer;
171 
172 	/* If the aux_buffer doesn't have data associated, try to load it */
173 	if (!buffer->data) {
174 		/* get the file desc associated with the perf data file */
175 		int fd = perf_data__fd(speq->spe->session->data);
176 
177 		buffer->data = auxtrace_buffer__get_data(buffer, fd);
178 		if (!buffer->data)
179 			return -ENOMEM;
180 	}
181 
182 	b->len = buffer->size;
183 	b->buf = buffer->data;
184 
185 	if (b->len) {
186 		if (old_buffer)
187 			auxtrace_buffer__drop_data(old_buffer);
188 		speq->old_buffer = buffer;
189 	} else {
190 		auxtrace_buffer__drop_data(buffer);
191 		return arm_spe_get_trace(b, data);
192 	}
193 
194 	return 0;
195 }
196 
197 static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
198 		unsigned int queue_nr)
199 {
200 	struct arm_spe_params params = { .get_trace = 0, };
201 	struct arm_spe_queue *speq;
202 
203 	speq = zalloc(sizeof(*speq));
204 	if (!speq)
205 		return NULL;
206 
207 	speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
208 	if (!speq->event_buf)
209 		goto out_free;
210 
211 	speq->spe = spe;
212 	speq->queue_nr = queue_nr;
213 	speq->pid = -1;
214 	speq->tid = -1;
215 	speq->cpu = -1;
216 	speq->period_instructions = 0;
217 
218 	/* Set up the decoder parameters */
219 	params.get_trace = arm_spe_get_trace;
220 	params.data = speq;
221 
222 	/* create new decoder */
223 	speq->decoder = arm_spe_decoder_new(&params);
224 	if (!speq->decoder)
225 		goto out_free;
226 
227 	return speq;
228 
229 out_free:
230 	zfree(&speq->event_buf);
231 	free(speq);
232 
233 	return NULL;
234 }
235 
236 static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
237 {
238 	return ip >= spe->kernel_start ?
239 		PERF_RECORD_MISC_KERNEL :
240 		PERF_RECORD_MISC_USER;
241 }
242 
243 static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
244 				    struct auxtrace_queue *queue)
245 {
246 	struct arm_spe_queue *speq = queue->priv;
247 	pid_t tid;
248 
249 	tid = machine__get_current_tid(spe->machine, speq->cpu);
250 	if (tid != -1) {
251 		speq->tid = tid;
252 		thread__zput(speq->thread);
253 	} else
254 		speq->tid = queue->tid;
255 
256 	if ((!speq->thread) && (speq->tid != -1)) {
257 		speq->thread = machine__find_thread(spe->machine, -1,
258 						    speq->tid);
259 	}
260 
261 	if (speq->thread) {
262 		speq->pid = thread__pid(speq->thread);
263 		if (queue->cpu == -1)
264 			speq->cpu = thread__cpu(speq->thread);
265 	}
266 }
267 
268 static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid)
269 {
270 	struct arm_spe *spe = speq->spe;
271 	int err = machine__set_current_tid(spe->machine, speq->cpu, -1, tid);
272 
273 	if (err)
274 		return err;
275 
276 	arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]);
277 
278 	return 0;
279 }
280 
281 static u64 *arm_spe__get_metadata_by_cpu(struct arm_spe *spe, u64 cpu)
282 {
283 	u64 i;
284 
285 	if (!spe->metadata)
286 		return NULL;
287 
288 	for (i = 0; i < spe->metadata_nr_cpu; i++)
289 		if (spe->metadata[i][ARM_SPE_CPU] == cpu)
290 			return spe->metadata[i];
291 
292 	return NULL;
293 }
294 
295 static struct simd_flags arm_spe__synth_simd_flags(const struct arm_spe_record *record)
296 {
297 	struct simd_flags simd_flags = {};
298 
299 	if ((record->op & ARM_SPE_OP_LDST) && (record->op & ARM_SPE_OP_SVE_LDST))
300 		simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;
301 
302 	if ((record->op & ARM_SPE_OP_OTHER) && (record->op & ARM_SPE_OP_SVE_OTHER))
303 		simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;
304 
305 	if (record->type & ARM_SPE_SVE_PARTIAL_PRED)
306 		simd_flags.pred |= SIMD_OP_FLAGS_PRED_PARTIAL;
307 
308 	if (record->type & ARM_SPE_SVE_EMPTY_PRED)
309 		simd_flags.pred |= SIMD_OP_FLAGS_PRED_EMPTY;
310 
311 	return simd_flags;
312 }
313 
314 static void arm_spe_prep_sample(struct arm_spe *spe,
315 				struct arm_spe_queue *speq,
316 				union perf_event *event,
317 				struct perf_sample *sample)
318 {
319 	struct arm_spe_record *record = &speq->decoder->record;
320 
321 	if (!spe->timeless_decoding)
322 		sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);
323 
324 	sample->ip = record->from_ip;
325 	sample->cpumode = arm_spe_cpumode(spe, sample->ip);
326 	sample->pid = speq->pid;
327 	sample->tid = speq->tid;
328 	sample->period = 1;
329 	sample->cpu = speq->cpu;
330 	sample->simd_flags = arm_spe__synth_simd_flags(record);
331 
332 	event->sample.header.type = PERF_RECORD_SAMPLE;
333 	event->sample.header.misc = sample->cpumode;
334 	event->sample.header.size = sizeof(struct perf_event_header);
335 }
336 
337 static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type)
338 {
339 	event->header.size = perf_event__sample_event_size(sample, type, 0);
340 	return perf_event__synthesize_sample(event, type, 0, sample);
341 }
342 
343 static inline int
344 arm_spe_deliver_synth_event(struct arm_spe *spe,
345 			    struct arm_spe_queue *speq __maybe_unused,
346 			    union perf_event *event,
347 			    struct perf_sample *sample)
348 {
349 	int ret;
350 
351 	if (spe->synth_opts.inject) {
352 		ret = arm_spe__inject_event(event, sample, spe->sample_type);
353 		if (ret)
354 			return ret;
355 	}
356 
357 	ret = perf_session__deliver_synth_event(spe->session, event, sample);
358 	if (ret)
359 		pr_err("ARM SPE: failed to deliver event, error %d\n", ret);
360 
361 	return ret;
362 }
363 
364 static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
365 				     u64 spe_events_id, u64 data_src)
366 {
367 	struct arm_spe *spe = speq->spe;
368 	struct arm_spe_record *record = &speq->decoder->record;
369 	union perf_event *event = speq->event_buf;
370 	struct perf_sample sample = { .ip = 0, };
371 
372 	arm_spe_prep_sample(spe, speq, event, &sample);
373 
374 	sample.id = spe_events_id;
375 	sample.stream_id = spe_events_id;
376 	sample.addr = record->virt_addr;
377 	sample.phys_addr = record->phys_addr;
378 	sample.data_src = data_src;
379 	sample.weight = record->latency;
380 
381 	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
382 }
383 
384 static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
385 					u64 spe_events_id)
386 {
387 	struct arm_spe *spe = speq->spe;
388 	struct arm_spe_record *record = &speq->decoder->record;
389 	union perf_event *event = speq->event_buf;
390 	struct perf_sample sample = { .ip = 0, };
391 
392 	arm_spe_prep_sample(spe, speq, event, &sample);
393 
394 	sample.id = spe_events_id;
395 	sample.stream_id = spe_events_id;
396 	sample.addr = record->to_ip;
397 	sample.weight = record->latency;
398 	sample.flags = speq->flags;
399 
400 	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
401 }
402 
403 static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
404 					     u64 spe_events_id, u64 data_src)
405 {
406 	struct arm_spe *spe = speq->spe;
407 	struct arm_spe_record *record = &speq->decoder->record;
408 	union perf_event *event = speq->event_buf;
409 	struct perf_sample sample = { .ip = 0, };
410 
411 	/*
412 	 * Handles perf instruction sampling period.
413 	 */
414 	speq->period_instructions++;
415 	if (speq->period_instructions < spe->instructions_sample_period)
416 		return 0;
417 	speq->period_instructions = 0;
418 
419 	arm_spe_prep_sample(spe, speq, event, &sample);
420 
421 	sample.id = spe_events_id;
422 	sample.stream_id = spe_events_id;
423 	sample.addr = record->to_ip;
424 	sample.phys_addr = record->phys_addr;
425 	sample.data_src = data_src;
426 	sample.period = spe->instructions_sample_period;
427 	sample.weight = record->latency;
428 	sample.flags = speq->flags;
429 
430 	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
431 }
432 
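/*
 * CPUs known to use the common ("Neoverse-style") data source packet
 * encoding; for these, arm_spe__synth_data_source_common() can map the
 * data source field onto cache levels and snoop information.
 */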
433 static const struct midr_range common_ds_encoding_cpus[] = {
434 	MIDR_ALL_VERSIONS(MIDR_CORTEX_A720),
435 	MIDR_ALL_VERSIONS(MIDR_CORTEX_A725),
436 	MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C),
437 	MIDR_ALL_VERSIONS(MIDR_CORTEX_X3),
438 	MIDR_ALL_VERSIONS(MIDR_CORTEX_X925),
439 	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
440 	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
441 	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
442 	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2),
443 	{},
444 };
445 
446 static void arm_spe__sample_flags(struct arm_spe_queue *speq)
447 {
448 	const struct arm_spe_record *record = &speq->decoder->record;
449 
450 	speq->flags = 0;
451 	if (record->op & ARM_SPE_OP_BRANCH_ERET) {
452 		speq->flags = PERF_IP_FLAG_BRANCH;
453 
454 		if (record->type & ARM_SPE_BRANCH_MISS)
455 			speq->flags |= PERF_IP_FLAG_BRANCH_MISS;
456 	}
457 }
458 
459 static void arm_spe__synth_data_source_common(const struct arm_spe_record *record,
460 					      union perf_mem_data_src *data_src)
461 {
462 	/*
463 	 * Even though four levels of cache hierarchy are possible, no known
464 	 * production Neoverse systems currently include more than three levels
465 	 * so, for the time being, we assume three exist. If a production system
466 	 * is built with four, this function would have to be changed to
467 	 * detect the number of levels for reporting.
468 	 */
469 
470 	/*
471 	 * We have no data on the hit level or data source for stores in the
472 	 * Neoverse SPE records.
473 	 */
474 	if (record->op & ARM_SPE_OP_ST) {
475 		data_src->mem_lvl = PERF_MEM_LVL_NA;
476 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
477 		data_src->mem_snoop = PERF_MEM_SNOOP_NA;
478 		return;
479 	}
480 
481 	switch (record->source) {
482 	case ARM_SPE_COMMON_DS_L1D:
483 		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
484 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
485 		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
486 		break;
487 	case ARM_SPE_COMMON_DS_L2:
488 		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
489 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
490 		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
491 		break;
492 	case ARM_SPE_COMMON_DS_PEER_CORE:
493 		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
494 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
495 		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
496 		break;
497 	/*
498 	 * We don't know if this is L1 or L2, but we do know it was a
499 	 * cache-to-cache transfer, so set SNOOPX_PEER.
500 	 */
501 	case ARM_SPE_COMMON_DS_LOCAL_CLUSTER:
502 	case ARM_SPE_COMMON_DS_PEER_CLUSTER:
503 		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
504 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
505 		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
506 		break;
507 	/*
508 	 * System cache is assumed to be L3
509 	 */
510 	case ARM_SPE_COMMON_DS_SYS_CACHE:
511 		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
512 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
513 		data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
514 		break;
515 	/*
516 	 * We don't know what level it hit in, except it came from the other
517 	 * socket
518 	 */
519 	case ARM_SPE_COMMON_DS_REMOTE:
520 		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1;
521 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
522 		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
523 		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
524 		break;
525 	case ARM_SPE_COMMON_DS_DRAM:
526 		data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
527 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
528 		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
529 		break;
530 	default:
531 		break;
532 	}
533 }
534 
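/*
 * Fallback for CPUs without the common data source encoding: derive
 * only the cache level and hit/miss status from the event type bits.
 */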
535 static void arm_spe__synth_memory_level(const struct arm_spe_record *record,
536 					union perf_mem_data_src *data_src)
537 {
538 	if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
539 		data_src->mem_lvl = PERF_MEM_LVL_L3;
540 
541 		if (record->type & ARM_SPE_LLC_MISS)
542 			data_src->mem_lvl |= PERF_MEM_LVL_MISS;
543 		else
544 			data_src->mem_lvl |= PERF_MEM_LVL_HIT;
545 	} else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
546 		data_src->mem_lvl = PERF_MEM_LVL_L1;
547 
548 		if (record->type & ARM_SPE_L1D_MISS)
549 			data_src->mem_lvl |= PERF_MEM_LVL_MISS;
550 		else
551 			data_src->mem_lvl |= PERF_MEM_LVL_HIT;
552 	}
553 
554 	if (record->type & ARM_SPE_REMOTE_ACCESS)
555 		data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
556 }
557 
558 static bool arm_spe__is_common_ds_encoding(struct arm_spe_queue *speq)
559 {
560 	struct arm_spe *spe = speq->spe;
562 	u64 *metadata = NULL;
563 	u64 midr = 0;
564 
565 	/* Metadata version 1 assumes all CPUs are the same (old behavior) */
566 	if (spe->metadata_ver == 1) {
567 		const char *cpuid;
568 
569 		pr_warning_once("Old SPE metadata, re-record to improve decode accuracy\n");
570 		cpuid = perf_env__cpuid(spe->session->evlist->env);
571 		midr = strtol(cpuid, NULL, 16);
572 	} else {
573 		/* CPU ID is -1 for per-thread mode */
574 		if (speq->cpu < 0) {
575 			/*
576 			 * On a heterogeneous system, since the CPU ID is -1 we
577 			 * cannot confirm whether the data source packet is supported.
578 			 */
579 			if (!spe->is_homogeneous)
580 				return false;
581 
582 			/* In a homogeneous system, simply use CPU0's metadata */
583 			if (spe->metadata)
584 				metadata = spe->metadata[0];
585 		} else {
586 			metadata = arm_spe__get_metadata_by_cpu(spe, speq->cpu);
587 		}
588 
589 		if (!metadata)
590 			return false;
591 
592 		midr = metadata[ARM_SPE_CPU_MIDR];
593 	}
594 
595 	return is_midr_in_range_list(midr, common_ds_encoding_cpus);
600 }
601 
602 static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq,
603 				      const struct arm_spe_record *record)
604 {
605 	union perf_mem_data_src	data_src = { .mem_op = PERF_MEM_OP_NA };
606 	bool is_common = arm_spe__is_common_ds_encoding(speq);
607 
608 	if (record->op & ARM_SPE_OP_LD)
609 		data_src.mem_op = PERF_MEM_OP_LOAD;
610 	else if (record->op & ARM_SPE_OP_ST)
611 		data_src.mem_op = PERF_MEM_OP_STORE;
612 	else
613 		return 0;
614 
615 	if (is_common)
616 		arm_spe__synth_data_source_common(record, &data_src);
617 	else
618 		arm_spe__synth_memory_level(record, &data_src);
619 
620 	if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
621 		data_src.mem_dtlb = PERF_MEM_TLB_WK;
622 
623 		if (record->type & ARM_SPE_TLB_MISS)
624 			data_src.mem_dtlb |= PERF_MEM_TLB_MISS;
625 		else
626 			data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
627 	}
628 
629 	return data_src.val;
630 }
631 
632 static int arm_spe_sample(struct arm_spe_queue *speq)
633 {
634 	const struct arm_spe_record *record = &speq->decoder->record;
635 	struct arm_spe *spe = speq->spe;
636 	u64 data_src;
637 	int err;
638 
639 	arm_spe__sample_flags(speq);
640 	data_src = arm_spe__synth_data_source(speq, record);
641 
642 	if (spe->sample_flc) {
643 		if (record->type & ARM_SPE_L1D_MISS) {
644 			err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id,
645 							data_src);
646 			if (err)
647 				return err;
648 		}
649 
650 		if (record->type & ARM_SPE_L1D_ACCESS) {
651 			err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id,
652 							data_src);
653 			if (err)
654 				return err;
655 		}
656 	}
657 
658 	if (spe->sample_llc) {
659 		if (record->type & ARM_SPE_LLC_MISS) {
660 			err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
661 							data_src);
662 			if (err)
663 				return err;
664 		}
665 
666 		if (record->type & ARM_SPE_LLC_ACCESS) {
667 			err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
668 							data_src);
669 			if (err)
670 				return err;
671 		}
672 	}
673 
674 	if (spe->sample_tlb) {
675 		if (record->type & ARM_SPE_TLB_MISS) {
676 			err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
677 							data_src);
678 			if (err)
679 				return err;
680 		}
681 
682 		if (record->type & ARM_SPE_TLB_ACCESS) {
683 			err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
684 							data_src);
685 			if (err)
686 				return err;
687 		}
688 	}
689 
690 	if (spe->sample_branch && (record->op & ARM_SPE_OP_BRANCH_ERET)) {
691 		err = arm_spe__synth_branch_sample(speq, spe->branch_id);
692 		if (err)
693 			return err;
694 	}
695 
696 	if (spe->sample_remote_access &&
697 	    (record->type & ARM_SPE_REMOTE_ACCESS)) {
698 		err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
699 						data_src);
700 		if (err)
701 			return err;
702 	}
703 
704 	/*
705 	 * A data_src of zero means the record is not a memory operation, so
706 	 * skip synthesizing a memory sample in that case.
707 	 */
708 	if (spe->sample_memory && data_src) {
709 		err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
710 		if (err)
711 			return err;
712 	}
713 
714 	if (spe->sample_instructions) {
715 		err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
716 		if (err)
717 			return err;
718 	}
719 
720 	return 0;
721 }
722 
723 static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
724 {
725 	struct arm_spe *spe = speq->spe;
726 	struct arm_spe_record *record;
727 	int ret;
728 
729 	if (!spe->kernel_start)
730 		spe->kernel_start = machine__kernel_start(spe->machine);
731 
732 	while (1) {
733 		/*
734 		 * The usual logic would be to decode the packets first and then
735 		 * synthesize a sample based on the record; but here the flow is
736 		 * reversed: arm_spe_sample() is called to synthesize samples
737 		 * prior to arm_spe_decode().
738 		 *
739 		 * There are two reasons for this ordering:
740 		 * 1. When the queue was set up in arm_spe__setup_queue(), the
741 		 * trace data was already decoded and a record generated, but no
742 		 * sample has been synthesized for it yet; that leftover record
743 		 * is handled here.
744 		 * 2. After decoding trace data, the record timestamp has to be
745 		 * compared with the timestamp of the incoming perf event. If the
746 		 * record timestamp is later, we bail out and push the record
747 		 * onto the auxtrace heap, so that synthesizing its sample is
748 		 * deferred until the next time this function runs; this keeps
749 		 * Arm SPE samples correctly time-ordered with respect to other
750 		 * perf events.
751 		 */
752 
753 		/*
754 		 * Update pid/tid info.
755 		 */
756 		record = &speq->decoder->record;
757 		if (!spe->timeless_decoding && record->context_id != (u64)-1) {
758 			ret = arm_spe_set_tid(speq, record->context_id);
759 			if (ret)
760 				return ret;
761 
762 			spe->use_ctx_pkt_for_pid = true;
763 		}
764 
765 		ret = arm_spe_sample(speq);
766 		if (ret)
767 			return ret;
768 
769 		ret = arm_spe_decode(speq->decoder);
770 		if (!ret) {
771 			pr_debug("No data or all data has been processed.\n");
772 			return 1;
773 		}
774 
775 		/*
776 		 * An error was detected while decoding the SPE trace data;
777 		 * continue with the next trace data to find more records.
778 		 */
779 		if (ret < 0)
780 			continue;
781 
782 		record = &speq->decoder->record;
783 
784 		/* Update timestamp for the last record */
785 		if (record->timestamp > speq->timestamp)
786 			speq->timestamp = record->timestamp;
787 
788 		/*
789 		 * If the queue's timestamp is later than that of the incoming
790 		 * perf event, bail out so that the perf event can be processed
791 		 * first.
792 		 */
793 		if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
794 			*timestamp = speq->timestamp;
795 			return 0;
796 		}
797 	}
798 
799 	return 0;
800 }
801 
802 static int arm_spe__setup_queue(struct arm_spe *spe,
803 			       struct auxtrace_queue *queue,
804 			       unsigned int queue_nr)
805 {
806 	struct arm_spe_queue *speq = queue->priv;
807 	struct arm_spe_record *record;
808 
809 	if (list_empty(&queue->head) || speq)
810 		return 0;
811 
812 	speq = arm_spe__alloc_queue(spe, queue_nr);
813 
814 	if (!speq)
815 		return -ENOMEM;
816 
817 	queue->priv = speq;
818 
819 	if (queue->cpu != -1)
820 		speq->cpu = queue->cpu;
821 
822 	if (!speq->on_heap) {
823 		int ret;
824 
825 		if (spe->timeless_decoding)
826 			return 0;
827 
828 retry:
829 		ret = arm_spe_decode(speq->decoder);
830 
831 		if (!ret)
832 			return 0;
833 
834 		if (ret < 0)
835 			goto retry;
836 
837 		record = &speq->decoder->record;
838 
839 		speq->timestamp = record->timestamp;
840 		ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
841 		if (ret)
842 			return ret;
843 		speq->on_heap = true;
844 	}
845 
846 	return 0;
847 }
848 
849 static int arm_spe__setup_queues(struct arm_spe *spe)
850 {
851 	unsigned int i;
852 	int ret;
853 
854 	for (i = 0; i < spe->queues.nr_queues; i++) {
855 		ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
856 		if (ret)
857 			return ret;
858 	}
859 
860 	return 0;
861 }
862 
863 static int arm_spe__update_queues(struct arm_spe *spe)
864 {
865 	if (spe->queues.new_data) {
866 		spe->queues.new_data = false;
867 		return arm_spe__setup_queues(spe);
868 	}
869 
870 	return 0;
871 }
872 
873 static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
874 {
875 	struct evsel *evsel;
876 	struct evlist *evlist = spe->session->evlist;
877 	bool timeless_decoding = true;
878 
879 	/*
880 	 * Loop through the events: if any of them has the time bit set,
881 	 * the trace cannot be decoded timelessly.
882 	 */
883 	evlist__for_each_entry(evlist, evsel) {
884 		if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
885 			timeless_decoding = false;
886 	}
887 
888 	return timeless_decoding;
889 }
890 
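/*
 * Process queues in timestamp order using the auxtrace min-heap, so
 * that synthesized samples interleave correctly with other perf events
 * up to the given timestamp.
 */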
891 static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
892 {
893 	unsigned int queue_nr;
894 	u64 ts;
895 	int ret;
896 
897 	while (1) {
898 		struct auxtrace_queue *queue;
899 		struct arm_spe_queue *speq;
900 
901 		if (!spe->heap.heap_cnt)
902 			return 0;
903 
904 		if (spe->heap.heap_array[0].ordinal >= timestamp)
905 			return 0;
906 
907 		queue_nr = spe->heap.heap_array[0].queue_nr;
908 		queue = &spe->queues.queue_array[queue_nr];
909 		speq = queue->priv;
910 
911 		auxtrace_heap__pop(&spe->heap);
912 
913 		if (spe->heap.heap_cnt) {
914 			ts = spe->heap.heap_array[0].ordinal + 1;
915 			if (ts > timestamp)
916 				ts = timestamp;
917 		} else {
918 			ts = timestamp;
919 		}
920 
921 		/*
922 		 * A previous context-switch event has set pid/tid in the machine's context, so
923 		 * here we need to update the pid/tid in the thread and SPE queue.
924 		 */
925 		if (!spe->use_ctx_pkt_for_pid)
926 			arm_spe_set_pid_tid_cpu(spe, queue);
927 
928 		ret = arm_spe_run_decoder(speq, &ts);
929 		if (ret < 0) {
930 			auxtrace_heap__add(&spe->heap, queue_nr, ts);
931 			return ret;
932 		}
933 
934 		if (!ret) {
935 			ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
936 			if (ret < 0)
937 				return ret;
938 		} else {
939 			speq->on_heap = false;
940 		}
941 	}
942 
943 	return 0;
944 }
945 
946 static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
947 					    u64 time_)
948 {
949 	struct auxtrace_queues *queues = &spe->queues;
950 	unsigned int i;
951 	u64 ts = 0;
952 
953 	for (i = 0; i < queues->nr_queues; i++) {
954 		struct auxtrace_queue *queue = &spe->queues.queue_array[i];
955 		struct arm_spe_queue *speq = queue->priv;
956 
957 		if (speq && (tid == -1 || speq->tid == tid)) {
958 			speq->time = time_;
959 			arm_spe_set_pid_tid_cpu(spe, queue);
960 			arm_spe_run_decoder(speq, &ts);
961 		}
962 	}
963 	return 0;
964 }
965 
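/*
 * Track the current pid/tid per CPU from context-switch events; used as
 * a fallback when the SPE trace carries no CONTEXT packets
 * (i.e. spe->use_ctx_pkt_for_pid is false).
 */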
966 static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event,
967 				  struct perf_sample *sample)
968 {
969 	pid_t pid, tid;
970 	int cpu;
971 
972 	if (!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT))
973 		return 0;
974 
975 	pid = event->context_switch.next_prev_pid;
976 	tid = event->context_switch.next_prev_tid;
977 	cpu = sample->cpu;
978 
979 	if (tid == -1)
980 		pr_warning("context_switch event has no tid\n");
981 
982 	return machine__set_current_tid(spe->machine, cpu, pid, tid);
983 }
984 
985 static int arm_spe_process_event(struct perf_session *session,
986 				 union perf_event *event,
987 				 struct perf_sample *sample,
988 				 const struct perf_tool *tool)
989 {
990 	int err = 0;
991 	u64 timestamp;
992 	struct arm_spe *spe = container_of(session->auxtrace,
993 			struct arm_spe, auxtrace);
994 
995 	if (dump_trace)
996 		return 0;
997 
998 	if (!tool->ordered_events) {
999 		pr_err("SPE trace requires ordered events\n");
1000 		return -EINVAL;
1001 	}
1002 
1003 	if (sample->time && (sample->time != (u64) -1))
1004 		timestamp = perf_time_to_tsc(sample->time, &spe->tc);
1005 	else
1006 		timestamp = 0;
1007 
1008 	if (timestamp || spe->timeless_decoding) {
1009 		err = arm_spe__update_queues(spe);
1010 		if (err)
1011 			return err;
1012 	}
1013 
1014 	if (spe->timeless_decoding) {
1015 		if (event->header.type == PERF_RECORD_EXIT) {
1016 			err = arm_spe_process_timeless_queues(spe,
1017 					event->fork.tid,
1018 					sample->time);
1019 		}
1020 	} else if (timestamp) {
1021 		err = arm_spe_process_queues(spe, timestamp);
1022 		if (err)
1023 			return err;
1024 
1025 		if (!spe->use_ctx_pkt_for_pid &&
1026 		    (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE ||
1027 		    event->header.type == PERF_RECORD_SWITCH))
1028 			err = arm_spe_context_switch(spe, event, sample);
1029 	}
1030 
1031 	return err;
1032 }
1033 
1034 static int arm_spe_process_auxtrace_event(struct perf_session *session,
1035 					  union perf_event *event,
1036 					  const struct perf_tool *tool __maybe_unused)
1037 {
1038 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
1039 					     auxtrace);
1040 
1041 	if (!spe->data_queued) {
1042 		struct auxtrace_buffer *buffer;
1043 		off_t data_offset;
1044 		int fd = perf_data__fd(session->data);
1045 		int err;
1046 
1047 		if (perf_data__is_pipe(session->data)) {
1048 			data_offset = 0;
1049 		} else {
1050 			data_offset = lseek(fd, 0, SEEK_CUR);
1051 			if (data_offset == -1)
1052 				return -errno;
1053 		}
1054 
1055 		err = auxtrace_queues__add_event(&spe->queues, session, event,
1056 				data_offset, &buffer);
1057 		if (err)
1058 			return err;
1059 
1060 		/* Dump here, now that we have copied a piped trace out of the pipe */
1061 		if (dump_trace) {
1062 			if (auxtrace_buffer__get_data(buffer, fd)) {
1063 				arm_spe_dump_event(spe, buffer->data,
1064 						buffer->size);
1065 				auxtrace_buffer__put_data(buffer);
1066 			}
1067 		}
1068 	}
1069 
1070 	return 0;
1071 }
1072 
1073 static int arm_spe_flush(struct perf_session *session,
1074 			 const struct perf_tool *tool)
1075 {
1076 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
1077 			auxtrace);
1078 	int ret;
1079 
1080 	if (dump_trace)
1081 		return 0;
1082 
1083 	if (!tool->ordered_events)
1084 		return -EINVAL;
1085 
1086 	ret = arm_spe__update_queues(spe);
1087 	if (ret < 0)
1088 		return ret;
1089 
1090 	if (spe->timeless_decoding)
1091 		return arm_spe_process_timeless_queues(spe, -1,
1092 				MAX_TIMESTAMP - 1);
1093 
1094 	ret = arm_spe_process_queues(spe, MAX_TIMESTAMP);
1095 	if (ret)
1096 		return ret;
1097 
1098 	if (!spe->use_ctx_pkt_for_pid)
1099 		ui__warning("Arm SPE CONTEXT packets not found in the traces.\n"
1100 			    "Matching of TIDs to SPE events could be inaccurate.\n");
1101 
1102 	return 0;
1103 }
1104 
1105 static u64 *arm_spe__alloc_per_cpu_metadata(u64 *buf, int per_cpu_size)
1106 {
1107 	u64 *metadata;
1108 
1109 	metadata = zalloc(per_cpu_size);
1110 	if (!metadata)
1111 		return NULL;
1112 
1113 	memcpy(metadata, buf, per_cpu_size);
1114 	return metadata;
1115 }
1116 
1117 static void arm_spe__free_metadata(u64 **metadata, int nr_cpu)
1118 {
1119 	int i;
1120 
1121 	for (i = 0; i < nr_cpu; i++)
1122 		zfree(&metadata[i]);
1123 	free(metadata);
1124 }
1125 
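/*
 * Parse the metadata (version 2 onwards) that follows the auxtrace info
 * header.  The layout, in u64 words, is a header of ARM_SPE_HEADER_SIZE
 * words (version, header size, PMU type, number of CPUs) followed by one
 * equally sized block per CPU (magic, CPU number, parameter count, MIDR,
 * per-CPU PMU type, minimum interval, ...).
 */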
1126 static u64 **arm_spe__alloc_metadata(struct perf_record_auxtrace_info *info,
1127 				     u64 *ver, int *nr_cpu)
1128 {
1129 	u64 *ptr = (u64 *)info->priv;
1130 	u64 metadata_size;
1131 	u64 **metadata = NULL;
1132 	int hdr_sz, per_cpu_sz, i;
1133 
1134 	metadata_size = info->header.size -
1135 		sizeof(struct perf_record_auxtrace_info);
1136 
1137 	/* Metadata version 1 */
1138 	if (metadata_size == ARM_SPE_AUXTRACE_V1_PRIV_SIZE) {
1139 		*ver = 1;
1140 		*nr_cpu = 0;
1141 		/* No per CPU metadata */
1142 		return NULL;
1143 	}
1144 
1145 	*ver = ptr[ARM_SPE_HEADER_VERSION];
1146 	hdr_sz = ptr[ARM_SPE_HEADER_SIZE];
1147 	*nr_cpu = ptr[ARM_SPE_CPUS_NUM];
1148 
1149 	metadata = calloc(*nr_cpu, sizeof(*metadata));
1150 	if (!metadata)
1151 		return NULL;
1152 
1153 	/* Locate the start address of per CPU metadata */
1154 	ptr += hdr_sz;
1155 	per_cpu_sz = (metadata_size - (hdr_sz * sizeof(u64))) / (*nr_cpu);
1156 
1157 	for (i = 0; i < *nr_cpu; i++) {
1158 		metadata[i] = arm_spe__alloc_per_cpu_metadata(ptr, per_cpu_sz);
1159 		if (!metadata[i])
1160 			goto err_per_cpu_metadata;
1161 
1162 		ptr += per_cpu_sz / sizeof(u64);
1163 	}
1164 
1165 	return metadata;
1166 
1167 err_per_cpu_metadata:
1168 	arm_spe__free_metadata(metadata, *nr_cpu);
1169 	return NULL;
1170 }
1171 
1172 static void arm_spe_free_queue(void *priv)
1173 {
1174 	struct arm_spe_queue *speq = priv;
1175 
1176 	if (!speq)
1177 		return;
1178 	thread__zput(speq->thread);
1179 	arm_spe_decoder_free(speq->decoder);
1180 	zfree(&speq->event_buf);
1181 	free(speq);
1182 }
1183 
1184 static void arm_spe_free_events(struct perf_session *session)
1185 {
1186 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
1187 					     auxtrace);
1188 	struct auxtrace_queues *queues = &spe->queues;
1189 	unsigned int i;
1190 
1191 	for (i = 0; i < queues->nr_queues; i++) {
1192 		arm_spe_free_queue(queues->queue_array[i].priv);
1193 		queues->queue_array[i].priv = NULL;
1194 	}
1195 	auxtrace_queues__free(queues);
1196 }
1197 
1198 static void arm_spe_free(struct perf_session *session)
1199 {
1200 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
1201 					     auxtrace);
1202 
1203 	auxtrace_heap__free(&spe->heap);
1204 	arm_spe_free_events(session);
1205 	session->auxtrace = NULL;
1206 	arm_spe__free_metadata(spe->metadata, spe->metadata_nr_cpu);
1207 	free(spe);
1208 }
1209 
1210 static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
1211 				      struct evsel *evsel)
1212 {
1213 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);
1214 
1215 	return evsel->core.attr.type == spe->pmu_type;
1216 }
1217 
1218 static const char * const metadata_hdr_v1_fmts[] = {
1219 	[ARM_SPE_PMU_TYPE]		= "  PMU Type           :%"PRId64"\n",
1220 	[ARM_SPE_PER_CPU_MMAPS]		= "  Per CPU mmaps      :%"PRId64"\n",
1221 };
1222 
1223 static const char * const metadata_hdr_fmts[] = {
1224 	[ARM_SPE_HEADER_VERSION]	= "  Header version     :%"PRId64"\n",
1225 	[ARM_SPE_HEADER_SIZE]		= "  Header size        :%"PRId64"\n",
1226 	[ARM_SPE_PMU_TYPE_V2]		= "  PMU type v2        :%"PRId64"\n",
1227 	[ARM_SPE_CPUS_NUM]		= "  CPU number         :%"PRId64"\n",
1228 };
1229 
1230 static const char * const metadata_per_cpu_fmts[] = {
1231 	[ARM_SPE_MAGIC]			= "    Magic            :0x%"PRIx64"\n",
1232 	[ARM_SPE_CPU]			= "    CPU #            :%"PRId64"\n",
1233 	[ARM_SPE_CPU_NR_PARAMS]		= "    Num of params    :%"PRId64"\n",
1234 	[ARM_SPE_CPU_MIDR]		= "    MIDR             :0x%"PRIx64"\n",
1235 	[ARM_SPE_CPU_PMU_TYPE]		= "    PMU Type         :%"PRId64"\n",
1236 	[ARM_SPE_CAP_MIN_IVAL]		= "    Min Interval     :%"PRId64"\n",
1237 };
1238 
1239 static void arm_spe_print_info(struct arm_spe *spe, __u64 *arr)
1240 {
1241 	unsigned int i, cpu, hdr_size, cpu_num, cpu_size;
1242 	const char * const *hdr_fmts;
1243 
1244 	if (!dump_trace)
1245 		return;
1246 
1247 	if (spe->metadata_ver == 1) {
1248 		cpu_num = 0;
1249 		hdr_size = ARM_SPE_AUXTRACE_V1_PRIV_MAX;
1250 		hdr_fmts = metadata_hdr_v1_fmts;
1251 	} else {
1252 		cpu_num = arr[ARM_SPE_CPUS_NUM];
1253 		hdr_size = arr[ARM_SPE_HEADER_SIZE];
1254 		hdr_fmts = metadata_hdr_fmts;
1255 	}
1256 
1257 	for (i = 0; i < hdr_size; i++)
1258 		fprintf(stdout, hdr_fmts[i], arr[i]);
1259 
1260 	arr += hdr_size;
1261 	for (cpu = 0; cpu < cpu_num; cpu++) {
1262 		/*
1263 		 * The parameters from ARM_SPE_MAGIC to ARM_SPE_CPU_NR_PARAMS
1264 		 * are fixed; the number of parameters that follow is given by
1265 		 * the field 'ARM_SPE_CPU_NR_PARAMS'.
1266 		 */
1267 		cpu_size = (ARM_SPE_CPU_NR_PARAMS + 1) + arr[ARM_SPE_CPU_NR_PARAMS];
1268 		for (i = 0; i < cpu_size; i++)
1269 			fprintf(stdout, metadata_per_cpu_fmts[i], arr[i]);
1270 		arr += cpu_size;
1271 	}
1272 }
1273 
1274 static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
1275 				    const char *name)
1276 {
1277 	struct evsel *evsel;
1278 
1279 	evlist__for_each_entry(evlist, evsel) {
1280 		if (evsel->core.id && evsel->core.id[0] == id) {
1281 			if (evsel->name)
1282 				zfree(&evsel->name);
1283 			evsel->name = strdup(name);
1284 			break;
1285 		}
1286 	}
1287 }
1288 
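/*
 * Create one synthetic event attribute per requested sample class
 * (cache, TLB, branch, remote access, memory, instructions).  Each gets
 * a sample ID at a fixed offset from the SPE evsel's ID so that
 * synthesized samples can be attributed to a named pseudo event.
 */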
1289 static int
1290 arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
1291 {
1292 	struct evlist *evlist = session->evlist;
1293 	struct evsel *evsel;
1294 	struct perf_event_attr attr;
1295 	bool found = false;
1296 	u64 id;
1297 	int err;
1298 
1299 	evlist__for_each_entry(evlist, evsel) {
1300 		if (evsel->core.attr.type == spe->pmu_type) {
1301 			found = true;
1302 			break;
1303 		}
1304 	}
1305 
1306 	if (!found) {
1307 		pr_debug("No selected events with SPE trace data\n");
1308 		return 0;
1309 	}
1310 
1311 	memset(&attr, 0, sizeof(struct perf_event_attr));
1312 	attr.size = sizeof(struct perf_event_attr);
1313 	attr.type = PERF_TYPE_HARDWARE;
1314 	attr.sample_type = evsel->core.attr.sample_type &
1315 				(PERF_SAMPLE_MASK | PERF_SAMPLE_PHYS_ADDR);
1316 	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1317 			    PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC |
1318 			    PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR;
1319 	if (spe->timeless_decoding)
1320 		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
1321 	else
1322 		attr.sample_type |= PERF_SAMPLE_TIME;
1323 
1324 	spe->sample_type = attr.sample_type;
1325 
1326 	attr.exclude_user = evsel->core.attr.exclude_user;
1327 	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
1328 	attr.exclude_hv = evsel->core.attr.exclude_hv;
1329 	attr.exclude_host = evsel->core.attr.exclude_host;
1330 	attr.exclude_guest = evsel->core.attr.exclude_guest;
1331 	attr.sample_id_all = evsel->core.attr.sample_id_all;
1332 	attr.read_format = evsel->core.attr.read_format;
1333 
1334 	/* create new id val to be a fixed offset from evsel id */
1335 	id = evsel->core.id[0] + 1000000000;
1336 
1337 	if (!id)
1338 		id = 1;
1339 
1340 	if (spe->synth_opts.flc) {
1341 		spe->sample_flc = true;
1342 
1343 		/* Level 1 data cache miss */
1344 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1345 		if (err)
1346 			return err;
1347 		spe->l1d_miss_id = id;
1348 		arm_spe_set_event_name(evlist, id, "l1d-miss");
1349 		id += 1;
1350 
1351 		/* Level 1 data cache access */
1352 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1353 		if (err)
1354 			return err;
1355 		spe->l1d_access_id = id;
1356 		arm_spe_set_event_name(evlist, id, "l1d-access");
1357 		id += 1;
1358 	}
1359 
1360 	if (spe->synth_opts.llc) {
1361 		spe->sample_llc = true;
1362 
1363 		/* Last level cache miss */
1364 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1365 		if (err)
1366 			return err;
1367 		spe->llc_miss_id = id;
1368 		arm_spe_set_event_name(evlist, id, "llc-miss");
1369 		id += 1;
1370 
1371 		/* Last level cache access */
1372 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1373 		if (err)
1374 			return err;
1375 		spe->llc_access_id = id;
1376 		arm_spe_set_event_name(evlist, id, "llc-access");
1377 		id += 1;
1378 	}
1379 
1380 	if (spe->synth_opts.tlb) {
1381 		spe->sample_tlb = true;
1382 
1383 		/* TLB miss */
1384 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1385 		if (err)
1386 			return err;
1387 		spe->tlb_miss_id = id;
1388 		arm_spe_set_event_name(evlist, id, "tlb-miss");
1389 		id += 1;
1390 
1391 		/* TLB access */
1392 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1393 		if (err)
1394 			return err;
1395 		spe->tlb_access_id = id;
1396 		arm_spe_set_event_name(evlist, id, "tlb-access");
1397 		id += 1;
1398 	}
1399 
1400 	if (spe->synth_opts.branches) {
1401 		spe->sample_branch = true;
1402 
1403 		/* Branch */
1404 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1405 		if (err)
1406 			return err;
1407 		spe->branch_id = id;
1408 		arm_spe_set_event_name(evlist, id, "branch");
1409 		id += 1;
1410 	}
1411 
1412 	if (spe->synth_opts.remote_access) {
1413 		spe->sample_remote_access = true;
1414 
1415 		/* Remote access */
1416 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1417 		if (err)
1418 			return err;
1419 		spe->remote_access_id = id;
1420 		arm_spe_set_event_name(evlist, id, "remote-access");
1421 		id += 1;
1422 	}
1423 
1424 	if (spe->synth_opts.mem) {
1425 		spe->sample_memory = true;
1426 
1427 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1428 		if (err)
1429 			return err;
1430 		spe->memory_id = id;
1431 		arm_spe_set_event_name(evlist, id, "memory");
1432 		id += 1;
1433 	}
1434 
1435 	if (spe->synth_opts.instructions) {
1436 		if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) {
1437 			pr_warning("Only instruction-based sampling period is currently supported by Arm SPE.\n");
1438 			goto synth_instructions_out;
1439 		}
1440 		if (spe->synth_opts.period > 1)
1441 			pr_warning("Arm SPE has a hardware-based sample period.\n"
1442 				   "Additional instruction events will be discarded by --itrace\n");
1443 
1444 		spe->sample_instructions = true;
1445 		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1446 		attr.sample_period = spe->synth_opts.period;
1447 		spe->instructions_sample_period = attr.sample_period;
1448 		err = perf_session__deliver_synth_attr_event(session, &attr, id);
1449 		if (err)
1450 			return err;
1451 		spe->instructions_id = id;
1452 		arm_spe_set_event_name(evlist, id, "instructions");
1453 	}
1454 synth_instructions_out:
1455 
1456 	return 0;
1457 }
1458 
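/*
 * A system is considered homogeneous when every recorded CPU reports
 * the same MIDR; this lets per-thread traces (CPU == -1) reuse CPU0's
 * metadata.
 */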
1459 static bool arm_spe__is_homogeneous(u64 **metadata, int nr_cpu)
1460 {
1461 	u64 midr;
1462 	int i;
1463 
1464 	if (!nr_cpu)
1465 		return false;
1466 
1467 	for (i = 0; i < nr_cpu; i++) {
1468 		if (!metadata[i])
1469 			return false;
1470 
1471 		if (i == 0) {
1472 			midr = metadata[i][ARM_SPE_CPU_MIDR];
1473 			continue;
1474 		}
1475 
1476 		if (midr != metadata[i][ARM_SPE_CPU_MIDR])
1477 			return false;
1478 	}
1479 
1480 	return true;
1481 }
1482 
1483 int arm_spe_process_auxtrace_info(union perf_event *event,
1484 				  struct perf_session *session)
1485 {
1486 	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
1487 	size_t min_sz = ARM_SPE_AUXTRACE_V1_PRIV_SIZE;
1488 	struct perf_record_time_conv *tc = &session->time_conv;
1489 	struct arm_spe *spe;
1490 	u64 **metadata = NULL;
1491 	u64 metadata_ver;
1492 	int nr_cpu, err;
1493 
1494 	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
1495 					min_sz)
1496 		return -EINVAL;
1497 
1498 	metadata = arm_spe__alloc_metadata(auxtrace_info, &metadata_ver,
1499 					   &nr_cpu);
1500 	if (!metadata && metadata_ver != 1) {
1501 		pr_err("Failed to parse Arm SPE metadata.\n");
1502 		return -EINVAL;
1503 	}
1504 
1505 	spe = zalloc(sizeof(struct arm_spe));
1506 	if (!spe) {
1507 		err = -ENOMEM;
1508 		goto err_free_metadata;
1509 	}
1510 
1511 	err = auxtrace_queues__init(&spe->queues);
1512 	if (err)
1513 		goto err_free;
1514 
1515 	spe->session = session;
1516 	spe->machine = &session->machines.host; /* No kvm support */
1517 	spe->auxtrace_type = auxtrace_info->type;
1518 	if (metadata_ver == 1)
1519 		spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
1520 	else
1521 		spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE_V2];
1522 	spe->metadata = metadata;
1523 	spe->metadata_ver = metadata_ver;
1524 	spe->metadata_nr_cpu = nr_cpu;
1525 	spe->is_homogeneous = arm_spe__is_homogeneous(metadata, nr_cpu);
1526 
1527 	spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);
1528 
1529 	/*
1530 	 * The synthesized event PERF_RECORD_TIME_CONV has already been
1531 	 * handled and the parameters for the hardware clock are stored in
1532 	 * the session context.  Pass these parameters to the struct
1533 	 * perf_tsc_conversion in "spe->tc", which is used later to convert
1534 	 * between the clock counter and timestamps.
1535 	 *
1536 	 * For backward compatibility, copy the fields starting from
1537 	 * "time_cycles" only if they are contained in the event.
1538 	 */
1539 	spe->tc.time_shift = tc->time_shift;
1540 	spe->tc.time_mult = tc->time_mult;
1541 	spe->tc.time_zero = tc->time_zero;
1542 
1543 	if (event_contains(*tc, time_cycles)) {
1544 		spe->tc.time_cycles = tc->time_cycles;
1545 		spe->tc.time_mask = tc->time_mask;
1546 		spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
1547 		spe->tc.cap_user_time_short = tc->cap_user_time_short;
1548 	}
1549 
1550 	spe->auxtrace.process_event = arm_spe_process_event;
1551 	spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
1552 	spe->auxtrace.flush_events = arm_spe_flush;
1553 	spe->auxtrace.free_events = arm_spe_free_events;
1554 	spe->auxtrace.free = arm_spe_free;
1555 	spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
1556 	session->auxtrace = &spe->auxtrace;
1557 
1558 	arm_spe_print_info(spe, &auxtrace_info->priv[0]);
1559 
1560 	if (dump_trace)
1561 		return 0;
1562 
1563 	if (session->itrace_synth_opts && session->itrace_synth_opts->set)
1564 		spe->synth_opts = *session->itrace_synth_opts;
1565 	else
1566 		itrace_synth_opts__set_default(&spe->synth_opts, false);
1567 
1568 	err = arm_spe_synth_events(spe, session);
1569 	if (err)
1570 		goto err_free_queues;
1571 
1572 	err = auxtrace_queues__process_index(&spe->queues, session);
1573 	if (err)
1574 		goto err_free_queues;
1575 
1576 	if (spe->queues.populated)
1577 		spe->data_queued = true;
1578 
1579 	return 0;
1580 
1581 err_free_queues:
1582 	auxtrace_queues__free(&spe->queues);
1583 	session->auxtrace = NULL;
1584 err_free:
1585 	free(spe);
1586 err_free_metadata:
1587 	arm_spe__free_metadata(metadata, nr_cpu);
1588 	return err;
1589 }
1590