xref: /linux/tools/perf/util/arm-spe.c (revision e7d759f31ca295d589f7420719c311870bb3166f)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Arm Statistical Profiling Extensions (SPE) support
4  * Copyright (c) 2017-2018, Arm Ltd.
5  */
6 
7 #include <byteswap.h>
8 #include <endian.h>
9 #include <errno.h>
10 #include <inttypes.h>
11 #include <linux/bitops.h>
12 #include <linux/kernel.h>
13 #include <linux/log2.h>
14 #include <linux/types.h>
15 #include <linux/zalloc.h>
16 #include <stdlib.h>
17 #include <unistd.h>
18 
19 #include "auxtrace.h"
20 #include "color.h"
21 #include "debug.h"
22 #include "evlist.h"
23 #include "evsel.h"
24 #include "machine.h"
25 #include "session.h"
26 #include "symbol.h"
27 #include "thread.h"
28 #include "thread-stack.h"
29 #include "tsc.h"
30 #include "tool.h"
31 #include "util/synthetic-events.h"
32 
33 #include "arm-spe.h"
34 #include "arm-spe-decoder/arm-spe-decoder.h"
35 #include "arm-spe-decoder/arm-spe-pkt-decoder.h"
36 
37 #include "../../arch/arm64/include/asm/cputype.h"
38 #define MAX_TIMESTAMP (~0ULL)
39 
/*
 * Per-session Arm SPE decoding state.
 *
 * The embedded 'auxtrace' member is what the session points at; the callbacks
 * in this file recover the enclosing structure from it with container_of().
 */
struct arm_spe {
	struct auxtrace			auxtrace;
	/* One auxtrace queue per trace stream; queue->priv holds a struct arm_spe_queue */
	struct auxtrace_queues		queues;
	/* Heap of queue numbers keyed by the timestamp of their pending record */
	struct auxtrace_heap		heap;
	struct itrace_synth_opts        synth_opts;
	u32				auxtrace_type;
	struct perf_session		*session;
	struct machine			*machine;
	/* PMU type used to recognise SPE evsels (compared against attr.type) */
	u32				pmu_type;
	/* CPU identification value used to select the data-source decoding */
	u64				midr;

	/* Parameters for converting between trace timestamps and perf time */
	struct perf_tsc_conversion	tc;

	/* Non-zero when samples are synthesized without timestamp ordering */
	u8				timeless_decoding;
	/* Non-zero when auxtrace events must not be queued again */
	u8				data_queued;

	/* sample_type used when re-synthesizing samples for injection */
	u64				sample_type;
	/* Per-category switches consulted by arm_spe_sample() */
	u8				sample_flc;
	u8				sample_llc;
	u8				sample_tlb;
	u8				sample_branch;
	u8				sample_remote_access;
	u8				sample_memory;
	u8				sample_instructions;
	/* Number of records that make up one synthesized instruction sample */
	u64				instructions_sample_period;

	/* Sample ids stamped on each category of synthesized sample */
	u64				l1d_miss_id;
	u64				l1d_access_id;
	u64				llc_miss_id;
	u64				llc_access_id;
	u64				tlb_miss_id;
	u64				tlb_access_id;
	u64				branch_miss_id;
	u64				remote_access_id;
	u64				memory_id;
	u64				instructions_id;

	/* Lowest kernel address; filled in lazily by arm_spe_run_decoder() */
	u64				kernel_start;

	unsigned long			num_events;
	/* Set once a record carrying a context id has been used to set the tid */
	u8				use_ctx_pkt_for_pid;
};
82 
/*
 * Per-queue decoding state, stored in auxtrace_queue::priv and created by
 * arm_spe__alloc_queue().
 */
struct arm_spe_queue {
	/* Back pointer to the owning session state */
	struct arm_spe			*spe;
	/* Index of this queue in spe->queues.queue_array */
	unsigned int			queue_nr;
	/* Buffer most recently returned by auxtrace_buffer__next() */
	struct auxtrace_buffer		*buffer;
	/* Last non-empty buffer handed to the decoder; dropped when replaced */
	struct auxtrace_buffer		*old_buffer;
	/* Scratch buffer (PERF_SAMPLE_MAX_SIZE bytes) for synthesized events */
	union perf_event		*event_buf;
	/* True while the queue has an entry in spe->heap */
	bool				on_heap;
	bool				done;
	/* pid/tid/cpu attributed to synthesized samples; -1 when unknown */
	pid_t				pid;
	pid_t				tid;
	int				cpu;
	struct arm_spe_decoder		*decoder;
	u64				time;
	/* Largest record timestamp seen so far on this queue */
	u64				timestamp;
	/* Thread reference matching 'tid'; released with thread__zput() */
	struct thread			*thread;
	/* Records counted towards the next instruction sample */
	u64				period_instructions;
};
100 
101 static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
102 			 unsigned char *buf, size_t len)
103 {
104 	struct arm_spe_pkt packet;
105 	size_t pos = 0;
106 	int ret, pkt_len, i;
107 	char desc[ARM_SPE_PKT_DESC_MAX];
108 	const char *color = PERF_COLOR_BLUE;
109 
110 	color_fprintf(stdout, color,
111 		      ". ... ARM SPE data: size %#zx bytes\n",
112 		      len);
113 
114 	while (len) {
115 		ret = arm_spe_get_packet(buf, len, &packet);
116 		if (ret > 0)
117 			pkt_len = ret;
118 		else
119 			pkt_len = 1;
120 		printf(".");
121 		color_fprintf(stdout, color, "  %08x: ", pos);
122 		for (i = 0; i < pkt_len; i++)
123 			color_fprintf(stdout, color, " %02x", buf[i]);
124 		for (; i < 16; i++)
125 			color_fprintf(stdout, color, "   ");
126 		if (ret > 0) {
127 			ret = arm_spe_pkt_desc(&packet, desc,
128 					       ARM_SPE_PKT_DESC_MAX);
129 			if (!ret)
130 				color_fprintf(stdout, color, " %s\n", desc);
131 		} else {
132 			color_fprintf(stdout, color, " Bad packet!\n");
133 		}
134 		pos += pkt_len;
135 		buf += pkt_len;
136 		len -= pkt_len;
137 	}
138 }
139 
/*
 * Dump one auxtrace event payload: emit a "." separator line on stdout and
 * then the packet-by-packet dump of @len bytes at @buf.
 */
static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
			       size_t len)
{
	fputs(".\n", stdout);
	arm_spe_dump(spe, buf, len);
}
146 
147 static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
148 {
149 	struct arm_spe_queue *speq = data;
150 	struct auxtrace_buffer *buffer = speq->buffer;
151 	struct auxtrace_buffer *old_buffer = speq->old_buffer;
152 	struct auxtrace_queue *queue;
153 
154 	queue = &speq->spe->queues.queue_array[speq->queue_nr];
155 
156 	buffer = auxtrace_buffer__next(queue, buffer);
157 	/* If no more data, drop the previous auxtrace_buffer and return */
158 	if (!buffer) {
159 		if (old_buffer)
160 			auxtrace_buffer__drop_data(old_buffer);
161 		b->len = 0;
162 		return 0;
163 	}
164 
165 	speq->buffer = buffer;
166 
167 	/* If the aux_buffer doesn't have data associated, try to load it */
168 	if (!buffer->data) {
169 		/* get the file desc associated with the perf data file */
170 		int fd = perf_data__fd(speq->spe->session->data);
171 
172 		buffer->data = auxtrace_buffer__get_data(buffer, fd);
173 		if (!buffer->data)
174 			return -ENOMEM;
175 	}
176 
177 	b->len = buffer->size;
178 	b->buf = buffer->data;
179 
180 	if (b->len) {
181 		if (old_buffer)
182 			auxtrace_buffer__drop_data(old_buffer);
183 		speq->old_buffer = buffer;
184 	} else {
185 		auxtrace_buffer__drop_data(buffer);
186 		return arm_spe_get_trace(b, data);
187 	}
188 
189 	return 0;
190 }
191 
192 static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
193 		unsigned int queue_nr)
194 {
195 	struct arm_spe_params params = { .get_trace = 0, };
196 	struct arm_spe_queue *speq;
197 
198 	speq = zalloc(sizeof(*speq));
199 	if (!speq)
200 		return NULL;
201 
202 	speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
203 	if (!speq->event_buf)
204 		goto out_free;
205 
206 	speq->spe = spe;
207 	speq->queue_nr = queue_nr;
208 	speq->pid = -1;
209 	speq->tid = -1;
210 	speq->cpu = -1;
211 	speq->period_instructions = 0;
212 
213 	/* params set */
214 	params.get_trace = arm_spe_get_trace;
215 	params.data = speq;
216 
217 	/* create new decoder */
218 	speq->decoder = arm_spe_decoder_new(&params);
219 	if (!speq->decoder)
220 		goto out_free;
221 
222 	return speq;
223 
224 out_free:
225 	zfree(&speq->event_buf);
226 	free(speq);
227 
228 	return NULL;
229 }
230 
231 static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
232 {
233 	return ip >= spe->kernel_start ?
234 		PERF_RECORD_MISC_KERNEL :
235 		PERF_RECORD_MISC_USER;
236 }
237 
238 static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
239 				    struct auxtrace_queue *queue)
240 {
241 	struct arm_spe_queue *speq = queue->priv;
242 	pid_t tid;
243 
244 	tid = machine__get_current_tid(spe->machine, speq->cpu);
245 	if (tid != -1) {
246 		speq->tid = tid;
247 		thread__zput(speq->thread);
248 	} else
249 		speq->tid = queue->tid;
250 
251 	if ((!speq->thread) && (speq->tid != -1)) {
252 		speq->thread = machine__find_thread(spe->machine, -1,
253 						    speq->tid);
254 	}
255 
256 	if (speq->thread) {
257 		speq->pid = thread__pid(speq->thread);
258 		if (queue->cpu == -1)
259 			speq->cpu = thread__cpu(speq->thread);
260 	}
261 }
262 
263 static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid)
264 {
265 	struct arm_spe *spe = speq->spe;
266 	int err = machine__set_current_tid(spe->machine, speq->cpu, -1, tid);
267 
268 	if (err)
269 		return err;
270 
271 	arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]);
272 
273 	return 0;
274 }
275 
276 static struct simd_flags arm_spe__synth_simd_flags(const struct arm_spe_record *record)
277 {
278 	struct simd_flags simd_flags = {};
279 
280 	if ((record->op & ARM_SPE_OP_LDST) && (record->op & ARM_SPE_OP_SVE_LDST))
281 		simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;
282 
283 	if ((record->op & ARM_SPE_OP_OTHER) && (record->op & ARM_SPE_OP_SVE_OTHER))
284 		simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;
285 
286 	if (record->type & ARM_SPE_SVE_PARTIAL_PRED)
287 		simd_flags.pred |= SIMD_OP_FLAGS_PRED_PARTIAL;
288 
289 	if (record->type & ARM_SPE_SVE_EMPTY_PRED)
290 		simd_flags.pred |= SIMD_OP_FLAGS_PRED_EMPTY;
291 
292 	return simd_flags;
293 }
294 
295 static void arm_spe_prep_sample(struct arm_spe *spe,
296 				struct arm_spe_queue *speq,
297 				union perf_event *event,
298 				struct perf_sample *sample)
299 {
300 	struct arm_spe_record *record = &speq->decoder->record;
301 
302 	if (!spe->timeless_decoding)
303 		sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);
304 
305 	sample->ip = record->from_ip;
306 	sample->cpumode = arm_spe_cpumode(spe, sample->ip);
307 	sample->pid = speq->pid;
308 	sample->tid = speq->tid;
309 	sample->period = 1;
310 	sample->cpu = speq->cpu;
311 	sample->simd_flags = arm_spe__synth_simd_flags(record);
312 
313 	event->sample.header.type = PERF_RECORD_SAMPLE;
314 	event->sample.header.misc = sample->cpumode;
315 	event->sample.header.size = sizeof(struct perf_event_header);
316 }
317 
318 static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type)
319 {
320 	event->header.size = perf_event__sample_event_size(sample, type, 0);
321 	return perf_event__synthesize_sample(event, type, 0, sample);
322 }
323 
324 static inline int
325 arm_spe_deliver_synth_event(struct arm_spe *spe,
326 			    struct arm_spe_queue *speq __maybe_unused,
327 			    union perf_event *event,
328 			    struct perf_sample *sample)
329 {
330 	int ret;
331 
332 	if (spe->synth_opts.inject) {
333 		ret = arm_spe__inject_event(event, sample, spe->sample_type);
334 		if (ret)
335 			return ret;
336 	}
337 
338 	ret = perf_session__deliver_synth_event(spe->session, event, sample);
339 	if (ret)
340 		pr_err("ARM SPE: failed to deliver event, error %d\n", ret);
341 
342 	return ret;
343 }
344 
345 static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
346 				     u64 spe_events_id, u64 data_src)
347 {
348 	struct arm_spe *spe = speq->spe;
349 	struct arm_spe_record *record = &speq->decoder->record;
350 	union perf_event *event = speq->event_buf;
351 	struct perf_sample sample = { .ip = 0, };
352 
353 	arm_spe_prep_sample(spe, speq, event, &sample);
354 
355 	sample.id = spe_events_id;
356 	sample.stream_id = spe_events_id;
357 	sample.addr = record->virt_addr;
358 	sample.phys_addr = record->phys_addr;
359 	sample.data_src = data_src;
360 	sample.weight = record->latency;
361 
362 	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
363 }
364 
365 static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
366 					u64 spe_events_id)
367 {
368 	struct arm_spe *spe = speq->spe;
369 	struct arm_spe_record *record = &speq->decoder->record;
370 	union perf_event *event = speq->event_buf;
371 	struct perf_sample sample = { .ip = 0, };
372 
373 	arm_spe_prep_sample(spe, speq, event, &sample);
374 
375 	sample.id = spe_events_id;
376 	sample.stream_id = spe_events_id;
377 	sample.addr = record->to_ip;
378 	sample.weight = record->latency;
379 
380 	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
381 }
382 
383 static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
384 					     u64 spe_events_id, u64 data_src)
385 {
386 	struct arm_spe *spe = speq->spe;
387 	struct arm_spe_record *record = &speq->decoder->record;
388 	union perf_event *event = speq->event_buf;
389 	struct perf_sample sample = { .ip = 0, };
390 
391 	/*
392 	 * Handles perf instruction sampling period.
393 	 */
394 	speq->period_instructions++;
395 	if (speq->period_instructions < spe->instructions_sample_period)
396 		return 0;
397 	speq->period_instructions = 0;
398 
399 	arm_spe_prep_sample(spe, speq, event, &sample);
400 
401 	sample.id = spe_events_id;
402 	sample.stream_id = spe_events_id;
403 	sample.addr = record->virt_addr;
404 	sample.phys_addr = record->phys_addr;
405 	sample.data_src = data_src;
406 	sample.period = spe->instructions_sample_period;
407 	sample.weight = record->latency;
408 
409 	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
410 }
411 
/*
 * CPUs whose SPE records are decoded with the Neoverse data-source encoding
 * (see arm_spe__synth_data_source()). The empty entry terminates the list.
 */
static const struct midr_range neoverse_spe[] = {
	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
	MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
	{},
};
418 
419 static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *record,
420 						union perf_mem_data_src *data_src)
421 {
422 	/*
423 	 * Even though four levels of cache hierarchy are possible, no known
424 	 * production Neoverse systems currently include more than three levels
425 	 * so for the time being we assume three exist. If a production system
426 	 * is built with four the this function would have to be changed to
427 	 * detect the number of levels for reporting.
428 	 */
429 
430 	/*
431 	 * We have no data on the hit level or data source for stores in the
432 	 * Neoverse SPE records.
433 	 */
434 	if (record->op & ARM_SPE_OP_ST) {
435 		data_src->mem_lvl = PERF_MEM_LVL_NA;
436 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
437 		data_src->mem_snoop = PERF_MEM_SNOOP_NA;
438 		return;
439 	}
440 
441 	switch (record->source) {
442 	case ARM_SPE_NV_L1D:
443 		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
444 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
445 		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
446 		break;
447 	case ARM_SPE_NV_L2:
448 		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
449 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
450 		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
451 		break;
452 	case ARM_SPE_NV_PEER_CORE:
453 		data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
454 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
455 		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
456 		break;
457 	/*
458 	 * We don't know if this is L1, L2 but we do know it was a cache-2-cache
459 	 * transfer, so set SNOOPX_PEER
460 	 */
461 	case ARM_SPE_NV_LOCAL_CLUSTER:
462 	case ARM_SPE_NV_PEER_CLUSTER:
463 		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
464 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
465 		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
466 		break;
467 	/*
468 	 * System cache is assumed to be L3
469 	 */
470 	case ARM_SPE_NV_SYS_CACHE:
471 		data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
472 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
473 		data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
474 		break;
475 	/*
476 	 * We don't know what level it hit in, except it came from the other
477 	 * socket
478 	 */
479 	case ARM_SPE_NV_REMOTE:
480 		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1;
481 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
482 		data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
483 		data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
484 		break;
485 	case ARM_SPE_NV_DRAM:
486 		data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
487 		data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
488 		data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
489 		break;
490 	default:
491 		break;
492 	}
493 }
494 
495 static void arm_spe__synth_data_source_generic(const struct arm_spe_record *record,
496 					       union perf_mem_data_src *data_src)
497 {
498 	if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
499 		data_src->mem_lvl = PERF_MEM_LVL_L3;
500 
501 		if (record->type & ARM_SPE_LLC_MISS)
502 			data_src->mem_lvl |= PERF_MEM_LVL_MISS;
503 		else
504 			data_src->mem_lvl |= PERF_MEM_LVL_HIT;
505 	} else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
506 		data_src->mem_lvl = PERF_MEM_LVL_L1;
507 
508 		if (record->type & ARM_SPE_L1D_MISS)
509 			data_src->mem_lvl |= PERF_MEM_LVL_MISS;
510 		else
511 			data_src->mem_lvl |= PERF_MEM_LVL_HIT;
512 	}
513 
514 	if (record->type & ARM_SPE_REMOTE_ACCESS)
515 		data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
516 }
517 
518 static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr)
519 {
520 	union perf_mem_data_src	data_src = { .mem_op = PERF_MEM_OP_NA };
521 	bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe);
522 
523 	if (record->op & ARM_SPE_OP_LD)
524 		data_src.mem_op = PERF_MEM_OP_LOAD;
525 	else if (record->op & ARM_SPE_OP_ST)
526 		data_src.mem_op = PERF_MEM_OP_STORE;
527 	else
528 		return 0;
529 
530 	if (is_neoverse)
531 		arm_spe__synth_data_source_neoverse(record, &data_src);
532 	else
533 		arm_spe__synth_data_source_generic(record, &data_src);
534 
535 	if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
536 		data_src.mem_dtlb = PERF_MEM_TLB_WK;
537 
538 		if (record->type & ARM_SPE_TLB_MISS)
539 			data_src.mem_dtlb |= PERF_MEM_TLB_MISS;
540 		else
541 			data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
542 	}
543 
544 	return data_src.val;
545 }
546 
547 static int arm_spe_sample(struct arm_spe_queue *speq)
548 {
549 	const struct arm_spe_record *record = &speq->decoder->record;
550 	struct arm_spe *spe = speq->spe;
551 	u64 data_src;
552 	int err;
553 
554 	data_src = arm_spe__synth_data_source(record, spe->midr);
555 
556 	if (spe->sample_flc) {
557 		if (record->type & ARM_SPE_L1D_MISS) {
558 			err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id,
559 							data_src);
560 			if (err)
561 				return err;
562 		}
563 
564 		if (record->type & ARM_SPE_L1D_ACCESS) {
565 			err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id,
566 							data_src);
567 			if (err)
568 				return err;
569 		}
570 	}
571 
572 	if (spe->sample_llc) {
573 		if (record->type & ARM_SPE_LLC_MISS) {
574 			err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
575 							data_src);
576 			if (err)
577 				return err;
578 		}
579 
580 		if (record->type & ARM_SPE_LLC_ACCESS) {
581 			err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
582 							data_src);
583 			if (err)
584 				return err;
585 		}
586 	}
587 
588 	if (spe->sample_tlb) {
589 		if (record->type & ARM_SPE_TLB_MISS) {
590 			err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
591 							data_src);
592 			if (err)
593 				return err;
594 		}
595 
596 		if (record->type & ARM_SPE_TLB_ACCESS) {
597 			err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
598 							data_src);
599 			if (err)
600 				return err;
601 		}
602 	}
603 
604 	if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) {
605 		err = arm_spe__synth_branch_sample(speq, spe->branch_miss_id);
606 		if (err)
607 			return err;
608 	}
609 
610 	if (spe->sample_remote_access &&
611 	    (record->type & ARM_SPE_REMOTE_ACCESS)) {
612 		err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
613 						data_src);
614 		if (err)
615 			return err;
616 	}
617 
618 	/*
619 	 * When data_src is zero it means the record is not a memory operation,
620 	 * skip to synthesize memory sample for this case.
621 	 */
622 	if (spe->sample_memory && data_src) {
623 		err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
624 		if (err)
625 			return err;
626 	}
627 
628 	if (spe->sample_instructions) {
629 		err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
630 		if (err)
631 			return err;
632 	}
633 
634 	return 0;
635 }
636 
/*
 * Synthesize samples for a queue until its data runs out or, when decoding
 * with timestamps, until the queue has caught up with *timestamp.
 *
 * @speq:      queue to decode
 * @timestamp: in: timestamp to decode up to; out: the queue timestamp when
 *             the function bails out because the queue reached or passed it
 *
 * Returns 1 when the decoder reports no more data, 0 when the queue has
 * reached *timestamp, or a negative error from updating the tid or from
 * synthesizing a sample.
 */
static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record;
	int ret;

	/* Resolve the kernel start address the first time it is needed */
	if (!spe->kernel_start)
		spe->kernel_start = machine__kernel_start(spe->machine);

	while (1) {
		/*
		 * The usual flow is to decode packets first and then
		 * synthesize samples from the resulting record. Here the flow
		 * is reversed: arm_spe_sample() is called to synthesize
		 * samples before arm_spe_decode().
		 *
		 * There are two reasons for this:
		 * 1. When the queue was set up in arm_spe__setup_queue(),
		 * trace data was already decoded and a record generated, but
		 * no sample has been synthesized for it yet, so it is correct
		 * to synthesize the sample for that pending record first.
		 * 2. After decoding trace data, the record timestamp is
		 * compared with the incoming perf event. If the record is
		 * later than the perf event, the function bails out and the
		 * record is pushed onto the auxtrace heap, deferring its
		 * sample until the next time this function runs. This keeps
		 * Arm SPE samples and other perf events in correct time order.
		 */

		/*
		 * Update pid/tid info.
		 */
		record = &speq->decoder->record;
		if (!spe->timeless_decoding && record->context_id != (u64)-1) {
			ret = arm_spe_set_tid(speq, record->context_id);
			if (ret)
				return ret;

			spe->use_ctx_pkt_for_pid = true;
		}

		ret = arm_spe_sample(speq);
		if (ret)
			return ret;

		ret = arm_spe_decode(speq->decoder);
		if (!ret) {
			pr_debug("No data or all data has been processed.\n");
			return 1;
		}

		/*
		 * An error was detected while decoding SPE trace data; carry
		 * on with the following trace data to find more records.
		 */
		if (ret < 0)
			continue;

		record = &speq->decoder->record;

		/* Update timestamp for the last record */
		if (record->timestamp > speq->timestamp)
			speq->timestamp = record->timestamp;

		/*
		 * If the queue timestamp is not earlier than the timestamp of
		 * the incoming perf event, bail out so that the perf event can
		 * be processed first.
		 */
		if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
			*timestamp = speq->timestamp;
			return 0;
		}
	}

	return 0;
}
715 
716 static int arm_spe__setup_queue(struct arm_spe *spe,
717 			       struct auxtrace_queue *queue,
718 			       unsigned int queue_nr)
719 {
720 	struct arm_spe_queue *speq = queue->priv;
721 	struct arm_spe_record *record;
722 
723 	if (list_empty(&queue->head) || speq)
724 		return 0;
725 
726 	speq = arm_spe__alloc_queue(spe, queue_nr);
727 
728 	if (!speq)
729 		return -ENOMEM;
730 
731 	queue->priv = speq;
732 
733 	if (queue->cpu != -1)
734 		speq->cpu = queue->cpu;
735 
736 	if (!speq->on_heap) {
737 		int ret;
738 
739 		if (spe->timeless_decoding)
740 			return 0;
741 
742 retry:
743 		ret = arm_spe_decode(speq->decoder);
744 
745 		if (!ret)
746 			return 0;
747 
748 		if (ret < 0)
749 			goto retry;
750 
751 		record = &speq->decoder->record;
752 
753 		speq->timestamp = record->timestamp;
754 		ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
755 		if (ret)
756 			return ret;
757 		speq->on_heap = true;
758 	}
759 
760 	return 0;
761 }
762 
763 static int arm_spe__setup_queues(struct arm_spe *spe)
764 {
765 	unsigned int i;
766 	int ret;
767 
768 	for (i = 0; i < spe->queues.nr_queues; i++) {
769 		ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
770 		if (ret)
771 			return ret;
772 	}
773 
774 	return 0;
775 }
776 
777 static int arm_spe__update_queues(struct arm_spe *spe)
778 {
779 	if (spe->queues.new_data) {
780 		spe->queues.new_data = false;
781 		return arm_spe__setup_queues(spe);
782 	}
783 
784 	return 0;
785 }
786 
787 static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
788 {
789 	struct evsel *evsel;
790 	struct evlist *evlist = spe->session->evlist;
791 	bool timeless_decoding = true;
792 
793 	/*
794 	 * Circle through the list of event and complain if we find one
795 	 * with the time bit set.
796 	 */
797 	evlist__for_each_entry(evlist, evsel) {
798 		if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
799 			timeless_decoding = false;
800 	}
801 
802 	return timeless_decoding;
803 }
804 
805 static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
806 {
807 	unsigned int queue_nr;
808 	u64 ts;
809 	int ret;
810 
811 	while (1) {
812 		struct auxtrace_queue *queue;
813 		struct arm_spe_queue *speq;
814 
815 		if (!spe->heap.heap_cnt)
816 			return 0;
817 
818 		if (spe->heap.heap_array[0].ordinal >= timestamp)
819 			return 0;
820 
821 		queue_nr = spe->heap.heap_array[0].queue_nr;
822 		queue = &spe->queues.queue_array[queue_nr];
823 		speq = queue->priv;
824 
825 		auxtrace_heap__pop(&spe->heap);
826 
827 		if (spe->heap.heap_cnt) {
828 			ts = spe->heap.heap_array[0].ordinal + 1;
829 			if (ts > timestamp)
830 				ts = timestamp;
831 		} else {
832 			ts = timestamp;
833 		}
834 
835 		/*
836 		 * A previous context-switch event has set pid/tid in the machine's context, so
837 		 * here we need to update the pid/tid in the thread and SPE queue.
838 		 */
839 		if (!spe->use_ctx_pkt_for_pid)
840 			arm_spe_set_pid_tid_cpu(spe, queue);
841 
842 		ret = arm_spe_run_decoder(speq, &ts);
843 		if (ret < 0) {
844 			auxtrace_heap__add(&spe->heap, queue_nr, ts);
845 			return ret;
846 		}
847 
848 		if (!ret) {
849 			ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
850 			if (ret < 0)
851 				return ret;
852 		} else {
853 			speq->on_heap = false;
854 		}
855 	}
856 
857 	return 0;
858 }
859 
860 static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
861 					    u64 time_)
862 {
863 	struct auxtrace_queues *queues = &spe->queues;
864 	unsigned int i;
865 	u64 ts = 0;
866 
867 	for (i = 0; i < queues->nr_queues; i++) {
868 		struct auxtrace_queue *queue = &spe->queues.queue_array[i];
869 		struct arm_spe_queue *speq = queue->priv;
870 
871 		if (speq && (tid == -1 || speq->tid == tid)) {
872 			speq->time = time_;
873 			arm_spe_set_pid_tid_cpu(spe, queue);
874 			arm_spe_run_decoder(speq, &ts);
875 		}
876 	}
877 	return 0;
878 }
879 
880 static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event,
881 				  struct perf_sample *sample)
882 {
883 	pid_t pid, tid;
884 	int cpu;
885 
886 	if (!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT))
887 		return 0;
888 
889 	pid = event->context_switch.next_prev_pid;
890 	tid = event->context_switch.next_prev_tid;
891 	cpu = sample->cpu;
892 
893 	if (tid == -1)
894 		pr_warning("context_switch event has no tid\n");
895 
896 	return machine__set_current_tid(spe->machine, cpu, pid, tid);
897 }
898 
899 static int arm_spe_process_event(struct perf_session *session,
900 				 union perf_event *event,
901 				 struct perf_sample *sample,
902 				 struct perf_tool *tool)
903 {
904 	int err = 0;
905 	u64 timestamp;
906 	struct arm_spe *spe = container_of(session->auxtrace,
907 			struct arm_spe, auxtrace);
908 
909 	if (dump_trace)
910 		return 0;
911 
912 	if (!tool->ordered_events) {
913 		pr_err("SPE trace requires ordered events\n");
914 		return -EINVAL;
915 	}
916 
917 	if (sample->time && (sample->time != (u64) -1))
918 		timestamp = perf_time_to_tsc(sample->time, &spe->tc);
919 	else
920 		timestamp = 0;
921 
922 	if (timestamp || spe->timeless_decoding) {
923 		err = arm_spe__update_queues(spe);
924 		if (err)
925 			return err;
926 	}
927 
928 	if (spe->timeless_decoding) {
929 		if (event->header.type == PERF_RECORD_EXIT) {
930 			err = arm_spe_process_timeless_queues(spe,
931 					event->fork.tid,
932 					sample->time);
933 		}
934 	} else if (timestamp) {
935 		err = arm_spe_process_queues(spe, timestamp);
936 		if (err)
937 			return err;
938 
939 		if (!spe->use_ctx_pkt_for_pid &&
940 		    (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE ||
941 		    event->header.type == PERF_RECORD_SWITCH))
942 			err = arm_spe_context_switch(spe, event, sample);
943 	}
944 
945 	return err;
946 }
947 
948 static int arm_spe_process_auxtrace_event(struct perf_session *session,
949 					  union perf_event *event,
950 					  struct perf_tool *tool __maybe_unused)
951 {
952 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
953 					     auxtrace);
954 
955 	if (!spe->data_queued) {
956 		struct auxtrace_buffer *buffer;
957 		off_t data_offset;
958 		int fd = perf_data__fd(session->data);
959 		int err;
960 
961 		if (perf_data__is_pipe(session->data)) {
962 			data_offset = 0;
963 		} else {
964 			data_offset = lseek(fd, 0, SEEK_CUR);
965 			if (data_offset == -1)
966 				return -errno;
967 		}
968 
969 		err = auxtrace_queues__add_event(&spe->queues, session, event,
970 				data_offset, &buffer);
971 		if (err)
972 			return err;
973 
974 		/* Dump here now we have copied a piped trace out of the pipe */
975 		if (dump_trace) {
976 			if (auxtrace_buffer__get_data(buffer, fd)) {
977 				arm_spe_dump_event(spe, buffer->data,
978 						buffer->size);
979 				auxtrace_buffer__put_data(buffer);
980 			}
981 		}
982 	}
983 
984 	return 0;
985 }
986 
987 static int arm_spe_flush(struct perf_session *session __maybe_unused,
988 			 struct perf_tool *tool __maybe_unused)
989 {
990 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
991 			auxtrace);
992 	int ret;
993 
994 	if (dump_trace)
995 		return 0;
996 
997 	if (!tool->ordered_events)
998 		return -EINVAL;
999 
1000 	ret = arm_spe__update_queues(spe);
1001 	if (ret < 0)
1002 		return ret;
1003 
1004 	if (spe->timeless_decoding)
1005 		return arm_spe_process_timeless_queues(spe, -1,
1006 				MAX_TIMESTAMP - 1);
1007 
1008 	ret = arm_spe_process_queues(spe, MAX_TIMESTAMP);
1009 	if (ret)
1010 		return ret;
1011 
1012 	if (!spe->use_ctx_pkt_for_pid)
1013 		ui__warning("Arm SPE CONTEXT packets not found in the traces.\n"
1014 			    "Matching of TIDs to SPE events could be inaccurate.\n");
1015 
1016 	return 0;
1017 }
1018 
1019 static void arm_spe_free_queue(void *priv)
1020 {
1021 	struct arm_spe_queue *speq = priv;
1022 
1023 	if (!speq)
1024 		return;
1025 	thread__zput(speq->thread);
1026 	arm_spe_decoder_free(speq->decoder);
1027 	zfree(&speq->event_buf);
1028 	free(speq);
1029 }
1030 
1031 static void arm_spe_free_events(struct perf_session *session)
1032 {
1033 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
1034 					     auxtrace);
1035 	struct auxtrace_queues *queues = &spe->queues;
1036 	unsigned int i;
1037 
1038 	for (i = 0; i < queues->nr_queues; i++) {
1039 		arm_spe_free_queue(queues->queue_array[i].priv);
1040 		queues->queue_array[i].priv = NULL;
1041 	}
1042 	auxtrace_queues__free(queues);
1043 }
1044 
1045 static void arm_spe_free(struct perf_session *session)
1046 {
1047 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
1048 					     auxtrace);
1049 
1050 	auxtrace_heap__free(&spe->heap);
1051 	arm_spe_free_events(session);
1052 	session->auxtrace = NULL;
1053 	free(spe);
1054 }
1055 
1056 static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
1057 				      struct evsel *evsel)
1058 {
1059 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);
1060 
1061 	return evsel->core.attr.type == spe->pmu_type;
1062 }
1063 
/*
 * printf-style format strings for dumping the auxtrace info array, indexed
 * by the position of each value in that array.
 */
static const char * const arm_spe_info_fmts[] = {
	[ARM_SPE_PMU_TYPE]		= "  PMU Type           %"PRId64"\n",
};
1067 
1068 static void arm_spe_print_info(__u64 *arr)
1069 {
1070 	if (!dump_trace)
1071 		return;
1072 
1073 	fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]);
1074 }
1075 
/*
 * Carrier used while synthesizing attribute events: the tool passed to the
 * synthesis callback is embedded here so the callback can get back to the
 * session with container_of().
 */
struct arm_spe_synth {
	struct perf_tool dummy_tool;
	struct perf_session *session;
};
1080 
1081 static int arm_spe_event_synth(struct perf_tool *tool,
1082 			       union perf_event *event,
1083 			       struct perf_sample *sample __maybe_unused,
1084 			       struct machine *machine __maybe_unused)
1085 {
1086 	struct arm_spe_synth *arm_spe_synth =
1087 		      container_of(tool, struct arm_spe_synth, dummy_tool);
1088 
1089 	return perf_session__deliver_synth_event(arm_spe_synth->session,
1090 						 event, NULL);
1091 }
1092 
1093 static int arm_spe_synth_event(struct perf_session *session,
1094 			       struct perf_event_attr *attr, u64 id)
1095 {
1096 	struct arm_spe_synth arm_spe_synth;
1097 
1098 	memset(&arm_spe_synth, 0, sizeof(struct arm_spe_synth));
1099 	arm_spe_synth.session = session;
1100 
1101 	return perf_event__synthesize_attr(&arm_spe_synth.dummy_tool, attr, 1,
1102 					   &id, arm_spe_event_synth);
1103 }
1104 
1105 static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
1106 				    const char *name)
1107 {
1108 	struct evsel *evsel;
1109 
1110 	evlist__for_each_entry(evlist, evsel) {
1111 		if (evsel->core.id && evsel->core.id[0] == id) {
1112 			if (evsel->name)
1113 				zfree(&evsel->name);
1114 			evsel->name = strdup(name);
1115 			break;
1116 		}
1117 	}
1118 }
1119 
1120 static int
1121 arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
1122 {
1123 	struct evlist *evlist = session->evlist;
1124 	struct evsel *evsel;
1125 	struct perf_event_attr attr;
1126 	bool found = false;
1127 	u64 id;
1128 	int err;
1129 
1130 	evlist__for_each_entry(evlist, evsel) {
1131 		if (evsel->core.attr.type == spe->pmu_type) {
1132 			found = true;
1133 			break;
1134 		}
1135 	}
1136 
1137 	if (!found) {
1138 		pr_debug("No selected events with SPE trace data\n");
1139 		return 0;
1140 	}
1141 
1142 	memset(&attr, 0, sizeof(struct perf_event_attr));
1143 	attr.size = sizeof(struct perf_event_attr);
1144 	attr.type = PERF_TYPE_HARDWARE;
1145 	attr.sample_type = evsel->core.attr.sample_type &
1146 				(PERF_SAMPLE_MASK | PERF_SAMPLE_PHYS_ADDR);
1147 	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1148 			    PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC |
1149 			    PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR;
1150 	if (spe->timeless_decoding)
1151 		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
1152 	else
1153 		attr.sample_type |= PERF_SAMPLE_TIME;
1154 
1155 	spe->sample_type = attr.sample_type;
1156 
1157 	attr.exclude_user = evsel->core.attr.exclude_user;
1158 	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
1159 	attr.exclude_hv = evsel->core.attr.exclude_hv;
1160 	attr.exclude_host = evsel->core.attr.exclude_host;
1161 	attr.exclude_guest = evsel->core.attr.exclude_guest;
1162 	attr.sample_id_all = evsel->core.attr.sample_id_all;
1163 	attr.read_format = evsel->core.attr.read_format;
1164 
1165 	/* create new id val to be a fixed offset from evsel id */
1166 	id = evsel->core.id[0] + 1000000000;
1167 
1168 	if (!id)
1169 		id = 1;
1170 
1171 	if (spe->synth_opts.flc) {
1172 		spe->sample_flc = true;
1173 
1174 		/* Level 1 data cache miss */
1175 		err = arm_spe_synth_event(session, &attr, id);
1176 		if (err)
1177 			return err;
1178 		spe->l1d_miss_id = id;
1179 		arm_spe_set_event_name(evlist, id, "l1d-miss");
1180 		id += 1;
1181 
1182 		/* Level 1 data cache access */
1183 		err = arm_spe_synth_event(session, &attr, id);
1184 		if (err)
1185 			return err;
1186 		spe->l1d_access_id = id;
1187 		arm_spe_set_event_name(evlist, id, "l1d-access");
1188 		id += 1;
1189 	}
1190 
1191 	if (spe->synth_opts.llc) {
1192 		spe->sample_llc = true;
1193 
1194 		/* Last level cache miss */
1195 		err = arm_spe_synth_event(session, &attr, id);
1196 		if (err)
1197 			return err;
1198 		spe->llc_miss_id = id;
1199 		arm_spe_set_event_name(evlist, id, "llc-miss");
1200 		id += 1;
1201 
1202 		/* Last level cache access */
1203 		err = arm_spe_synth_event(session, &attr, id);
1204 		if (err)
1205 			return err;
1206 		spe->llc_access_id = id;
1207 		arm_spe_set_event_name(evlist, id, "llc-access");
1208 		id += 1;
1209 	}
1210 
1211 	if (spe->synth_opts.tlb) {
1212 		spe->sample_tlb = true;
1213 
1214 		/* TLB miss */
1215 		err = arm_spe_synth_event(session, &attr, id);
1216 		if (err)
1217 			return err;
1218 		spe->tlb_miss_id = id;
1219 		arm_spe_set_event_name(evlist, id, "tlb-miss");
1220 		id += 1;
1221 
1222 		/* TLB access */
1223 		err = arm_spe_synth_event(session, &attr, id);
1224 		if (err)
1225 			return err;
1226 		spe->tlb_access_id = id;
1227 		arm_spe_set_event_name(evlist, id, "tlb-access");
1228 		id += 1;
1229 	}
1230 
1231 	if (spe->synth_opts.branches) {
1232 		spe->sample_branch = true;
1233 
1234 		/* Branch miss */
1235 		err = arm_spe_synth_event(session, &attr, id);
1236 		if (err)
1237 			return err;
1238 		spe->branch_miss_id = id;
1239 		arm_spe_set_event_name(evlist, id, "branch-miss");
1240 		id += 1;
1241 	}
1242 
1243 	if (spe->synth_opts.remote_access) {
1244 		spe->sample_remote_access = true;
1245 
1246 		/* Remote access */
1247 		err = arm_spe_synth_event(session, &attr, id);
1248 		if (err)
1249 			return err;
1250 		spe->remote_access_id = id;
1251 		arm_spe_set_event_name(evlist, id, "remote-access");
1252 		id += 1;
1253 	}
1254 
1255 	if (spe->synth_opts.mem) {
1256 		spe->sample_memory = true;
1257 
1258 		err = arm_spe_synth_event(session, &attr, id);
1259 		if (err)
1260 			return err;
1261 		spe->memory_id = id;
1262 		arm_spe_set_event_name(evlist, id, "memory");
1263 		id += 1;
1264 	}
1265 
1266 	if (spe->synth_opts.instructions) {
1267 		if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) {
1268 			pr_warning("Only instruction-based sampling period is currently supported by Arm SPE.\n");
1269 			goto synth_instructions_out;
1270 		}
1271 		if (spe->synth_opts.period > 1)
1272 			pr_warning("Arm SPE has a hardware-based sample period.\n"
1273 				   "Additional instruction events will be discarded by --itrace\n");
1274 
1275 		spe->sample_instructions = true;
1276 		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1277 		attr.sample_period = spe->synth_opts.period;
1278 		spe->instructions_sample_period = attr.sample_period;
1279 		err = arm_spe_synth_event(session, &attr, id);
1280 		if (err)
1281 			return err;
1282 		spe->instructions_id = id;
1283 		arm_spe_set_event_name(evlist, id, "instructions");
1284 	}
1285 synth_instructions_out:
1286 
1287 	return 0;
1288 }
1289 
1290 int arm_spe_process_auxtrace_info(union perf_event *event,
1291 				  struct perf_session *session)
1292 {
1293 	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
1294 	size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
1295 	struct perf_record_time_conv *tc = &session->time_conv;
1296 	const char *cpuid = perf_env__cpuid(session->evlist->env);
1297 	u64 midr = strtol(cpuid, NULL, 16);
1298 	struct arm_spe *spe;
1299 	int err;
1300 
1301 	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
1302 					min_sz)
1303 		return -EINVAL;
1304 
1305 	spe = zalloc(sizeof(struct arm_spe));
1306 	if (!spe)
1307 		return -ENOMEM;
1308 
1309 	err = auxtrace_queues__init(&spe->queues);
1310 	if (err)
1311 		goto err_free;
1312 
1313 	spe->session = session;
1314 	spe->machine = &session->machines.host; /* No kvm support */
1315 	spe->auxtrace_type = auxtrace_info->type;
1316 	spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
1317 	spe->midr = midr;
1318 
1319 	spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);
1320 
1321 	/*
1322 	 * The synthesized event PERF_RECORD_TIME_CONV has been handled ahead
1323 	 * and the parameters for hardware clock are stored in the session
1324 	 * context.  Passes these parameters to the struct perf_tsc_conversion
1325 	 * in "spe->tc", which is used for later conversion between clock
1326 	 * counter and timestamp.
1327 	 *
1328 	 * For backward compatibility, copies the fields starting from
1329 	 * "time_cycles" only if they are contained in the event.
1330 	 */
1331 	spe->tc.time_shift = tc->time_shift;
1332 	spe->tc.time_mult = tc->time_mult;
1333 	spe->tc.time_zero = tc->time_zero;
1334 
1335 	if (event_contains(*tc, time_cycles)) {
1336 		spe->tc.time_cycles = tc->time_cycles;
1337 		spe->tc.time_mask = tc->time_mask;
1338 		spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
1339 		spe->tc.cap_user_time_short = tc->cap_user_time_short;
1340 	}
1341 
1342 	spe->auxtrace.process_event = arm_spe_process_event;
1343 	spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
1344 	spe->auxtrace.flush_events = arm_spe_flush;
1345 	spe->auxtrace.free_events = arm_spe_free_events;
1346 	spe->auxtrace.free = arm_spe_free;
1347 	spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
1348 	session->auxtrace = &spe->auxtrace;
1349 
1350 	arm_spe_print_info(&auxtrace_info->priv[0]);
1351 
1352 	if (dump_trace)
1353 		return 0;
1354 
1355 	if (session->itrace_synth_opts && session->itrace_synth_opts->set)
1356 		spe->synth_opts = *session->itrace_synth_opts;
1357 	else
1358 		itrace_synth_opts__set_default(&spe->synth_opts, false);
1359 
1360 	err = arm_spe_synth_events(spe, session);
1361 	if (err)
1362 		goto err_free_queues;
1363 
1364 	err = auxtrace_queues__process_index(&spe->queues, session);
1365 	if (err)
1366 		goto err_free_queues;
1367 
1368 	if (spe->queues.populated)
1369 		spe->data_queued = true;
1370 
1371 	return 0;
1372 
1373 err_free_queues:
1374 	auxtrace_queues__free(&spe->queues);
1375 	session->auxtrace = NULL;
1376 err_free:
1377 	free(spe);
1378 	return err;
1379 }
1380