xref: /linux/tools/perf/util/arm-spe.c (revision 6beeaf48db6c548fcfc2ad32739d33af2fef3a5b)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Arm Statistical Profiling Extensions (SPE) support
4  * Copyright (c) 2017-2018, Arm Ltd.
5  */
6 
7 #include <byteswap.h>
8 #include <endian.h>
9 #include <errno.h>
10 #include <inttypes.h>
11 #include <linux/bitops.h>
12 #include <linux/kernel.h>
13 #include <linux/log2.h>
14 #include <linux/types.h>
15 #include <linux/zalloc.h>
16 #include <stdlib.h>
17 #include <unistd.h>
18 
19 #include "auxtrace.h"
20 #include "color.h"
21 #include "debug.h"
22 #include "evlist.h"
23 #include "evsel.h"
24 #include "machine.h"
25 #include "session.h"
26 #include "symbol.h"
27 #include "thread.h"
28 #include "thread-stack.h"
29 #include "tsc.h"
30 #include "tool.h"
31 #include "util/synthetic-events.h"
32 
33 #include "arm-spe.h"
34 #include "arm-spe-decoder/arm-spe-decoder.h"
35 #include "arm-spe-decoder/arm-spe-pkt-decoder.h"
36 
/*
 * Largest value a u64 timestamp can hold. arm_spe_flush() passes it (or one
 * less, in the timeless case) as the time bound when draining the queues.
 */
#define MAX_TIMESTAMP (~0ULL)
38 
/*
 * Per-session Arm SPE decoding state.  One instance is allocated by
 * arm_spe_process_auxtrace_info() and hooked into the session through the
 * embedded 'auxtrace' member (recovered elsewhere with container_of()).
 */
struct arm_spe {
	/* Callback table installed in session->auxtrace */
	struct auxtrace			auxtrace;
	/* AUX trace queues holding the recorded SPE buffers */
	struct auxtrace_queues		queues;
	/* Heap of queues keyed by timestamp, used for time-ordered decoding */
	struct auxtrace_heap		heap;
	/* Which synthesized events were requested (or the defaults) */
	struct itrace_synth_opts        synth_opts;
	/* Copied from the auxtrace info event's 'type' field */
	u32				auxtrace_type;
	struct perf_session		*session;
	/* Always the host machine; see arm_spe_process_auxtrace_info() */
	struct machine			*machine;
	/* PMU type read from the auxtrace info private area */
	u32				pmu_type;

	/* Parameters for converting between the trace clock and perf time */
	struct perf_tsc_conversion	tc;

	/* Set when no event in the evlist samples PERF_SAMPLE_TIME */
	u8				timeless_decoding;
	/* Set once the queues have been populated from the auxtrace index */
	u8				data_queued;

	/* Flags selecting which classes of samples are synthesized */
	u8				sample_flc;
	u8				sample_llc;
	u8				sample_tlb;
	u8				sample_branch;
	u8				sample_remote_access;
	u8				sample_memory;

	/* Sample ids assigned to each synthesized event */
	u64				l1d_miss_id;
	u64				l1d_access_id;
	u64				llc_miss_id;
	u64				llc_access_id;
	u64				tlb_miss_id;
	u64				tlb_access_id;
	u64				branch_miss_id;
	u64				remote_access_id;
	u64				memory_id;

	/* Kernel start address, looked up lazily in arm_spe_run_decoder() */
	u64				kernel_start;

	/* NOTE(review): not referenced by the code visible in this file */
	unsigned long			num_events;
};
75 
/*
 * Decoding state for a single AUX trace queue; stored in the queue's 'priv'
 * pointer by arm_spe__setup_queue().
 */
struct arm_spe_queue {
	/* Back pointer to the owning session state */
	struct arm_spe			*spe;
	/* Index of this queue in spe->queues.queue_array */
	unsigned int			queue_nr;
	/* Buffer most recently returned by auxtrace_buffer__next() */
	struct auxtrace_buffer		*buffer;
	/* Last non-empty buffer handed to the decoder; dropped on the next fetch */
	struct auxtrace_buffer		*old_buffer;
	/* Scratch event of PERF_SAMPLE_MAX_SIZE bytes for synthesized samples */
	union perf_event		*event_buf;
	/* True while this queue has an entry in spe->heap */
	bool				on_heap;
	/* NOTE(review): not referenced by the code visible in this file */
	bool				done;
	pid_t				pid;
	pid_t				tid;
	int				cpu;
	struct arm_spe_decoder		*decoder;
	/* Time passed in by arm_spe_process_timeless_queues() */
	u64				time;
	/* Latest record timestamp seen on this queue */
	u64				timestamp;
	/* Cached thread reference, released with thread__zput() */
	struct thread			*thread;
};
92 
93 static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
94 			 unsigned char *buf, size_t len)
95 {
96 	struct arm_spe_pkt packet;
97 	size_t pos = 0;
98 	int ret, pkt_len, i;
99 	char desc[ARM_SPE_PKT_DESC_MAX];
100 	const char *color = PERF_COLOR_BLUE;
101 
102 	color_fprintf(stdout, color,
103 		      ". ... ARM SPE data: size %zu bytes\n",
104 		      len);
105 
106 	while (len) {
107 		ret = arm_spe_get_packet(buf, len, &packet);
108 		if (ret > 0)
109 			pkt_len = ret;
110 		else
111 			pkt_len = 1;
112 		printf(".");
113 		color_fprintf(stdout, color, "  %08x: ", pos);
114 		for (i = 0; i < pkt_len; i++)
115 			color_fprintf(stdout, color, " %02x", buf[i]);
116 		for (; i < 16; i++)
117 			color_fprintf(stdout, color, "   ");
118 		if (ret > 0) {
119 			ret = arm_spe_pkt_desc(&packet, desc,
120 					       ARM_SPE_PKT_DESC_MAX);
121 			if (!ret)
122 				color_fprintf(stdout, color, " %s\n", desc);
123 		} else {
124 			color_fprintf(stdout, color, " Bad packet!\n");
125 		}
126 		pos += pkt_len;
127 		buf += pkt_len;
128 		len -= pkt_len;
129 	}
130 }
131 
/* Print a separator row, then dump @len bytes of SPE trace found at @buf. */
static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
			       size_t len)
{
	fputs(".\n", stdout);
	arm_spe_dump(spe, buf, len);
}
138 
139 static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
140 {
141 	struct arm_spe_queue *speq = data;
142 	struct auxtrace_buffer *buffer = speq->buffer;
143 	struct auxtrace_buffer *old_buffer = speq->old_buffer;
144 	struct auxtrace_queue *queue;
145 
146 	queue = &speq->spe->queues.queue_array[speq->queue_nr];
147 
148 	buffer = auxtrace_buffer__next(queue, buffer);
149 	/* If no more data, drop the previous auxtrace_buffer and return */
150 	if (!buffer) {
151 		if (old_buffer)
152 			auxtrace_buffer__drop_data(old_buffer);
153 		b->len = 0;
154 		return 0;
155 	}
156 
157 	speq->buffer = buffer;
158 
159 	/* If the aux_buffer doesn't have data associated, try to load it */
160 	if (!buffer->data) {
161 		/* get the file desc associated with the perf data file */
162 		int fd = perf_data__fd(speq->spe->session->data);
163 
164 		buffer->data = auxtrace_buffer__get_data(buffer, fd);
165 		if (!buffer->data)
166 			return -ENOMEM;
167 	}
168 
169 	b->len = buffer->size;
170 	b->buf = buffer->data;
171 
172 	if (b->len) {
173 		if (old_buffer)
174 			auxtrace_buffer__drop_data(old_buffer);
175 		speq->old_buffer = buffer;
176 	} else {
177 		auxtrace_buffer__drop_data(buffer);
178 		return arm_spe_get_trace(b, data);
179 	}
180 
181 	return 0;
182 }
183 
184 static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
185 		unsigned int queue_nr)
186 {
187 	struct arm_spe_params params = { .get_trace = 0, };
188 	struct arm_spe_queue *speq;
189 
190 	speq = zalloc(sizeof(*speq));
191 	if (!speq)
192 		return NULL;
193 
194 	speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
195 	if (!speq->event_buf)
196 		goto out_free;
197 
198 	speq->spe = spe;
199 	speq->queue_nr = queue_nr;
200 	speq->pid = -1;
201 	speq->tid = -1;
202 	speq->cpu = -1;
203 
204 	/* params set */
205 	params.get_trace = arm_spe_get_trace;
206 	params.data = speq;
207 
208 	/* create new decoder */
209 	speq->decoder = arm_spe_decoder_new(&params);
210 	if (!speq->decoder)
211 		goto out_free;
212 
213 	return speq;
214 
215 out_free:
216 	zfree(&speq->event_buf);
217 	free(speq);
218 
219 	return NULL;
220 }
221 
222 static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
223 {
224 	return ip >= spe->kernel_start ?
225 		PERF_RECORD_MISC_KERNEL :
226 		PERF_RECORD_MISC_USER;
227 }
228 
229 static void arm_spe_prep_sample(struct arm_spe *spe,
230 				struct arm_spe_queue *speq,
231 				union perf_event *event,
232 				struct perf_sample *sample)
233 {
234 	struct arm_spe_record *record = &speq->decoder->record;
235 
236 	if (!spe->timeless_decoding)
237 		sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);
238 
239 	sample->ip = record->from_ip;
240 	sample->cpumode = arm_spe_cpumode(spe, sample->ip);
241 	sample->pid = speq->pid;
242 	sample->tid = speq->tid;
243 	sample->period = 1;
244 	sample->cpu = speq->cpu;
245 
246 	event->sample.header.type = PERF_RECORD_SAMPLE;
247 	event->sample.header.misc = sample->cpumode;
248 	event->sample.header.size = sizeof(struct perf_event_header);
249 }
250 
251 static inline int
252 arm_spe_deliver_synth_event(struct arm_spe *spe,
253 			    struct arm_spe_queue *speq __maybe_unused,
254 			    union perf_event *event,
255 			    struct perf_sample *sample)
256 {
257 	int ret;
258 
259 	ret = perf_session__deliver_synth_event(spe->session, event, sample);
260 	if (ret)
261 		pr_err("ARM SPE: failed to deliver event, error %d\n", ret);
262 
263 	return ret;
264 }
265 
266 static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
267 				     u64 spe_events_id, u64 data_src)
268 {
269 	struct arm_spe *spe = speq->spe;
270 	struct arm_spe_record *record = &speq->decoder->record;
271 	union perf_event *event = speq->event_buf;
272 	struct perf_sample sample = { .ip = 0, };
273 
274 	arm_spe_prep_sample(spe, speq, event, &sample);
275 
276 	sample.id = spe_events_id;
277 	sample.stream_id = spe_events_id;
278 	sample.addr = record->virt_addr;
279 	sample.phys_addr = record->phys_addr;
280 	sample.data_src = data_src;
281 
282 	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
283 }
284 
285 static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
286 					u64 spe_events_id)
287 {
288 	struct arm_spe *spe = speq->spe;
289 	struct arm_spe_record *record = &speq->decoder->record;
290 	union perf_event *event = speq->event_buf;
291 	struct perf_sample sample = { .ip = 0, };
292 
293 	arm_spe_prep_sample(spe, speq, event, &sample);
294 
295 	sample.id = spe_events_id;
296 	sample.stream_id = spe_events_id;
297 	sample.addr = record->to_ip;
298 
299 	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
300 }
301 
302 #define SPE_MEM_TYPE	(ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \
303 			 ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \
304 			 ARM_SPE_REMOTE_ACCESS)
305 
306 static bool arm_spe__is_memory_event(enum arm_spe_sample_type type)
307 {
308 	if (type & SPE_MEM_TYPE)
309 		return true;
310 
311 	return false;
312 }
313 
314 static u64 arm_spe__synth_data_source(const struct arm_spe_record *record)
315 {
316 	union perf_mem_data_src	data_src = { 0 };
317 
318 	if (record->op == ARM_SPE_LD)
319 		data_src.mem_op = PERF_MEM_OP_LOAD;
320 	else
321 		data_src.mem_op = PERF_MEM_OP_STORE;
322 
323 	if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
324 		data_src.mem_lvl = PERF_MEM_LVL_L3;
325 
326 		if (record->type & ARM_SPE_LLC_MISS)
327 			data_src.mem_lvl |= PERF_MEM_LVL_MISS;
328 		else
329 			data_src.mem_lvl |= PERF_MEM_LVL_HIT;
330 	} else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
331 		data_src.mem_lvl = PERF_MEM_LVL_L1;
332 
333 		if (record->type & ARM_SPE_L1D_MISS)
334 			data_src.mem_lvl |= PERF_MEM_LVL_MISS;
335 		else
336 			data_src.mem_lvl |= PERF_MEM_LVL_HIT;
337 	}
338 
339 	if (record->type & ARM_SPE_REMOTE_ACCESS)
340 		data_src.mem_lvl |= PERF_MEM_LVL_REM_CCE1;
341 
342 	if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
343 		data_src.mem_dtlb = PERF_MEM_TLB_WK;
344 
345 		if (record->type & ARM_SPE_TLB_MISS)
346 			data_src.mem_dtlb |= PERF_MEM_TLB_MISS;
347 		else
348 			data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
349 	}
350 
351 	return data_src.val;
352 }
353 
354 static int arm_spe_sample(struct arm_spe_queue *speq)
355 {
356 	const struct arm_spe_record *record = &speq->decoder->record;
357 	struct arm_spe *spe = speq->spe;
358 	u64 data_src;
359 	int err;
360 
361 	data_src = arm_spe__synth_data_source(record);
362 
363 	if (spe->sample_flc) {
364 		if (record->type & ARM_SPE_L1D_MISS) {
365 			err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id,
366 							data_src);
367 			if (err)
368 				return err;
369 		}
370 
371 		if (record->type & ARM_SPE_L1D_ACCESS) {
372 			err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id,
373 							data_src);
374 			if (err)
375 				return err;
376 		}
377 	}
378 
379 	if (spe->sample_llc) {
380 		if (record->type & ARM_SPE_LLC_MISS) {
381 			err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
382 							data_src);
383 			if (err)
384 				return err;
385 		}
386 
387 		if (record->type & ARM_SPE_LLC_ACCESS) {
388 			err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
389 							data_src);
390 			if (err)
391 				return err;
392 		}
393 	}
394 
395 	if (spe->sample_tlb) {
396 		if (record->type & ARM_SPE_TLB_MISS) {
397 			err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
398 							data_src);
399 			if (err)
400 				return err;
401 		}
402 
403 		if (record->type & ARM_SPE_TLB_ACCESS) {
404 			err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
405 							data_src);
406 			if (err)
407 				return err;
408 		}
409 	}
410 
411 	if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) {
412 		err = arm_spe__synth_branch_sample(speq, spe->branch_miss_id);
413 		if (err)
414 			return err;
415 	}
416 
417 	if (spe->sample_remote_access &&
418 	    (record->type & ARM_SPE_REMOTE_ACCESS)) {
419 		err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
420 						data_src);
421 		if (err)
422 			return err;
423 	}
424 
425 	if (spe->sample_memory && arm_spe__is_memory_event(record->type)) {
426 		err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
427 		if (err)
428 			return err;
429 	}
430 
431 	return 0;
432 }
433 
434 static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
435 {
436 	struct arm_spe *spe = speq->spe;
437 	struct arm_spe_record *record;
438 	int ret;
439 
440 	if (!spe->kernel_start)
441 		spe->kernel_start = machine__kernel_start(spe->machine);
442 
443 	while (1) {
444 		/*
445 		 * The usual logic is firstly to decode the packets, and then
446 		 * based the record to synthesize sample; but here the flow is
447 		 * reversed: it calls arm_spe_sample() for synthesizing samples
448 		 * prior to arm_spe_decode().
449 		 *
450 		 * Two reasons for this code logic:
451 		 * 1. Firstly, when setup queue in arm_spe__setup_queue(), it
452 		 * has decoded trace data and generated a record, but the record
453 		 * is left to generate sample until run to here, so it's correct
454 		 * to synthesize sample for the left record.
455 		 * 2. After decoding trace data, it needs to compare the record
456 		 * timestamp with the coming perf event, if the record timestamp
457 		 * is later than the perf event, it needs bail out and pushs the
458 		 * record into auxtrace heap, thus the record can be deferred to
459 		 * synthesize sample until run to here at the next time; so this
460 		 * can correlate samples between Arm SPE trace data and other
461 		 * perf events with correct time ordering.
462 		 */
463 		ret = arm_spe_sample(speq);
464 		if (ret)
465 			return ret;
466 
467 		ret = arm_spe_decode(speq->decoder);
468 		if (!ret) {
469 			pr_debug("No data or all data has been processed.\n");
470 			return 1;
471 		}
472 
473 		/*
474 		 * Error is detected when decode SPE trace data, continue to
475 		 * the next trace data and find out more records.
476 		 */
477 		if (ret < 0)
478 			continue;
479 
480 		record = &speq->decoder->record;
481 
482 		/* Update timestamp for the last record */
483 		if (record->timestamp > speq->timestamp)
484 			speq->timestamp = record->timestamp;
485 
486 		/*
487 		 * If the timestamp of the queue is later than timestamp of the
488 		 * coming perf event, bail out so can allow the perf event to
489 		 * be processed ahead.
490 		 */
491 		if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
492 			*timestamp = speq->timestamp;
493 			return 0;
494 		}
495 	}
496 
497 	return 0;
498 }
499 
500 static int arm_spe__setup_queue(struct arm_spe *spe,
501 			       struct auxtrace_queue *queue,
502 			       unsigned int queue_nr)
503 {
504 	struct arm_spe_queue *speq = queue->priv;
505 	struct arm_spe_record *record;
506 
507 	if (list_empty(&queue->head) || speq)
508 		return 0;
509 
510 	speq = arm_spe__alloc_queue(spe, queue_nr);
511 
512 	if (!speq)
513 		return -ENOMEM;
514 
515 	queue->priv = speq;
516 
517 	if (queue->cpu != -1)
518 		speq->cpu = queue->cpu;
519 
520 	if (!speq->on_heap) {
521 		int ret;
522 
523 		if (spe->timeless_decoding)
524 			return 0;
525 
526 retry:
527 		ret = arm_spe_decode(speq->decoder);
528 
529 		if (!ret)
530 			return 0;
531 
532 		if (ret < 0)
533 			goto retry;
534 
535 		record = &speq->decoder->record;
536 
537 		speq->timestamp = record->timestamp;
538 		ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
539 		if (ret)
540 			return ret;
541 		speq->on_heap = true;
542 	}
543 
544 	return 0;
545 }
546 
547 static int arm_spe__setup_queues(struct arm_spe *spe)
548 {
549 	unsigned int i;
550 	int ret;
551 
552 	for (i = 0; i < spe->queues.nr_queues; i++) {
553 		ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
554 		if (ret)
555 			return ret;
556 	}
557 
558 	return 0;
559 }
560 
561 static int arm_spe__update_queues(struct arm_spe *spe)
562 {
563 	if (spe->queues.new_data) {
564 		spe->queues.new_data = false;
565 		return arm_spe__setup_queues(spe);
566 	}
567 
568 	return 0;
569 }
570 
571 static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
572 {
573 	struct evsel *evsel;
574 	struct evlist *evlist = spe->session->evlist;
575 	bool timeless_decoding = true;
576 
577 	/*
578 	 * Circle through the list of event and complain if we find one
579 	 * with the time bit set.
580 	 */
581 	evlist__for_each_entry(evlist, evsel) {
582 		if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
583 			timeless_decoding = false;
584 	}
585 
586 	return timeless_decoding;
587 }
588 
589 static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
590 				    struct auxtrace_queue *queue)
591 {
592 	struct arm_spe_queue *speq = queue->priv;
593 	pid_t tid;
594 
595 	tid = machine__get_current_tid(spe->machine, speq->cpu);
596 	if (tid != -1) {
597 		speq->tid = tid;
598 		thread__zput(speq->thread);
599 	} else
600 		speq->tid = queue->tid;
601 
602 	if ((!speq->thread) && (speq->tid != -1)) {
603 		speq->thread = machine__find_thread(spe->machine, -1,
604 						    speq->tid);
605 	}
606 
607 	if (speq->thread) {
608 		speq->pid = speq->thread->pid_;
609 		if (queue->cpu == -1)
610 			speq->cpu = speq->thread->cpu;
611 	}
612 }
613 
614 static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
615 {
616 	unsigned int queue_nr;
617 	u64 ts;
618 	int ret;
619 
620 	while (1) {
621 		struct auxtrace_queue *queue;
622 		struct arm_spe_queue *speq;
623 
624 		if (!spe->heap.heap_cnt)
625 			return 0;
626 
627 		if (spe->heap.heap_array[0].ordinal >= timestamp)
628 			return 0;
629 
630 		queue_nr = spe->heap.heap_array[0].queue_nr;
631 		queue = &spe->queues.queue_array[queue_nr];
632 		speq = queue->priv;
633 
634 		auxtrace_heap__pop(&spe->heap);
635 
636 		if (spe->heap.heap_cnt) {
637 			ts = spe->heap.heap_array[0].ordinal + 1;
638 			if (ts > timestamp)
639 				ts = timestamp;
640 		} else {
641 			ts = timestamp;
642 		}
643 
644 		arm_spe_set_pid_tid_cpu(spe, queue);
645 
646 		ret = arm_spe_run_decoder(speq, &ts);
647 		if (ret < 0) {
648 			auxtrace_heap__add(&spe->heap, queue_nr, ts);
649 			return ret;
650 		}
651 
652 		if (!ret) {
653 			ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
654 			if (ret < 0)
655 				return ret;
656 		} else {
657 			speq->on_heap = false;
658 		}
659 	}
660 
661 	return 0;
662 }
663 
664 static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
665 					    u64 time_)
666 {
667 	struct auxtrace_queues *queues = &spe->queues;
668 	unsigned int i;
669 	u64 ts = 0;
670 
671 	for (i = 0; i < queues->nr_queues; i++) {
672 		struct auxtrace_queue *queue = &spe->queues.queue_array[i];
673 		struct arm_spe_queue *speq = queue->priv;
674 
675 		if (speq && (tid == -1 || speq->tid == tid)) {
676 			speq->time = time_;
677 			arm_spe_set_pid_tid_cpu(spe, queue);
678 			arm_spe_run_decoder(speq, &ts);
679 		}
680 	}
681 	return 0;
682 }
683 
684 static int arm_spe_process_event(struct perf_session *session,
685 				 union perf_event *event,
686 				 struct perf_sample *sample,
687 				 struct perf_tool *tool)
688 {
689 	int err = 0;
690 	u64 timestamp;
691 	struct arm_spe *spe = container_of(session->auxtrace,
692 			struct arm_spe, auxtrace);
693 
694 	if (dump_trace)
695 		return 0;
696 
697 	if (!tool->ordered_events) {
698 		pr_err("SPE trace requires ordered events\n");
699 		return -EINVAL;
700 	}
701 
702 	if (sample->time && (sample->time != (u64) -1))
703 		timestamp = perf_time_to_tsc(sample->time, &spe->tc);
704 	else
705 		timestamp = 0;
706 
707 	if (timestamp || spe->timeless_decoding) {
708 		err = arm_spe__update_queues(spe);
709 		if (err)
710 			return err;
711 	}
712 
713 	if (spe->timeless_decoding) {
714 		if (event->header.type == PERF_RECORD_EXIT) {
715 			err = arm_spe_process_timeless_queues(spe,
716 					event->fork.tid,
717 					sample->time);
718 		}
719 	} else if (timestamp) {
720 		err = arm_spe_process_queues(spe, timestamp);
721 	}
722 
723 	return err;
724 }
725 
726 static int arm_spe_process_auxtrace_event(struct perf_session *session,
727 					  union perf_event *event,
728 					  struct perf_tool *tool __maybe_unused)
729 {
730 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
731 					     auxtrace);
732 
733 	if (!spe->data_queued) {
734 		struct auxtrace_buffer *buffer;
735 		off_t data_offset;
736 		int fd = perf_data__fd(session->data);
737 		int err;
738 
739 		if (perf_data__is_pipe(session->data)) {
740 			data_offset = 0;
741 		} else {
742 			data_offset = lseek(fd, 0, SEEK_CUR);
743 			if (data_offset == -1)
744 				return -errno;
745 		}
746 
747 		err = auxtrace_queues__add_event(&spe->queues, session, event,
748 				data_offset, &buffer);
749 		if (err)
750 			return err;
751 
752 		/* Dump here now we have copied a piped trace out of the pipe */
753 		if (dump_trace) {
754 			if (auxtrace_buffer__get_data(buffer, fd)) {
755 				arm_spe_dump_event(spe, buffer->data,
756 						buffer->size);
757 				auxtrace_buffer__put_data(buffer);
758 			}
759 		}
760 	}
761 
762 	return 0;
763 }
764 
765 static int arm_spe_flush(struct perf_session *session __maybe_unused,
766 			 struct perf_tool *tool __maybe_unused)
767 {
768 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
769 			auxtrace);
770 	int ret;
771 
772 	if (dump_trace)
773 		return 0;
774 
775 	if (!tool->ordered_events)
776 		return -EINVAL;
777 
778 	ret = arm_spe__update_queues(spe);
779 	if (ret < 0)
780 		return ret;
781 
782 	if (spe->timeless_decoding)
783 		return arm_spe_process_timeless_queues(spe, -1,
784 				MAX_TIMESTAMP - 1);
785 
786 	return arm_spe_process_queues(spe, MAX_TIMESTAMP);
787 }
788 
789 static void arm_spe_free_queue(void *priv)
790 {
791 	struct arm_spe_queue *speq = priv;
792 
793 	if (!speq)
794 		return;
795 	thread__zput(speq->thread);
796 	arm_spe_decoder_free(speq->decoder);
797 	zfree(&speq->event_buf);
798 	free(speq);
799 }
800 
801 static void arm_spe_free_events(struct perf_session *session)
802 {
803 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
804 					     auxtrace);
805 	struct auxtrace_queues *queues = &spe->queues;
806 	unsigned int i;
807 
808 	for (i = 0; i < queues->nr_queues; i++) {
809 		arm_spe_free_queue(queues->queue_array[i].priv);
810 		queues->queue_array[i].priv = NULL;
811 	}
812 	auxtrace_queues__free(queues);
813 }
814 
815 static void arm_spe_free(struct perf_session *session)
816 {
817 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
818 					     auxtrace);
819 
820 	auxtrace_heap__free(&spe->heap);
821 	arm_spe_free_events(session);
822 	session->auxtrace = NULL;
823 	free(spe);
824 }
825 
826 static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
827 				      struct evsel *evsel)
828 {
829 	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);
830 
831 	return evsel->core.attr.type == spe->pmu_type;
832 }
833 
834 static const char * const arm_spe_info_fmts[] = {
835 	[ARM_SPE_PMU_TYPE]		= "  PMU Type           %"PRId64"\n",
836 };
837 
838 static void arm_spe_print_info(__u64 *arr)
839 {
840 	if (!dump_trace)
841 		return;
842 
843 	fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]);
844 }
845 
846 struct arm_spe_synth {
847 	struct perf_tool dummy_tool;
848 	struct perf_session *session;
849 };
850 
851 static int arm_spe_event_synth(struct perf_tool *tool,
852 			       union perf_event *event,
853 			       struct perf_sample *sample __maybe_unused,
854 			       struct machine *machine __maybe_unused)
855 {
856 	struct arm_spe_synth *arm_spe_synth =
857 		      container_of(tool, struct arm_spe_synth, dummy_tool);
858 
859 	return perf_session__deliver_synth_event(arm_spe_synth->session,
860 						 event, NULL);
861 }
862 
863 static int arm_spe_synth_event(struct perf_session *session,
864 			       struct perf_event_attr *attr, u64 id)
865 {
866 	struct arm_spe_synth arm_spe_synth;
867 
868 	memset(&arm_spe_synth, 0, sizeof(struct arm_spe_synth));
869 	arm_spe_synth.session = session;
870 
871 	return perf_event__synthesize_attr(&arm_spe_synth.dummy_tool, attr, 1,
872 					   &id, arm_spe_event_synth);
873 }
874 
875 static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
876 				    const char *name)
877 {
878 	struct evsel *evsel;
879 
880 	evlist__for_each_entry(evlist, evsel) {
881 		if (evsel->core.id && evsel->core.id[0] == id) {
882 			if (evsel->name)
883 				zfree(&evsel->name);
884 			evsel->name = strdup(name);
885 			break;
886 		}
887 	}
888 }
889 
890 static int
891 arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
892 {
893 	struct evlist *evlist = session->evlist;
894 	struct evsel *evsel;
895 	struct perf_event_attr attr;
896 	bool found = false;
897 	u64 id;
898 	int err;
899 
900 	evlist__for_each_entry(evlist, evsel) {
901 		if (evsel->core.attr.type == spe->pmu_type) {
902 			found = true;
903 			break;
904 		}
905 	}
906 
907 	if (!found) {
908 		pr_debug("No selected events with SPE trace data\n");
909 		return 0;
910 	}
911 
912 	memset(&attr, 0, sizeof(struct perf_event_attr));
913 	attr.size = sizeof(struct perf_event_attr);
914 	attr.type = PERF_TYPE_HARDWARE;
915 	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
916 	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
917 			    PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC;
918 	if (spe->timeless_decoding)
919 		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
920 	else
921 		attr.sample_type |= PERF_SAMPLE_TIME;
922 
923 	attr.exclude_user = evsel->core.attr.exclude_user;
924 	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
925 	attr.exclude_hv = evsel->core.attr.exclude_hv;
926 	attr.exclude_host = evsel->core.attr.exclude_host;
927 	attr.exclude_guest = evsel->core.attr.exclude_guest;
928 	attr.sample_id_all = evsel->core.attr.sample_id_all;
929 	attr.read_format = evsel->core.attr.read_format;
930 
931 	/* create new id val to be a fixed offset from evsel id */
932 	id = evsel->core.id[0] + 1000000000;
933 
934 	if (!id)
935 		id = 1;
936 
937 	if (spe->synth_opts.flc) {
938 		spe->sample_flc = true;
939 
940 		/* Level 1 data cache miss */
941 		err = arm_spe_synth_event(session, &attr, id);
942 		if (err)
943 			return err;
944 		spe->l1d_miss_id = id;
945 		arm_spe_set_event_name(evlist, id, "l1d-miss");
946 		id += 1;
947 
948 		/* Level 1 data cache access */
949 		err = arm_spe_synth_event(session, &attr, id);
950 		if (err)
951 			return err;
952 		spe->l1d_access_id = id;
953 		arm_spe_set_event_name(evlist, id, "l1d-access");
954 		id += 1;
955 	}
956 
957 	if (spe->synth_opts.llc) {
958 		spe->sample_llc = true;
959 
960 		/* Last level cache miss */
961 		err = arm_spe_synth_event(session, &attr, id);
962 		if (err)
963 			return err;
964 		spe->llc_miss_id = id;
965 		arm_spe_set_event_name(evlist, id, "llc-miss");
966 		id += 1;
967 
968 		/* Last level cache access */
969 		err = arm_spe_synth_event(session, &attr, id);
970 		if (err)
971 			return err;
972 		spe->llc_access_id = id;
973 		arm_spe_set_event_name(evlist, id, "llc-access");
974 		id += 1;
975 	}
976 
977 	if (spe->synth_opts.tlb) {
978 		spe->sample_tlb = true;
979 
980 		/* TLB miss */
981 		err = arm_spe_synth_event(session, &attr, id);
982 		if (err)
983 			return err;
984 		spe->tlb_miss_id = id;
985 		arm_spe_set_event_name(evlist, id, "tlb-miss");
986 		id += 1;
987 
988 		/* TLB access */
989 		err = arm_spe_synth_event(session, &attr, id);
990 		if (err)
991 			return err;
992 		spe->tlb_access_id = id;
993 		arm_spe_set_event_name(evlist, id, "tlb-access");
994 		id += 1;
995 	}
996 
997 	if (spe->synth_opts.branches) {
998 		spe->sample_branch = true;
999 
1000 		/* Branch miss */
1001 		err = arm_spe_synth_event(session, &attr, id);
1002 		if (err)
1003 			return err;
1004 		spe->branch_miss_id = id;
1005 		arm_spe_set_event_name(evlist, id, "branch-miss");
1006 		id += 1;
1007 	}
1008 
1009 	if (spe->synth_opts.remote_access) {
1010 		spe->sample_remote_access = true;
1011 
1012 		/* Remote access */
1013 		err = arm_spe_synth_event(session, &attr, id);
1014 		if (err)
1015 			return err;
1016 		spe->remote_access_id = id;
1017 		arm_spe_set_event_name(evlist, id, "remote-access");
1018 		id += 1;
1019 	}
1020 
1021 	if (spe->synth_opts.mem) {
1022 		spe->sample_memory = true;
1023 
1024 		err = arm_spe_synth_event(session, &attr, id);
1025 		if (err)
1026 			return err;
1027 		spe->memory_id = id;
1028 		arm_spe_set_event_name(evlist, id, "memory");
1029 	}
1030 
1031 	return 0;
1032 }
1033 
1034 int arm_spe_process_auxtrace_info(union perf_event *event,
1035 				  struct perf_session *session)
1036 {
1037 	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
1038 	size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
1039 	struct perf_record_time_conv *tc = &session->time_conv;
1040 	struct arm_spe *spe;
1041 	int err;
1042 
1043 	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
1044 					min_sz)
1045 		return -EINVAL;
1046 
1047 	spe = zalloc(sizeof(struct arm_spe));
1048 	if (!spe)
1049 		return -ENOMEM;
1050 
1051 	err = auxtrace_queues__init(&spe->queues);
1052 	if (err)
1053 		goto err_free;
1054 
1055 	spe->session = session;
1056 	spe->machine = &session->machines.host; /* No kvm support */
1057 	spe->auxtrace_type = auxtrace_info->type;
1058 	spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
1059 
1060 	spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);
1061 
1062 	/*
1063 	 * The synthesized event PERF_RECORD_TIME_CONV has been handled ahead
1064 	 * and the parameters for hardware clock are stored in the session
1065 	 * context.  Passes these parameters to the struct perf_tsc_conversion
1066 	 * in "spe->tc", which is used for later conversion between clock
1067 	 * counter and timestamp.
1068 	 *
1069 	 * For backward compatibility, copies the fields starting from
1070 	 * "time_cycles" only if they are contained in the event.
1071 	 */
1072 	spe->tc.time_shift = tc->time_shift;
1073 	spe->tc.time_mult = tc->time_mult;
1074 	spe->tc.time_zero = tc->time_zero;
1075 
1076 	if (event_contains(*tc, time_cycles)) {
1077 		spe->tc.time_cycles = tc->time_cycles;
1078 		spe->tc.time_mask = tc->time_mask;
1079 		spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
1080 		spe->tc.cap_user_time_short = tc->cap_user_time_short;
1081 	}
1082 
1083 	spe->auxtrace.process_event = arm_spe_process_event;
1084 	spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
1085 	spe->auxtrace.flush_events = arm_spe_flush;
1086 	spe->auxtrace.free_events = arm_spe_free_events;
1087 	spe->auxtrace.free = arm_spe_free;
1088 	spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
1089 	session->auxtrace = &spe->auxtrace;
1090 
1091 	arm_spe_print_info(&auxtrace_info->priv[0]);
1092 
1093 	if (dump_trace)
1094 		return 0;
1095 
1096 	if (session->itrace_synth_opts && session->itrace_synth_opts->set)
1097 		spe->synth_opts = *session->itrace_synth_opts;
1098 	else
1099 		itrace_synth_opts__set_default(&spe->synth_opts, false);
1100 
1101 	err = arm_spe_synth_events(spe, session);
1102 	if (err)
1103 		goto err_free_queues;
1104 
1105 	err = auxtrace_queues__process_index(&spe->queues, session);
1106 	if (err)
1107 		goto err_free_queues;
1108 
1109 	if (spe->queues.populated)
1110 		spe->data_queued = true;
1111 
1112 	return 0;
1113 
1114 err_free_queues:
1115 	auxtrace_queues__free(&spe->queues);
1116 	session->auxtrace = NULL;
1117 err_free:
1118 	free(spe);
1119 	return err;
1120 }
1121