1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Arm Statistical Profiling Extensions (SPE) support
4 * Copyright (c) 2017-2018, Arm Ltd.
5 */
6
7 #include <byteswap.h>
8 #include <endian.h>
9 #include <errno.h>
10 #include <inttypes.h>
11 #include <linux/bitops.h>
12 #include <linux/kernel.h>
13 #include <linux/log2.h>
14 #include <linux/types.h>
15 #include <linux/zalloc.h>
16 #include <stdlib.h>
17 #include <unistd.h>
18
19 #include "auxtrace.h"
20 #include "color.h"
21 #include "debug.h"
22 #include "evlist.h"
23 #include "evsel.h"
24 #include "machine.h"
25 #include "session.h"
26 #include "symbol.h"
27 #include "thread.h"
28 #include "thread-stack.h"
29 #include "tsc.h"
30 #include "tool.h"
31 #include "util/synthetic-events.h"
32
33 #include "arm-spe.h"
34 #include "arm-spe-decoder/arm-spe-decoder.h"
35 #include "arm-spe-decoder/arm-spe-pkt-decoder.h"
36
37 #include "../../arch/arm64/include/asm/cputype.h"
38 #define MAX_TIMESTAMP (~0ULL)
39
40 #define is_ldst_op(op) (!!((op) & ARM_SPE_OP_LDST))
41
42 struct arm_spe {
43 struct auxtrace auxtrace;
44 struct auxtrace_queues queues;
45 struct auxtrace_heap heap;
46 struct itrace_synth_opts synth_opts;
47 u32 auxtrace_type;
48 struct perf_session *session;
49 struct machine *machine;
50 u32 pmu_type;
51
52 struct perf_tsc_conversion tc;
53
54 u8 timeless_decoding;
55 u8 data_queued;
56
57 u64 sample_type;
58 u8 sample_flc;
59 u8 sample_llc;
60 u8 sample_tlb;
61 u8 sample_branch;
62 u8 sample_remote_access;
63 u8 sample_memory;
64 u8 sample_instructions;
65 u64 instructions_sample_period;
66
67 u64 l1d_miss_id;
68 u64 l1d_access_id;
69 u64 llc_miss_id;
70 u64 llc_access_id;
71 u64 tlb_miss_id;
72 u64 tlb_access_id;
73 u64 branch_id;
74 u64 remote_access_id;
75 u64 memory_id;
76 u64 instructions_id;
77
78 u64 kernel_start;
79
80 unsigned long num_events;
81 u8 use_ctx_pkt_for_pid;
82
83 u64 **metadata;
84 u64 metadata_ver;
85 u64 metadata_nr_cpu;
86 bool is_homogeneous;
87 };
88
89 struct arm_spe_queue {
90 struct arm_spe *spe;
91 unsigned int queue_nr;
92 struct auxtrace_buffer *buffer;
93 struct auxtrace_buffer *old_buffer;
94 union perf_event *event_buf;
95 bool on_heap;
96 bool done;
97 pid_t pid;
98 pid_t tid;
99 int cpu;
100 struct arm_spe_decoder *decoder;
101 u64 time;
102 u64 timestamp;
103 struct thread *thread;
104 u64 period_instructions;
105 u32 flags;
106 struct branch_stack *last_branch;
107 };
108
109 struct data_source_handle {
110 const struct midr_range *midr_ranges;
111 void (*ds_synth)(const struct arm_spe_record *record,
112 union perf_mem_data_src *data_src);
113 };
114
115 #define DS(range, func) \
116 { \
117 .midr_ranges = range, \
118 .ds_synth = arm_spe__synth_##func, \
119 }
120
121 static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
122 unsigned char *buf, size_t len)
123 {
124 struct arm_spe_pkt packet;
125 size_t pos = 0;
126 int ret, pkt_len, i;
127 char desc[ARM_SPE_PKT_DESC_MAX];
128 const char *color = PERF_COLOR_BLUE;
129
130 color_fprintf(stdout, color,
131 ". ... ARM SPE data: size %#zx bytes\n",
132 len);
133
134 while (len) {
135 ret = arm_spe_get_packet(buf, len, &packet);
136 if (ret > 0)
137 pkt_len = ret;
138 else
139 pkt_len = 1;
140 printf(".");
141 color_fprintf(stdout, color, " %08zx: ", pos);
142 for (i = 0; i < pkt_len; i++)
143 color_fprintf(stdout, color, " %02x", buf[i]);
144 for (; i < 16; i++)
145 color_fprintf(stdout, color, " ");
146 if (ret > 0) {
147 ret = arm_spe_pkt_desc(&packet, desc,
148 ARM_SPE_PKT_DESC_MAX);
149 if (!ret)
150 color_fprintf(stdout, color, " %s\n", desc);
151 } else {
152 color_fprintf(stdout, color, " Bad packet!\n");
153 }
154 pos += pkt_len;
155 buf += pkt_len;
156 len -= pkt_len;
157 }
158 }
159
160 static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
161 size_t len)
162 {
163 printf(".\n");
164 arm_spe_dump(spe, buf, len);
165 }
166
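/*
 * Decoder callback for fetching trace data: advance to the next auxtrace
 * buffer in this queue, load its data from the perf data file if it is not
 * yet mapped, and drop the buffer that has already been consumed. A
 * returned length of zero tells the decoder that no more data is left.
 */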
167 static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
168 {
169 struct arm_spe_queue *speq = data;
170 struct auxtrace_buffer *buffer = speq->buffer;
171 struct auxtrace_buffer *old_buffer = speq->old_buffer;
172 struct auxtrace_queue *queue;
173
174 queue = &speq->spe->queues.queue_array[speq->queue_nr];
175
176 buffer = auxtrace_buffer__next(queue, buffer);
177 /* If no more data, drop the previous auxtrace_buffer and return */
178 if (!buffer) {
179 if (old_buffer)
180 auxtrace_buffer__drop_data(old_buffer);
181 b->len = 0;
182 return 0;
183 }
184
185 speq->buffer = buffer;
186
187 /* If the aux_buffer doesn't have data associated, try to load it */
188 if (!buffer->data) {
189 /* get the file desc associated with the perf data file */
190 int fd = perf_data__fd(speq->spe->session->data);
191
192 buffer->data = auxtrace_buffer__get_data(buffer, fd);
193 if (!buffer->data)
194 return -ENOMEM;
195 }
196
197 b->len = buffer->size;
198 b->buf = buffer->data;
199
200 if (b->len) {
201 if (old_buffer)
202 auxtrace_buffer__drop_data(old_buffer);
203 speq->old_buffer = buffer;
204 } else {
205 auxtrace_buffer__drop_data(buffer);
206 return arm_spe_get_trace(b, data);
207 }
208
209 return 0;
210 }
211
212 static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
213 unsigned int queue_nr)
214 {
215 struct arm_spe_params params = { .get_trace = 0, };
216 struct arm_spe_queue *speq;
217
218 speq = zalloc(sizeof(*speq));
219 if (!speq)
220 return NULL;
221
222 speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
223 if (!speq->event_buf)
224 goto out_free;
225
226 speq->spe = spe;
227 speq->queue_nr = queue_nr;
228 speq->pid = -1;
229 speq->tid = -1;
230 speq->cpu = -1;
231 speq->period_instructions = 0;
232
233 /* Set up the decoder parameters */
234 params.get_trace = arm_spe_get_trace;
235 params.data = speq;
236
237 if (spe->synth_opts.last_branch) {
238 size_t sz = sizeof(struct branch_stack);
239
240 /* Allocate up to two entries for PBT + TGT */
241 sz += sizeof(struct branch_entry) *
242 min(spe->synth_opts.last_branch_sz, 2U);
243 speq->last_branch = zalloc(sz);
244 if (!speq->last_branch)
245 goto out_free;
246 }
247
248 /* create new decoder */
249 speq->decoder = arm_spe_decoder_new(&params);
250 if (!speq->decoder)
251 goto out_free;
252
253 return speq;
254
255 out_free:
256 zfree(&speq->event_buf);
257 zfree(&speq->last_branch);
258 free(speq);
259
260 return NULL;
261 }
262
263 static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
264 {
265 return ip >= spe->kernel_start ?
266 PERF_RECORD_MISC_KERNEL :
267 PERF_RECORD_MISC_USER;
268 }
269
270 static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
271 struct auxtrace_queue *queue)
272 {
273 struct arm_spe_queue *speq = queue->priv;
274 pid_t tid;
275
276 tid = machine__get_current_tid(spe->machine, speq->cpu);
277 if (tid != -1) {
278 speq->tid = tid;
279 thread__zput(speq->thread);
280 } else
281 speq->tid = queue->tid;
282
283 if ((!speq->thread) && (speq->tid != -1)) {
284 speq->thread = machine__find_thread(spe->machine, -1,
285 speq->tid);
286 }
287
288 if (speq->thread) {
289 speq->pid = thread__pid(speq->thread);
290 if (queue->cpu == -1)
291 speq->cpu = thread__cpu(speq->thread);
292 }
293 }
294
295 static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid)
296 {
297 struct arm_spe *spe = speq->spe;
298 int err = machine__set_current_tid(spe->machine, speq->cpu, -1, tid);
299
300 if (err)
301 return err;
302
303 arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]);
304
305 return 0;
306 }
307
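/* Find the per-CPU metadata recorded for the given CPU, or NULL if absent */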
308 static u64 *arm_spe__get_metadata_by_cpu(struct arm_spe *spe, u64 cpu)
309 {
310 u64 i;
311
312 if (!spe->metadata)
313 return NULL;
314
315 for (i = 0; i < spe->metadata_nr_cpu; i++)
316 if (spe->metadata[i][ARM_SPE_CPU] == cpu)
317 return spe->metadata[i];
318
319 return NULL;
320 }
321
322 static struct simd_flags arm_spe__synth_simd_flags(const struct arm_spe_record *record)
323 {
324 struct simd_flags simd_flags = {};
325
326 if ((record->op & ARM_SPE_OP_LDST) && (record->op & ARM_SPE_OP_SVE_LDST))
327 simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;
328
329 if ((record->op & ARM_SPE_OP_OTHER) && (record->op & ARM_SPE_OP_SVE_OTHER))
330 simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;
331
332 if (record->type & ARM_SPE_SVE_PARTIAL_PRED)
333 simd_flags.pred |= SIMD_OP_FLAGS_PRED_PARTIAL;
334
335 if (record->type & ARM_SPE_SVE_EMPTY_PRED)
336 simd_flags.pred |= SIMD_OP_FLAGS_PRED_EMPTY;
337
338 return simd_flags;
339 }
340
341 static void arm_spe_prep_sample(struct arm_spe *spe,
342 struct arm_spe_queue *speq,
343 union perf_event *event,
344 struct perf_sample *sample)
345 {
346 struct arm_spe_record *record = &speq->decoder->record;
347
348 if (!spe->timeless_decoding)
349 sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);
350
351 sample->ip = record->from_ip;
352 sample->cpumode = arm_spe_cpumode(spe, sample->ip);
353 sample->pid = speq->pid;
354 sample->tid = speq->tid;
355 sample->period = 1;
356 sample->cpu = speq->cpu;
357 sample->simd_flags = arm_spe__synth_simd_flags(record);
358
359 event->sample.header.type = PERF_RECORD_SAMPLE;
360 event->sample.header.misc = sample->cpumode;
361 event->sample.header.size = sizeof(struct perf_event_header);
362 }
363
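/*
 * Fill the synthetic branch stack with up to two entries taken from the
 * current record: the sampled branch (target) and, when present, the
 * previous branch target (PBT), whose branch type is unknown.
 */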
364 static void arm_spe__prep_branch_stack(struct arm_spe_queue *speq)
365 {
366 struct arm_spe *spe = speq->spe;
367 struct arm_spe_record *record = &speq->decoder->record;
368 struct branch_stack *bstack = speq->last_branch;
369 struct branch_flags *bs_flags;
370 unsigned int last_branch_sz = spe->synth_opts.last_branch_sz;
371 bool have_tgt = !!(speq->flags & PERF_IP_FLAG_BRANCH);
372 bool have_pbt = last_branch_sz >= (have_tgt + 1U) && record->prev_br_tgt;
373 size_t sz = sizeof(struct branch_stack) +
374 sizeof(struct branch_entry) * min(last_branch_sz, 2U) /* PBT + TGT */;
375 int i = 0;
376
377 /* Clean up branch stack */
378 memset(bstack, 0x0, sz);
379
380 if (!have_tgt && !have_pbt)
381 return;
382
383 if (have_tgt) {
384 bstack->entries[i].from = record->from_ip;
385 bstack->entries[i].to = record->to_ip;
386
387 bs_flags = &bstack->entries[i].flags;
388 bs_flags->value = 0;
389
390 if (record->op & ARM_SPE_OP_BR_CR_BL) {
391 if (record->op & ARM_SPE_OP_BR_COND)
392 bs_flags->type |= PERF_BR_COND_CALL;
393 else
394 bs_flags->type |= PERF_BR_CALL;
395 /*
396 * An indirect branch instruction without link (e.g. BR) is
397 * treated as a function return.
398 */
399 } else if (record->op & ARM_SPE_OP_BR_CR_RET ||
400 record->op & ARM_SPE_OP_BR_INDIRECT) {
401 if (record->op & ARM_SPE_OP_BR_COND)
402 bs_flags->type |= PERF_BR_COND_RET;
403 else
404 bs_flags->type |= PERF_BR_RET;
405 } else if (record->op & ARM_SPE_OP_BR_CR_NON_BL_RET) {
406 if (record->op & ARM_SPE_OP_BR_COND)
407 bs_flags->type |= PERF_BR_COND;
408 else
409 bs_flags->type |= PERF_BR_UNCOND;
410 } else {
411 if (record->op & ARM_SPE_OP_BR_COND)
412 bs_flags->type |= PERF_BR_COND;
413 else
414 bs_flags->type |= PERF_BR_UNKNOWN;
415 }
416
417 if (record->type & ARM_SPE_BRANCH_MISS) {
418 bs_flags->mispred = 1;
419 bs_flags->predicted = 0;
420 } else {
421 bs_flags->mispred = 0;
422 bs_flags->predicted = 1;
423 }
424
425 if (record->type & ARM_SPE_BRANCH_NOT_TAKEN)
426 bs_flags->not_taken = 1;
427
428 if (record->type & ARM_SPE_IN_TXN)
429 bs_flags->in_tx = 1;
430
431 bs_flags->cycles = min(record->latency, 0xFFFFU);
432 i++;
433 }
434
435 if (have_pbt) {
436 bs_flags = &bstack->entries[i].flags;
437 bs_flags->type |= PERF_BR_UNKNOWN;
438 bstack->entries[i].to = record->prev_br_tgt;
439 i++;
440 }
441
442 bstack->nr = i;
443 bstack->hw_idx = -1ULL;
444 }
445
446 static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type)
447 {
448 event->header.size = perf_event__sample_event_size(sample, type, 0);
449 return perf_event__synthesize_sample(event, type, 0, sample);
450 }
451
452 static inline int
453 arm_spe_deliver_synth_event(struct arm_spe *spe,
454 struct arm_spe_queue *speq __maybe_unused,
455 union perf_event *event,
456 struct perf_sample *sample)
457 {
458 int ret;
459
460 if (spe->synth_opts.inject) {
461 ret = arm_spe__inject_event(event, sample, spe->sample_type);
462 if (ret)
463 return ret;
464 }
465
466 ret = perf_session__deliver_synth_event(spe->session, event, sample);
467 if (ret)
468 pr_err("ARM SPE: failed to deliver event, error %d\n", ret);
469
470 return ret;
471 }
472
473 static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
474 u64 spe_events_id, u64 data_src)
475 {
476 struct arm_spe *spe = speq->spe;
477 struct arm_spe_record *record = &speq->decoder->record;
478 union perf_event *event = speq->event_buf;
479 struct perf_sample sample;
480 int ret;
481
482 perf_sample__init(&sample, /*all=*/true);
483 arm_spe_prep_sample(spe, speq, event, &sample);
484
485 sample.id = spe_events_id;
486 sample.stream_id = spe_events_id;
487 sample.addr = record->virt_addr;
488 sample.phys_addr = record->phys_addr;
489 sample.data_src = data_src;
490 sample.weight = record->latency;
491
492 ret = arm_spe_deliver_synth_event(spe, speq, event, &sample);
493 perf_sample__exit(&sample);
494 return ret;
495 }
496
497 static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
498 u64 spe_events_id)
499 {
500 struct arm_spe *spe = speq->spe;
501 struct arm_spe_record *record = &speq->decoder->record;
502 union perf_event *event = speq->event_buf;
503 struct perf_sample sample;
504 int ret;
505
506 perf_sample__init(&sample, /*all=*/true);
507 arm_spe_prep_sample(spe, speq, event, &sample);
508
509 sample.id = spe_events_id;
510 sample.stream_id = spe_events_id;
511 sample.addr = record->to_ip;
512 sample.weight = record->latency;
513 sample.flags = speq->flags;
514 sample.branch_stack = speq->last_branch;
515
516 ret = arm_spe_deliver_synth_event(spe, speq, event, &sample);
517 perf_sample__exit(&sample);
518 return ret;
519 }
520
521 static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
522 u64 spe_events_id, u64 data_src)
523 {
524 struct arm_spe *spe = speq->spe;
525 struct arm_spe_record *record = &speq->decoder->record;
526 union perf_event *event = speq->event_buf;
527 struct perf_sample sample;
528 int ret;
529
530 /*
531 * Handle the perf instruction sampling period.
532 */
533 speq->period_instructions++;
534 if (speq->period_instructions < spe->instructions_sample_period)
535 return 0;
536 speq->period_instructions = 0;
537
538 perf_sample__init(&sample, /*all=*/true);
539 arm_spe_prep_sample(spe, speq, event, &sample);
540
541 sample.id = spe_events_id;
542 sample.stream_id = spe_events_id;
543 sample.addr = record->to_ip;
544 sample.phys_addr = record->phys_addr;
545 sample.data_src = data_src;
546 sample.period = spe->instructions_sample_period;
547 sample.weight = record->latency;
548 sample.flags = speq->flags;
549 sample.branch_stack = speq->last_branch;
550
551 ret = arm_spe_deliver_synth_event(spe, speq, event, &sample);
552 perf_sample__exit(&sample);
553 return ret;
554 }
555
556 static const struct midr_range common_ds_encoding_cpus[] = {
557 MIDR_ALL_VERSIONS(MIDR_CORTEX_A720),
558 MIDR_ALL_VERSIONS(MIDR_CORTEX_A725),
559 MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C),
560 MIDR_ALL_VERSIONS(MIDR_CORTEX_X3),
561 MIDR_ALL_VERSIONS(MIDR_CORTEX_X925),
562 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
563 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
564 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
565 MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2),
566 {},
567 };
568
569 static const struct midr_range ampereone_ds_encoding_cpus[] = {
570 MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
571 {},
572 };
573
574 static const struct midr_range hisi_hip_ds_encoding_cpus[] = {
575 MIDR_ALL_VERSIONS(MIDR_HISI_HIP12),
576 {},
577 };
578
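/* Derive the PERF_IP_FLAG_* sample flags from a branch record */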
579 static void arm_spe__sample_flags(struct arm_spe_queue *speq)
580 {
581 const struct arm_spe_record *record = &speq->decoder->record;
582
583 speq->flags = 0;
584 if (record->op & ARM_SPE_OP_BRANCH_ERET) {
585 speq->flags = PERF_IP_FLAG_BRANCH;
586
587 if (record->type & ARM_SPE_BRANCH_MISS)
588 speq->flags |= PERF_IP_FLAG_BRANCH_MISS;
589
590 if (record->type & ARM_SPE_BRANCH_NOT_TAKEN)
591 speq->flags |= PERF_IP_FLAG_NOT_TAKEN;
592
593 if (record->type & ARM_SPE_IN_TXN)
594 speq->flags |= PERF_IP_FLAG_IN_TX;
595
596 if (record->op & ARM_SPE_OP_BR_COND)
597 speq->flags |= PERF_IP_FLAG_CONDITIONAL;
598
599 if (record->op & ARM_SPE_OP_BR_CR_BL)
600 speq->flags |= PERF_IP_FLAG_CALL;
601 else if (record->op & ARM_SPE_OP_BR_CR_RET)
602 speq->flags |= PERF_IP_FLAG_RETURN;
603 /*
604 * An indirect branch instruction without link (e.g. BR) is
605 * taken as a function return.
606 */
607 else if (record->op & ARM_SPE_OP_BR_INDIRECT)
608 speq->flags |= PERF_IP_FLAG_RETURN;
609 }
610 }
611
612 static void arm_spe__synth_data_source_common(const struct arm_spe_record *record,
613 union perf_mem_data_src *data_src)
614 {
615 /*
616 * Even though four levels of cache hierarchy are possible, no known
617 * production Neoverse systems currently include more than three levels
618 * so for the time being we assume three exist. If a production system
619 * is built with four, then this function would have to be changed to
620 * detect the number of levels for reporting.
621 */
622
623 /*
624 * We have no data on the hit level or data source for stores in the
625 * Neoverse SPE records.
626 */
627 if (record->op & ARM_SPE_OP_ST) {
628 data_src->mem_lvl = PERF_MEM_LVL_NA;
629 data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
630 data_src->mem_snoop = PERF_MEM_SNOOP_NA;
631 return;
632 }
633
634 switch (record->source) {
635 case ARM_SPE_COMMON_DS_L1D:
636 data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
637 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
638 data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
639 break;
640 case ARM_SPE_COMMON_DS_L2:
641 data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
642 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
643 data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
644 break;
645 case ARM_SPE_COMMON_DS_PEER_CORE:
646 data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
647 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
648 data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
649 break;
650 /*
651 * We don't know if this is L1 or L2, but we do know it was a
652 * cache-to-cache transfer, so set SNOOPX_PEER
653 */
654 case ARM_SPE_COMMON_DS_LOCAL_CLUSTER:
655 case ARM_SPE_COMMON_DS_PEER_CLUSTER:
656 data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
657 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
658 data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
659 break;
660 /*
661 * System cache is assumed to be L3
662 */
663 case ARM_SPE_COMMON_DS_SYS_CACHE:
664 data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
665 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
666 data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
667 break;
668 /*
669 * We don't know what level it hit in, except it came from the other
670 * socket
671 */
672 case ARM_SPE_COMMON_DS_REMOTE:
673 data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1;
674 data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
675 data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
676 data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
677 break;
678 case ARM_SPE_COMMON_DS_DRAM:
679 data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
680 data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
681 data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
682 break;
683 default:
684 break;
685 }
686 }
687
688 /*
689 * The data source encoding is IMPDEF. Here we convert the encoding used on AmpereOne
690 * cores to the common one (Neoverse, Cortex) to avoid duplicating the decoding code.
691 */
692 static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record,
693 union perf_mem_data_src *data_src)
694 {
695 struct arm_spe_record common_record;
696
697 switch (record->source) {
698 case ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE:
699 common_record.source = ARM_SPE_COMMON_DS_PEER_CORE;
700 break;
701 case ARM_SPE_AMPEREONE_SLC:
702 common_record.source = ARM_SPE_COMMON_DS_SYS_CACHE;
703 break;
704 case ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE:
705 common_record.source = ARM_SPE_COMMON_DS_REMOTE;
706 break;
707 case ARM_SPE_AMPEREONE_DDR:
708 common_record.source = ARM_SPE_COMMON_DS_DRAM;
709 break;
710 case ARM_SPE_AMPEREONE_L1D:
711 common_record.source = ARM_SPE_COMMON_DS_L1D;
712 break;
713 case ARM_SPE_AMPEREONE_L2D:
714 common_record.source = ARM_SPE_COMMON_DS_L2;
715 break;
716 default:
717 pr_warning_once("AmpereOne: Unknown data source (0x%x)\n",
718 record->source);
719 return;
720 }
721
722 common_record.op = record->op;
723 arm_spe__synth_data_source_common(&common_record, data_src);
724 }
725
726 static void arm_spe__synth_data_source_hisi_hip(const struct arm_spe_record *record,
727 union perf_mem_data_src *data_src)
728 {
729 /* Use common synthesis method to handle store operations */
730 if (record->op & ARM_SPE_OP_ST) {
731 arm_spe__synth_data_source_common(record, data_src);
732 return;
733 }
734
735 switch (record->source) {
736 case ARM_SPE_HISI_HIP_PEER_CPU:
737 data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
738 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
739 data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
740 break;
741 case ARM_SPE_HISI_HIP_PEER_CPU_HITM:
742 data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
743 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
744 data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
745 data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
746 break;
747 case ARM_SPE_HISI_HIP_L3:
748 data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
749 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
750 data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
751 break;
752 case ARM_SPE_HISI_HIP_L3_HITM:
753 data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
754 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
755 data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
756 break;
757 case ARM_SPE_HISI_HIP_PEER_CLUSTER:
758 data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
759 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
760 data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
761 break;
762 case ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM:
763 data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
764 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
765 data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
766 data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
767 break;
768 case ARM_SPE_HISI_HIP_REMOTE_SOCKET:
769 data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
770 data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
771 data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
772 data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
773 break;
774 case ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM:
775 data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2;
776 data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
777 data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
778 data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
779 data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
780 break;
781 case ARM_SPE_HISI_HIP_LOCAL_MEM:
782 data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
783 data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
784 data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
785 break;
786 case ARM_SPE_HISI_HIP_REMOTE_MEM:
787 data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT;
788 data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
789 data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
790 break;
791 case ARM_SPE_HISI_HIP_NC_DEV:
792 data_src->mem_lvl = PERF_MEM_LVL_IO | PERF_MEM_LVL_HIT;
793 data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO;
794 data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
795 break;
796 case ARM_SPE_HISI_HIP_L2:
797 data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
798 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
799 data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
800 break;
801 case ARM_SPE_HISI_HIP_L2_HITM:
802 data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
803 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
804 data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
805 break;
806 case ARM_SPE_HISI_HIP_L1:
807 data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
808 data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
809 data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
810 break;
811 default:
812 break;
813 }
814 }
815
816 static const struct data_source_handle data_source_handles[] = {
817 DS(common_ds_encoding_cpus, data_source_common),
818 DS(ampereone_ds_encoding_cpus, data_source_ampereone),
819 DS(hisi_hip_ds_encoding_cpus, data_source_hisi_hip),
820 };
821
822 static void arm_spe__synth_memory_level(const struct arm_spe_record *record,
823 union perf_mem_data_src *data_src)
824 {
825 if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
826 data_src->mem_lvl = PERF_MEM_LVL_L3;
827
828 if (record->type & ARM_SPE_LLC_MISS)
829 data_src->mem_lvl |= PERF_MEM_LVL_MISS;
830 else
831 data_src->mem_lvl |= PERF_MEM_LVL_HIT;
832 } else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
833 data_src->mem_lvl = PERF_MEM_LVL_L1;
834
835 if (record->type & ARM_SPE_L1D_MISS)
836 data_src->mem_lvl |= PERF_MEM_LVL_MISS;
837 else
838 data_src->mem_lvl |= PERF_MEM_LVL_HIT;
839 }
840
841 if (record->type & ARM_SPE_REMOTE_ACCESS)
842 data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
843 }
844
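/*
 * Pick the data source decoding scheme based on the sampled CPU's MIDR
 * (taken from the per-CPU metadata, or from the cpuid for version 1
 * metadata) and run the matching handler. Returns false when the CPU's
 * data source encoding is unknown.
 */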
845 static bool arm_spe__synth_ds(struct arm_spe_queue *speq,
846 const struct arm_spe_record *record,
847 union perf_mem_data_src *data_src)
848 {
849 struct arm_spe *spe = speq->spe;
850 u64 *metadata = NULL;
851 u64 midr;
852 unsigned int i;
853
854 /* Metadata version 1 assumes all CPUs are the same (old behavior) */
855 if (spe->metadata_ver == 1) {
856 const char *cpuid;
857
858 pr_warning_once("Old SPE metadata, re-record to improve decode accuracy\n");
859 cpuid = perf_env__cpuid(perf_session__env(spe->session));
860 midr = strtol(cpuid, NULL, 16);
861 } else {
862 /* CPU ID is -1 for per-thread mode */
863 if (speq->cpu < 0) {
864 /*
865 * On a heterogeneous system, since the CPU ID is -1 we cannot
866 * confirm whether the data source packet is supported.
867 */
868 if (!spe->is_homogeneous)
869 return false;
870
871 /* In a homogeneous system, simply use CPU0's metadata */
872 if (spe->metadata)
873 metadata = spe->metadata[0];
874 } else {
875 metadata = arm_spe__get_metadata_by_cpu(spe, speq->cpu);
876 }
877
878 if (!metadata)
879 return false;
880
881 midr = metadata[ARM_SPE_CPU_MIDR];
882 }
883
884 for (i = 0; i < ARRAY_SIZE(data_source_handles); i++) {
885 if (is_midr_in_range_list(midr, data_source_handles[i].midr_ranges)) {
886 data_source_handles[i].ds_synth(record, data_src);
887 return true;
888 }
889 }
890
891 return false;
892 }
893
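/*
 * Synthesize the data source for a load/store record: set the memory
 * operation, decode the data source packet when the CPU's encoding is
 * known (falling back to the cache level events otherwise), and fill in
 * the TLB access/miss information.
 */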
894 static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq,
895 const struct arm_spe_record *record)
896 {
897 union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA };
898
899 /* Only synthesize data source for LDST operations */
900 if (!is_ldst_op(record->op))
901 return 0;
902
903 if (record->op & ARM_SPE_OP_LD)
904 data_src.mem_op = PERF_MEM_OP_LOAD;
905 else if (record->op & ARM_SPE_OP_ST)
906 data_src.mem_op = PERF_MEM_OP_STORE;
907 else
908 return 0;
909
910 if (!arm_spe__synth_ds(speq, record, &data_src))
911 arm_spe__synth_memory_level(record, &data_src);
912
913 if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
914 data_src.mem_dtlb = PERF_MEM_TLB_WK;
915
916 if (record->type & ARM_SPE_TLB_MISS)
917 data_src.mem_dtlb |= PERF_MEM_TLB_MISS;
918 else
919 data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
920 }
921
922 return data_src.val;
923 }
924
925 static int arm_spe_sample(struct arm_spe_queue *speq)
926 {
927 const struct arm_spe_record *record = &speq->decoder->record;
928 struct arm_spe *spe = speq->spe;
929 u64 data_src;
930 int err;
931
932 arm_spe__sample_flags(speq);
933 data_src = arm_spe__synth_data_source(speq, record);
934
935 if (spe->sample_flc) {
936 if (record->type & ARM_SPE_L1D_MISS) {
937 err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id,
938 data_src);
939 if (err)
940 return err;
941 }
942
943 if (record->type & ARM_SPE_L1D_ACCESS) {
944 err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id,
945 data_src);
946 if (err)
947 return err;
948 }
949 }
950
951 if (spe->sample_llc) {
952 if (record->type & ARM_SPE_LLC_MISS) {
953 err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
954 data_src);
955 if (err)
956 return err;
957 }
958
959 if (record->type & ARM_SPE_LLC_ACCESS) {
960 err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
961 data_src);
962 if (err)
963 return err;
964 }
965 }
966
967 if (spe->sample_tlb) {
968 if (record->type & ARM_SPE_TLB_MISS) {
969 err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
970 data_src);
971 if (err)
972 return err;
973 }
974
975 if (record->type & ARM_SPE_TLB_ACCESS) {
976 err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
977 data_src);
978 if (err)
979 return err;
980 }
981 }
982
983 if (spe->synth_opts.last_branch &&
984 (spe->sample_branch || spe->sample_instructions))
985 arm_spe__prep_branch_stack(speq);
986
987 if (spe->sample_branch && (record->op & ARM_SPE_OP_BRANCH_ERET)) {
988 err = arm_spe__synth_branch_sample(speq, spe->branch_id);
989 if (err)
990 return err;
991 }
992
993 if (spe->sample_remote_access &&
994 (record->type & ARM_SPE_REMOTE_ACCESS)) {
995 err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
996 data_src);
997 if (err)
998 return err;
999 }
1000
1001 /*
1002 * Only synthesize a memory sample when the record is a load/store
1003 * operation; skip non-memory records.
1004 */
1005 if (spe->sample_memory && is_ldst_op(record->op)) {
1006 err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
1007 if (err)
1008 return err;
1009 }
1010
1011 if (spe->sample_instructions) {
1012 err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
1013 if (err)
1014 return err;
1015 }
1016
1017 return 0;
1018 }
1019
1020 static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
1021 {
1022 struct arm_spe *spe = speq->spe;
1023 struct arm_spe_record *record;
1024 int ret;
1025
1026 if (!spe->kernel_start)
1027 spe->kernel_start = machine__kernel_start(spe->machine);
1028
1029 while (1) {
1030 /*
1031 * The usual logic is first to decode the packets and then
1032 * synthesize a sample based on the record; but here the flow is
1033 * reversed: arm_spe_sample() is called to synthesize samples
1034 * prior to arm_spe_decode().
1035 *
1036 * Two reasons for this code logic:
1037 * 1. When the queue is set up in arm_spe__setup_queue(), trace
1038 * data has already been decoded and a record generated, but that
1039 * record is not turned into a sample until control reaches here,
1040 * so it is correct to synthesize a sample for the pending record.
1041 * 2. After decoding trace data, the record timestamp needs to be
1042 * compared with the timestamp of the coming perf event; if the
1043 * record timestamp is later, bail out and push the record onto
1044 * the auxtrace heap so that synthesizing its sample is deferred
1045 * until the next time this function runs. This correlates samples
1046 * between Arm SPE trace data and other perf events with correct
1047 * time ordering.
1048 */
1049
1050 /*
1051 * Update pid/tid info.
1052 */
1053 record = &speq->decoder->record;
1054 if (!spe->timeless_decoding && record->context_id != (u64)-1) {
1055 ret = arm_spe_set_tid(speq, record->context_id);
1056 if (ret)
1057 return ret;
1058
1059 spe->use_ctx_pkt_for_pid = true;
1060 }
1061
1062 ret = arm_spe_sample(speq);
1063 if (ret)
1064 return ret;
1065
1066 ret = arm_spe_decode(speq->decoder);
1067 if (!ret) {
1068 pr_debug("No data or all data has been processed.\n");
1069 return 1;
1070 }
1071
1072 /*
1073 * An error was detected while decoding the SPE trace data; continue
1074 * with the next trace data and look for more records.
1075 */
1076 if (ret < 0)
1077 continue;
1078
1079 record = &speq->decoder->record;
1080
1081 /* Update timestamp for the last record */
1082 if (record->timestamp > speq->timestamp)
1083 speq->timestamp = record->timestamp;
1084
1085 /*
1086 * If the timestamp of the queue is later than timestamp of the
1087 * coming perf event, bail out so the perf event can be
1088 * processed first.
1089 */
1090 if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
1091 *timestamp = speq->timestamp;
1092 return 0;
1093 }
1094 }
1095
1096 return 0;
1097 }
1098
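/*
 * Set up the decode queue on first use. For timed decoding, decode up to
 * the first record and add the queue to the auxtrace heap, keyed by the
 * record's timestamp.
 */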
1099 static int arm_spe__setup_queue(struct arm_spe *spe,
1100 struct auxtrace_queue *queue,
1101 unsigned int queue_nr)
1102 {
1103 struct arm_spe_queue *speq = queue->priv;
1104 struct arm_spe_record *record;
1105
1106 if (list_empty(&queue->head) || speq)
1107 return 0;
1108
1109 speq = arm_spe__alloc_queue(spe, queue_nr);
1110
1111 if (!speq)
1112 return -ENOMEM;
1113
1114 queue->priv = speq;
1115
1116 if (queue->cpu != -1)
1117 speq->cpu = queue->cpu;
1118
1119 if (!speq->on_heap) {
1120 int ret;
1121
1122 if (spe->timeless_decoding)
1123 return 0;
1124
1125 retry:
1126 ret = arm_spe_decode(speq->decoder);
1127
1128 if (!ret)
1129 return 0;
1130
1131 if (ret < 0)
1132 goto retry;
1133
1134 record = &speq->decoder->record;
1135
1136 speq->timestamp = record->timestamp;
1137 ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
1138 if (ret)
1139 return ret;
1140 speq->on_heap = true;
1141 }
1142
1143 return 0;
1144 }
1145
1146 static int arm_spe__setup_queues(struct arm_spe *spe)
1147 {
1148 unsigned int i;
1149 int ret;
1150
1151 for (i = 0; i < spe->queues.nr_queues; i++) {
1152 ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
1153 if (ret)
1154 return ret;
1155 }
1156
1157 return 0;
1158 }
1159
1160 static int arm_spe__update_queues(struct arm_spe *spe)
1161 {
1162 if (spe->queues.new_data) {
1163 spe->queues.new_data = false;
1164 return arm_spe__setup_queues(spe);
1165 }
1166
1167 return 0;
1168 }
1169
1170 static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
1171 {
1172 struct evsel *evsel;
1173 struct evlist *evlist = spe->session->evlist;
1174 bool timeless_decoding = true;
1175
1176 /*
1177 * Cycle through the list of events; if we find one with the time
1178 * bit set, timestamps are present and decoding is not timeless.
1179 */
1180 evlist__for_each_entry(evlist, evsel) {
1181 if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
1182 timeless_decoding = false;
1183 }
1184
1185 return timeless_decoding;
1186 }
1187
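/*
 * Repeatedly take the queue with the earliest timestamp from the auxtrace
 * heap and run its decoder until the given timestamp is reached, then
 * re-add the queue with its updated timestamp.
 */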
1188 static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
1189 {
1190 unsigned int queue_nr;
1191 u64 ts;
1192 int ret;
1193
1194 while (1) {
1195 struct auxtrace_queue *queue;
1196 struct arm_spe_queue *speq;
1197
1198 if (!spe->heap.heap_cnt)
1199 return 0;
1200
1201 if (spe->heap.heap_array[0].ordinal >= timestamp)
1202 return 0;
1203
1204 queue_nr = spe->heap.heap_array[0].queue_nr;
1205 queue = &spe->queues.queue_array[queue_nr];
1206 speq = queue->priv;
1207
1208 auxtrace_heap__pop(&spe->heap);
1209
1210 if (spe->heap.heap_cnt) {
1211 ts = spe->heap.heap_array[0].ordinal + 1;
1212 if (ts > timestamp)
1213 ts = timestamp;
1214 } else {
1215 ts = timestamp;
1216 }
1217
1218 /*
1219 * A previous context-switch event has set pid/tid in the machine's context, so
1220 * here we need to update the pid/tid in the thread and SPE queue.
1221 */
1222 if (!spe->use_ctx_pkt_for_pid)
1223 arm_spe_set_pid_tid_cpu(spe, queue);
1224
1225 ret = arm_spe_run_decoder(speq, &ts);
1226 if (ret < 0) {
1227 auxtrace_heap__add(&spe->heap, queue_nr, ts);
1228 return ret;
1229 }
1230
1231 if (!ret) {
1232 ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
1233 if (ret < 0)
1234 return ret;
1235 } else {
1236 speq->on_heap = false;
1237 }
1238 }
1239
1240 return 0;
1241 }
1242
1243 static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
1244 u64 time_)
1245 {
1246 struct auxtrace_queues *queues = &spe->queues;
1247 unsigned int i;
1248 u64 ts = 0;
1249
1250 for (i = 0; i < queues->nr_queues; i++) {
1251 struct auxtrace_queue *queue = &spe->queues.queue_array[i];
1252 struct arm_spe_queue *speq = queue->priv;
1253
1254 if (speq && (tid == -1 || speq->tid == tid)) {
1255 speq->time = time_;
1256 arm_spe_set_pid_tid_cpu(spe, queue);
1257 arm_spe_run_decoder(speq, &ts);
1258 }
1259 }
1260 return 0;
1261 }
1262
1263 static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event,
1264 struct perf_sample *sample)
1265 {
1266 pid_t pid, tid;
1267 int cpu;
1268
1269 if (!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT))
1270 return 0;
1271
1272 pid = event->context_switch.next_prev_pid;
1273 tid = event->context_switch.next_prev_tid;
1274 cpu = sample->cpu;
1275
1276 if (tid == -1)
1277 pr_warning("context_switch event has no tid\n");
1278
1279 return machine__set_current_tid(spe->machine, cpu, pid, tid);
1280 }
1281
1282 static int arm_spe_process_event(struct perf_session *session,
1283 union perf_event *event,
1284 struct perf_sample *sample,
1285 const struct perf_tool *tool)
1286 {
1287 int err = 0;
1288 u64 timestamp;
1289 struct arm_spe *spe = container_of(session->auxtrace,
1290 struct arm_spe, auxtrace);
1291
1292 if (dump_trace)
1293 return 0;
1294
1295 if (!tool->ordered_events) {
1296 pr_err("SPE trace requires ordered events\n");
1297 return -EINVAL;
1298 }
1299
1300 if (sample->time && (sample->time != (u64) -1))
1301 timestamp = perf_time_to_tsc(sample->time, &spe->tc);
1302 else
1303 timestamp = 0;
1304
1305 if (timestamp || spe->timeless_decoding) {
1306 err = arm_spe__update_queues(spe);
1307 if (err)
1308 return err;
1309 }
1310
1311 if (spe->timeless_decoding) {
1312 if (event->header.type == PERF_RECORD_EXIT) {
1313 err = arm_spe_process_timeless_queues(spe,
1314 event->fork.tid,
1315 sample->time);
1316 }
1317 } else if (timestamp) {
1318 err = arm_spe_process_queues(spe, timestamp);
1319 if (err)
1320 return err;
1321
1322 if (!spe->use_ctx_pkt_for_pid &&
1323 (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE ||
1324 event->header.type == PERF_RECORD_SWITCH))
1325 err = arm_spe_context_switch(spe, event, sample);
1326 }
1327
1328 return err;
1329 }
1330
1331 static int arm_spe_process_auxtrace_event(struct perf_session *session,
1332 union perf_event *event,
1333 const struct perf_tool *tool __maybe_unused)
1334 {
1335 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
1336 auxtrace);
1337
1338 if (!spe->data_queued) {
1339 struct auxtrace_buffer *buffer;
1340 off_t data_offset;
1341 int fd = perf_data__fd(session->data);
1342 int err;
1343
1344 if (perf_data__is_pipe(session->data)) {
1345 data_offset = 0;
1346 } else {
1347 data_offset = lseek(fd, 0, SEEK_CUR);
1348 if (data_offset == -1)
1349 return -errno;
1350 }
1351
1352 err = auxtrace_queues__add_event(&spe->queues, session, event,
1353 data_offset, &buffer);
1354 if (err)
1355 return err;
1356
1357 /* Dump here, now that we have copied a piped trace out of the pipe */
1358 if (dump_trace) {
1359 if (auxtrace_buffer__get_data(buffer, fd)) {
1360 arm_spe_dump_event(spe, buffer->data,
1361 buffer->size);
1362 auxtrace_buffer__put_data(buffer);
1363 }
1364 }
1365 }
1366
1367 return 0;
1368 }
1369
1370 static int arm_spe_flush(struct perf_session *session __maybe_unused,
1371 const struct perf_tool *tool __maybe_unused)
1372 {
1373 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
1374 auxtrace);
1375 int ret;
1376
1377 if (dump_trace)
1378 return 0;
1379
1380 if (!tool->ordered_events)
1381 return -EINVAL;
1382
1383 ret = arm_spe__update_queues(spe);
1384 if (ret < 0)
1385 return ret;
1386
1387 if (spe->timeless_decoding)
1388 return arm_spe_process_timeless_queues(spe, -1,
1389 MAX_TIMESTAMP - 1);
1390
1391 ret = arm_spe_process_queues(spe, MAX_TIMESTAMP);
1392 if (ret)
1393 return ret;
1394
1395 if (!spe->use_ctx_pkt_for_pid)
1396 ui__warning("Arm SPE CONTEXT packets not found in the traces.\n"
1397 "Matching of TIDs to SPE events could be inaccurate.\n");
1398
1399 return 0;
1400 }
1401
1402 static u64 *arm_spe__alloc_per_cpu_metadata(u64 *buf, int per_cpu_size)
1403 {
1404 u64 *metadata;
1405
1406 metadata = zalloc(per_cpu_size);
1407 if (!metadata)
1408 return NULL;
1409
1410 memcpy(metadata, buf, per_cpu_size);
1411 return metadata;
1412 }
1413
1414 static void arm_spe__free_metadata(u64 **metadata, int nr_cpu)
1415 {
1416 int i;
1417
1418 for (i = 0; i < nr_cpu; i++)
1419 zfree(&metadata[i]);
1420 free(metadata);
1421 }
1422
1423 static u64 **arm_spe__alloc_metadata(struct perf_record_auxtrace_info *info,
1424 u64 *ver, int *nr_cpu)
1425 {
1426 u64 *ptr = (u64 *)info->priv;
1427 u64 metadata_size;
1428 u64 **metadata = NULL;
1429 int hdr_sz, per_cpu_sz, i;
1430
1431 metadata_size = info->header.size -
1432 sizeof(struct perf_record_auxtrace_info);
1433
1434 /* Metadata version 1 */
1435 if (metadata_size == ARM_SPE_AUXTRACE_V1_PRIV_SIZE) {
1436 *ver = 1;
1437 *nr_cpu = 0;
1438 /* No per CPU metadata */
1439 return NULL;
1440 }
1441
1442 *ver = ptr[ARM_SPE_HEADER_VERSION];
1443 hdr_sz = ptr[ARM_SPE_HEADER_SIZE];
1444 *nr_cpu = ptr[ARM_SPE_CPUS_NUM];
1445
1446 metadata = calloc(*nr_cpu, sizeof(*metadata));
1447 if (!metadata)
1448 return NULL;
1449
1450 /* Locate the start address of per CPU metadata */
1451 ptr += hdr_sz;
1452 per_cpu_sz = (metadata_size - (hdr_sz * sizeof(u64))) / (*nr_cpu);
1453
1454 for (i = 0; i < *nr_cpu; i++) {
1455 metadata[i] = arm_spe__alloc_per_cpu_metadata(ptr, per_cpu_sz);
1456 if (!metadata[i])
1457 goto err_per_cpu_metadata;
1458
1459 ptr += per_cpu_sz / sizeof(u64);
1460 }
1461
1462 return metadata;
1463
1464 err_per_cpu_metadata:
1465 arm_spe__free_metadata(metadata, *nr_cpu);
1466 return NULL;
1467 }
1468
1469 static void arm_spe_free_queue(void *priv)
1470 {
1471 struct arm_spe_queue *speq = priv;
1472
1473 if (!speq)
1474 return;
1475 thread__zput(speq->thread);
1476 arm_spe_decoder_free(speq->decoder);
1477 zfree(&speq->event_buf);
1478 zfree(&speq->last_branch);
1479 free(speq);
1480 }
1481
1482 static void arm_spe_free_events(struct perf_session *session)
1483 {
1484 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
1485 auxtrace);
1486 struct auxtrace_queues *queues = &spe->queues;
1487 unsigned int i;
1488
1489 for (i = 0; i < queues->nr_queues; i++) {
1490 arm_spe_free_queue(queues->queue_array[i].priv);
1491 queues->queue_array[i].priv = NULL;
1492 }
1493 auxtrace_queues__free(queues);
1494 }
1495
1496 static void arm_spe_free(struct perf_session *session)
1497 {
1498 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
1499 auxtrace);
1500
1501 auxtrace_heap__free(&spe->heap);
1502 arm_spe_free_events(session);
1503 session->auxtrace = NULL;
1504 arm_spe__free_metadata(spe->metadata, spe->metadata_nr_cpu);
1505 free(spe);
1506 }
1507
1508 static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
1509 struct evsel *evsel)
1510 {
1511 struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);
1512
1513 return evsel->core.attr.type == spe->pmu_type;
1514 }
1515
1516 static const char * const metadata_hdr_v1_fmts[] = {
1517 [ARM_SPE_PMU_TYPE] = " PMU Type :%"PRId64"\n",
1518 [ARM_SPE_PER_CPU_MMAPS] = " Per CPU mmaps :%"PRId64"\n",
1519 };
1520
1521 static const char * const metadata_hdr_fmts[] = {
1522 [ARM_SPE_HEADER_VERSION] = " Header version :%"PRId64"\n",
1523 [ARM_SPE_HEADER_SIZE] = " Header size :%"PRId64"\n",
1524 [ARM_SPE_PMU_TYPE_V2] = " PMU type v2 :%"PRId64"\n",
1525 [ARM_SPE_CPUS_NUM] = " CPU number :%"PRId64"\n",
1526 };
1527
1528 static const char * const metadata_per_cpu_fmts[] = {
1529 [ARM_SPE_MAGIC] = " Magic :0x%"PRIx64"\n",
1530 [ARM_SPE_CPU] = " CPU # :%"PRId64"\n",
1531 [ARM_SPE_CPU_NR_PARAMS] = " Num of params :%"PRId64"\n",
1532 [ARM_SPE_CPU_MIDR] = " MIDR :0x%"PRIx64"\n",
1533 [ARM_SPE_CPU_PMU_TYPE] = " PMU Type :%"PRId64"\n",
1534 [ARM_SPE_CAP_MIN_IVAL] = " Min Interval :%"PRId64"\n",
1535 };
1536
1537 static void arm_spe_print_info(struct arm_spe *spe, __u64 *arr)
1538 {
1539 unsigned int i, cpu, hdr_size, cpu_num, cpu_size;
1540 const char * const *hdr_fmts;
1541
1542 if (!dump_trace)
1543 return;
1544
1545 if (spe->metadata_ver == 1) {
1546 cpu_num = 0;
1547 hdr_size = ARM_SPE_AUXTRACE_V1_PRIV_MAX;
1548 hdr_fmts = metadata_hdr_v1_fmts;
1549 } else {
1550 cpu_num = arr[ARM_SPE_CPUS_NUM];
1551 hdr_size = arr[ARM_SPE_HEADER_SIZE];
1552 hdr_fmts = metadata_hdr_fmts;
1553 }
1554
1555 for (i = 0; i < hdr_size; i++)
1556 fprintf(stdout, hdr_fmts[i], arr[i]);
1557
1558 arr += hdr_size;
1559 for (cpu = 0; cpu < cpu_num; cpu++) {
1560 /*
1561 * The parameters from ARM_SPE_MAGIC to ARM_SPE_CPU_NR_PARAMS
1562 * are fixed. The number of parameters that follow is given by the
1563 * field 'ARM_SPE_CPU_NR_PARAMS'.
1564 */
1565 cpu_size = (ARM_SPE_CPU_NR_PARAMS + 1) + arr[ARM_SPE_CPU_NR_PARAMS];
1566 for (i = 0; i < cpu_size; i++)
1567 fprintf(stdout, metadata_per_cpu_fmts[i], arr[i]);
1568 arr += cpu_size;
1569 }
1570 }
1571
1572 static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
1573 const char *name)
1574 {
1575 struct evsel *evsel;
1576
1577 evlist__for_each_entry(evlist, evsel) {
1578 if (evsel->core.id && evsel->core.id[0] == id) {
1579 if (evsel->name)
1580 zfree(&evsel->name);
1581 evsel->name = strdup(name);
1582 break;
1583 }
1584 }
1585 }
1586
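/*
 * Set up the event attributes and allocate event IDs for the samples
 * synthesized from SPE data (cache, TLB, branch, remote access, memory
 * and instruction events), according to the --itrace options.
 */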
1587 static int
1588 arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
1589 {
1590 struct evlist *evlist = session->evlist;
1591 struct evsel *evsel;
1592 struct perf_event_attr attr;
1593 bool found = false;
1594 u64 id;
1595 int err;
1596
1597 evlist__for_each_entry(evlist, evsel) {
1598 if (evsel->core.attr.type == spe->pmu_type) {
1599 found = true;
1600 break;
1601 }
1602 }
1603
1604 if (!found) {
1605 pr_debug("No selected events with SPE trace data\n");
1606 return 0;
1607 }
1608
1609 memset(&attr, 0, sizeof(struct perf_event_attr));
1610 attr.size = sizeof(struct perf_event_attr);
1611 attr.type = PERF_TYPE_HARDWARE;
1612 attr.sample_type = evsel->core.attr.sample_type &
1613 (PERF_SAMPLE_MASK | PERF_SAMPLE_PHYS_ADDR);
1614 attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1615 PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC |
1616 PERF_SAMPLE_WEIGHT | PERF_SAMPLE_ADDR;
1617 if (spe->timeless_decoding)
1618 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
1619 else
1620 attr.sample_type |= PERF_SAMPLE_TIME;
1621
1622 spe->sample_type = attr.sample_type;
1623
1624 attr.exclude_user = evsel->core.attr.exclude_user;
1625 attr.exclude_kernel = evsel->core.attr.exclude_kernel;
1626 attr.exclude_hv = evsel->core.attr.exclude_hv;
1627 attr.exclude_host = evsel->core.attr.exclude_host;
1628 attr.exclude_guest = evsel->core.attr.exclude_guest;
1629 attr.sample_id_all = evsel->core.attr.sample_id_all;
1630 attr.read_format = evsel->core.attr.read_format;
1631
1632 /* create new id val to be a fixed offset from evsel id */
1633 id = evsel->core.id[0] + 1000000000;
1634
1635 if (!id)
1636 id = 1;
1637
1638 if (spe->synth_opts.flc) {
1639 spe->sample_flc = true;
1640
1641 /* Level 1 data cache miss */
1642 err = perf_session__deliver_synth_attr_event(session, &attr, id);
1643 if (err)
1644 return err;
1645 spe->l1d_miss_id = id;
1646 arm_spe_set_event_name(evlist, id, "l1d-miss");
1647 id += 1;
1648
1649 /* Level 1 data cache access */
1650 err = perf_session__deliver_synth_attr_event(session, &attr, id);
1651 if (err)
1652 return err;
1653 spe->l1d_access_id = id;
1654 arm_spe_set_event_name(evlist, id, "l1d-access");
1655 id += 1;
1656 }
1657
1658 if (spe->synth_opts.llc) {
1659 spe->sample_llc = true;
1660
1661 /* Last level cache miss */
1662 err = perf_session__deliver_synth_attr_event(session, &attr, id);
1663 if (err)
1664 return err;
1665 spe->llc_miss_id = id;
1666 arm_spe_set_event_name(evlist, id, "llc-miss");
1667 id += 1;
1668
1669 /* Last level cache access */
1670 err = perf_session__deliver_synth_attr_event(session, &attr, id);
1671 if (err)
1672 return err;
1673 spe->llc_access_id = id;
1674 arm_spe_set_event_name(evlist, id, "llc-access");
1675 id += 1;
1676 }
1677
1678 if (spe->synth_opts.tlb) {
1679 spe->sample_tlb = true;
1680
1681 /* TLB miss */
1682 err = perf_session__deliver_synth_attr_event(session, &attr, id);
1683 if (err)
1684 return err;
1685 spe->tlb_miss_id = id;
1686 arm_spe_set_event_name(evlist, id, "tlb-miss");
1687 id += 1;
1688
1689 /* TLB access */
1690 err = perf_session__deliver_synth_attr_event(session, &attr, id);
1691 if (err)
1692 return err;
1693 spe->tlb_access_id = id;
1694 arm_spe_set_event_name(evlist, id, "tlb-access");
1695 id += 1;
1696 }
1697
1698 if (spe->synth_opts.last_branch) {
1699 if (spe->synth_opts.last_branch_sz > 2)
1700 pr_debug("Arm SPE supports only two bstack entries (PBT+TGT).\n");
1701
1702 attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
1703 /*
1704 * We don't use the hardware index, but the sample generation
1705 * code uses the new format branch_stack with this field,
1706 * so the event attributes must indicate that it's present.
1707 */
1708 attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
1709 }
1710
1711 if (spe->synth_opts.branches) {
1712 spe->sample_branch = true;
1713
1714 /* Branch */
1715 err = perf_session__deliver_synth_attr_event(session, &attr, id);
1716 if (err)
1717 return err;
1718 spe->branch_id = id;
1719 arm_spe_set_event_name(evlist, id, "branch");
1720 id += 1;
1721 }
1722
1723 if (spe->synth_opts.remote_access) {
1724 spe->sample_remote_access = true;
1725
1726 /* Remote access */
1727 err = perf_session__deliver_synth_attr_event(session, &attr, id);
1728 if (err)
1729 return err;
1730 spe->remote_access_id = id;
1731 arm_spe_set_event_name(evlist, id, "remote-access");
1732 id += 1;
1733 }
1734
1735 if (spe->synth_opts.mem) {
1736 spe->sample_memory = true;
1737
1738 err = perf_session__deliver_synth_attr_event(session, &attr, id);
1739 if (err)
1740 return err;
1741 spe->memory_id = id;
1742 arm_spe_set_event_name(evlist, id, "memory");
1743 id += 1;
1744 }
1745
1746 if (spe->synth_opts.instructions) {
1747 if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) {
1748 pr_warning("Only instruction-based sampling period is currently supported by Arm SPE.\n");
1749 goto synth_instructions_out;
1750 }
1751 if (spe->synth_opts.period > 1)
1752 pr_warning("Arm SPE has a hardware-based sample period.\n"
1753 "Additional instruction events will be discarded by --itrace\n");
1754
1755 spe->sample_instructions = true;
1756 attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1757 attr.sample_period = spe->synth_opts.period;
1758 spe->instructions_sample_period = attr.sample_period;
1759 err = perf_session__deliver_synth_attr_event(session, &attr, id);
1760 if (err)
1761 return err;
1762 spe->instructions_id = id;
1763 arm_spe_set_event_name(evlist, id, "instructions");
1764 }
1765 synth_instructions_out:
1766
1767 return 0;
1768 }
1769
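/* Return true if every CPU in the metadata reports the same MIDR */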
1770 static bool arm_spe__is_homogeneous(u64 **metadata, int nr_cpu)
1771 {
1772 u64 midr;
1773 int i;
1774
1775 if (!nr_cpu)
1776 return false;
1777
1778 for (i = 0; i < nr_cpu; i++) {
1779 if (!metadata[i])
1780 return false;
1781
1782 if (i == 0) {
1783 midr = metadata[i][ARM_SPE_CPU_MIDR];
1784 continue;
1785 }
1786
1787 if (midr != metadata[i][ARM_SPE_CPU_MIDR])
1788 return false;
1789 }
1790
1791 return true;
1792 }
1793
1794 int arm_spe_process_auxtrace_info(union perf_event *event,
1795 struct perf_session *session)
1796 {
1797 struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
1798 size_t min_sz = ARM_SPE_AUXTRACE_V1_PRIV_SIZE;
1799 struct perf_record_time_conv *tc = &session->time_conv;
1800 struct arm_spe *spe;
1801 u64 **metadata = NULL;
1802 u64 metadata_ver;
1803 int nr_cpu, err;
1804
1805 if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
1806 min_sz)
1807 return -EINVAL;
1808
1809 metadata = arm_spe__alloc_metadata(auxtrace_info, &metadata_ver,
1810 &nr_cpu);
1811 if (!metadata && metadata_ver != 1) {
1812 pr_err("Failed to parse Arm SPE metadata.\n");
1813 return -EINVAL;
1814 }
1815
1816 spe = zalloc(sizeof(struct arm_spe));
1817 if (!spe) {
1818 err = -ENOMEM;
1819 goto err_free_metadata;
1820 }
1821
1822 err = auxtrace_queues__init(&spe->queues);
1823 if (err)
1824 goto err_free;
1825
1826 spe->session = session;
1827 spe->machine = &session->machines.host; /* No kvm support */
1828 spe->auxtrace_type = auxtrace_info->type;
1829 if (metadata_ver == 1)
1830 spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
1831 else
1832 spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE_V2];
1833 spe->metadata = metadata;
1834 spe->metadata_ver = metadata_ver;
1835 spe->metadata_nr_cpu = nr_cpu;
1836 spe->is_homogeneous = arm_spe__is_homogeneous(metadata, nr_cpu);
1837
1838 spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);
1839
1840 /*
1841 * The synthesized event PERF_RECORD_TIME_CONV has already been handled
1842 * and the parameters for the hardware clock are stored in the session
1843 * context. Pass these parameters to the struct perf_tsc_conversion
1844 * in "spe->tc", which is used later for conversion between the clock
1845 * counter and the timestamp.
1846 *
1847 * For backward compatibility, copy the fields starting from
1848 * "time_cycles" only if they are contained in the event.
1849 */
1850 spe->tc.time_shift = tc->time_shift;
1851 spe->tc.time_mult = tc->time_mult;
1852 spe->tc.time_zero = tc->time_zero;
1853
1854 if (event_contains(*tc, time_cycles)) {
1855 spe->tc.time_cycles = tc->time_cycles;
1856 spe->tc.time_mask = tc->time_mask;
1857 spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
1858 spe->tc.cap_user_time_short = tc->cap_user_time_short;
1859 }
1860
1861 spe->auxtrace.process_event = arm_spe_process_event;
1862 spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
1863 spe->auxtrace.flush_events = arm_spe_flush;
1864 spe->auxtrace.free_events = arm_spe_free_events;
1865 spe->auxtrace.free = arm_spe_free;
1866 spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
1867 session->auxtrace = &spe->auxtrace;
1868
1869 arm_spe_print_info(spe, &auxtrace_info->priv[0]);
1870
1871 if (dump_trace)
1872 return 0;
1873
1874 if (session->itrace_synth_opts && session->itrace_synth_opts->set)
1875 spe->synth_opts = *session->itrace_synth_opts;
1876 else
1877 itrace_synth_opts__set_default(&spe->synth_opts, false);
1878
1879 err = arm_spe_synth_events(spe, session);
1880 if (err)
1881 goto err_free_queues;
1882
1883 err = auxtrace_queues__process_index(&spe->queues, session);
1884 if (err)
1885 goto err_free_queues;
1886
1887 if (spe->queues.populated)
1888 spe->data_queued = true;
1889
1890 return 0;
1891
1892 err_free_queues:
1893 auxtrace_queues__free(&spe->queues);
1894 session->auxtrace = NULL;
1895 err_free:
1896 free(spe);
1897 err_free_metadata:
1898 arm_spe__free_metadata(metadata, nr_cpu);
1899 return err;
1900 }
1901