1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright(C) 2015-2018 Linaro Limited.
4 *
5 * Author: Tor Jeremiassen <tor@ti.com>
6 * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
7 */
8
9 #include <linux/kernel.h>
10 #include <linux/bitfield.h>
11 #include <linux/bitops.h>
12 #include <linux/coresight-pmu.h>
13 #include <linux/err.h>
14 #include <linux/log2.h>
15 #include <linux/types.h>
16 #include <linux/zalloc.h>
17
18 #include <stdlib.h>
19
20 #include "auxtrace.h"
21 #include "color.h"
22 #include "cs-etm.h"
23 #include "cs-etm-decoder/cs-etm-decoder.h"
24 #include "debug.h"
25 #include "dso.h"
26 #include "evlist.h"
27 #include "intlist.h"
28 #include "machine.h"
29 #include "map.h"
30 #include "perf.h"
31 #include "session.h"
32 #include "map_symbol.h"
33 #include "branch.h"
34 #include "symbol.h"
35 #include "tool.h"
36 #include "thread.h"
37 #include "thread-stack.h"
38 #include "tsc.h"
39 #include <tools/libc_compat.h>
40 #include "util/synthetic-events.h"
41 #include "util/util.h"
42
/*
 * Per-session CoreSight ETM decode state, embedded in and reached through
 * perf_session->auxtrace via container_of().
 */
struct cs_etm_auxtrace {
	struct auxtrace auxtrace;
	struct auxtrace_queues queues;
	struct auxtrace_heap heap;
	struct itrace_synth_opts synth_opts;
	struct perf_session *session;
	struct perf_tsc_conversion tc;

	/*
	 * Timeless has no timestamps in the trace so overlapping mmap lookups
	 * are less accurate but produces smaller trace data. We use context IDs
	 * in the trace instead of matching timestamps with fork records so
	 * they're not really needed in the general case. Overlapping mmaps
	 * happen in cases like between a fork and an exec.
	 */
	bool timeless_decoding;

	/*
	 * Per-thread ignores the trace channel ID and instead assumes that
	 * everything in a buffer comes from the same process regardless of
	 * which CPU it ran on. It also implies no context IDs so the TID is
	 * taken from the auxtrace buffer.
	 */
	bool per_thread_decoding;
	bool snapshot_mode;
	bool data_queued;
	bool has_virtual_ts; /* Virtual/Kernel timestamps in the trace. */

	int num_cpu;
	u64 latest_kernel_timestamp;
	u32 auxtrace_type;
	/* IDs/types of the samples synthesized from decoded trace. */
	u64 branches_sample_type;
	u64 branches_id;
	u64 instructions_sample_type;
	u64 instructions_sample_period;
	u64 instructions_id;
	u64 **metadata;		/* num_cpu per-CPU metadata blocks. */
	unsigned int pmu_type;
	enum cs_etm_pid_fmt pid_fmt;	/* Cached by cs_etm__init_pid_fmt(). */
};
83
/*
 * Decode state for one trace channel (traceID) within a cs_etm_queue:
 * the current and previous packet plus the thread/exception-level each
 * was attributed to (see cs_etm__packet_swap()).
 */
struct cs_etm_traceid_queue {
	u8 trace_chan_id;
	u64 period_instructions;
	size_t last_branch_pos;
	union perf_event *event_buf;	/* PERF_SAMPLE_MAX_SIZE scratch buffer. */
	struct thread *thread;
	struct thread *prev_packet_thread;
	ocsd_ex_level prev_packet_el;
	ocsd_ex_level el;
	/* Both allocated only when synth_opts.last_branch is set. */
	struct branch_stack *last_branch;
	struct branch_stack *last_branch_rb;
	struct cs_etm_packet *prev_packet;
	struct cs_etm_packet *packet;
	struct cs_etm_packet_queue packet_queue;
};
99
/* One decode queue per auxtrace queue; may carry several traceID streams. */
struct cs_etm_queue {
	struct cs_etm_auxtrace *etm;
	struct cs_etm_decoder *decoder;
	struct auxtrace_buffer *buffer;
	unsigned int queue_nr;
	/* Non-zero when a timestamp is pending; see cs_etm__etmq_get_timestamp(). */
	u8 pending_timestamp_chan_id;
	u64 offset;
	const unsigned char *buf;
	size_t buf_len, buf_used;
	/* Conversion between traceID and index in traceid_queues array */
	struct intlist *traceid_queues_list;
	struct cs_etm_traceid_queue **traceid_queues;
};
113
/* RB tree for quick conversion between traceID and metadata pointers */
static struct intlist *traceid_list;

/* Forward declarations for routines defined later in this file. */
static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm);
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
					   pid_t tid);
static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);
122
/* PTMs ETMIDR [11:8] set to b0011 */
#define ETMIDR_PTM_VERSION 0x00000300

/*
 * A struct auxtrace_heap_item only has a queue_nr and a timestamp to
 * work with. One option is to modify to auxtrace_heap_XYZ() API or simply
 * encode the etm queue number as the upper 16 bit and the channel as
 * the lower 16 bit.
 *
 * Arguments are parenthesized so that expression arguments (anything
 * containing operators of lower precedence than '<<'/'&') expand correctly.
 */
#define TO_CS_QUEUE_NR(queue_nr, trace_chan_id)	\
	(((queue_nr) << 16) | (trace_chan_id))
#define TO_QUEUE_NR(cs_queue_nr) ((cs_queue_nr) >> 16)
#define TO_TRACE_CHAN_ID(cs_queue_nr) ((cs_queue_nr) & 0x0000ffff)
136
/*
 * Classify a v7 trace source from its ETMIDR value: PTM implementations
 * set ETMIDR[11:8] to b0011, anything else is treated as ETMv3.
 */
static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
{
	return ((etmidr & ETMIDR_PTM_VERSION) == ETMIDR_PTM_VERSION) ?
	       CS_ETM_PROTO_PTM : CS_ETM_PROTO_ETMV3;
}
146
/*
 * Look up the metadata magic value for @trace_chan_id and store it in
 * @magic. Returns 0 on success, -EINVAL for an unmapped trace ID.
 */
static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic)
{
	u64 *metadata;
	struct int_node *inode = intlist__find(traceid_list, trace_chan_id);

	if (!inode)
		return -EINVAL;

	metadata = inode->priv;
	*magic = metadata[CS_ETM_MAGIC];
	return 0;
}
160
/*
 * Resolve the CPU a trace channel belongs to from its metadata.
 * Returns 0 on success, -EINVAL for an unmapped trace ID.
 */
int cs_etm__get_cpu(u8 trace_chan_id, int *cpu)
{
	u64 *metadata;
	struct int_node *inode = intlist__find(traceid_list, trace_chan_id);

	if (!inode)
		return -EINVAL;

	metadata = inode->priv;
	*cpu = (int)metadata[CS_ETM_CPU];
	return 0;
}
174
175 /*
176 * The returned PID format is presented as an enum:
177 *
178 * CS_ETM_PIDFMT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced.
179 * CS_ETM_PIDFMT_CTXTID2: CONTEXTIDR_EL2 is traced.
180 * CS_ETM_PIDFMT_NONE: No context IDs
181 *
182 * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2
183 * are enabled at the same time when the session runs on an EL2 kernel.
184 * This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be
185 * recorded in the trace data, the tool will selectively use
186 * CONTEXTIDR_EL2 as PID.
187 *
188 * The result is cached in etm->pid_fmt so this function only needs to be called
189 * when processing the aux info.
190 */
static enum cs_etm_pid_fmt cs_etm__init_pid_fmt(u64 *metadata)
{
	u64 config;

	if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) {
		/* ETMv3/PTM: CONTEXTIDR is the only candidate */
		config = metadata[CS_ETM_ETMCR];
		if (config & BIT(ETM_OPT_CTXTID))
			return CS_ETM_PIDFMT_CTXTID;
		return CS_ETM_PIDFMT_NONE;
	}

	/* ETMv4/ETE: CONTEXTIDR_EL2 takes precedence over CONTEXTIDR_EL1 */
	config = metadata[CS_ETMV4_TRCCONFIGR];
	if (config & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT)))
		return CS_ETM_PIDFMT_CTXTID2;
	if (config & BIT(ETM4_CFG_BIT_CTXTID))
		return CS_ETM_PIDFMT_CTXTID;

	return CS_ETM_PIDFMT_NONE;
}
212
/* Return the PID format cached on the session owning @etmq. */
enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq)
{
	return etmq->etm->pid_fmt;
}
217
/*
 * Associate @trace_chan_id with @cpu_metadata in the traceID RB tree.
 * Returns -ENOMEM on allocation failure, -EINVAL if the ID is already
 * mapped, 0 on success.
 */
static int cs_etm__map_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
{
	struct int_node *inode = intlist__findnew(traceid_list, trace_chan_id);

	if (!inode)
		return -ENOMEM;

	/* Refuse to overwrite an existing traceID -> metadata association. */
	if (inode->priv)
		return -EINVAL;

	inode->priv = cpu_metadata;
	return 0;
}
241
/*
 * Extract the trace ID out of a per-CPU metadata block; the slot it
 * lives in depends on the trace source generation (ETMv3 vs ETMv4/ETE).
 */
static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata)
{
	switch (cpu_metadata[CS_ETM_MAGIC]) {
	case __perf_cs_etmv3_magic:
		*trace_chan_id = (u8)(cpu_metadata[CS_ETM_ETMTRACEIDR] &
				      CORESIGHT_TRACE_ID_VAL_MASK);
		return 0;
	case __perf_cs_etmv4_magic:
	case __perf_cs_ete_magic:
		*trace_chan_id = (u8)(cpu_metadata[CS_ETMV4_TRCTRACEIDR] &
				      CORESIGHT_TRACE_ID_VAL_MASK);
		return 0;
	default:
		return -EINVAL;
	}
}
261
262 /*
263 * update metadata trace ID from the value found in the AUX_HW_INFO packet.
264 * This will also clear the CORESIGHT_TRACE_ID_UNUSED_FLAG flag if present.
265 */
/*
 * update metadata trace ID from the value found in the AUX_HW_INFO packet.
 * This will also clear the CORESIGHT_TRACE_ID_UNUSED_FLAG flag if present.
 */
static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
{
	switch (cpu_metadata[CS_ETM_MAGIC]) {
	case __perf_cs_etmv3_magic:
		cpu_metadata[CS_ETM_ETMTRACEIDR] = trace_chan_id;
		return 0;
	case __perf_cs_etmv4_magic:
	case __perf_cs_ete_magic:
		cpu_metadata[CS_ETMV4_TRCTRACEIDR] = trace_chan_id;
		return 0;
	default:
		return -EINVAL;
	}
}
284
285 /*
286 * Get a metadata index for a specific cpu from an array.
287 *
288 */
get_cpu_data_idx(struct cs_etm_auxtrace * etm,int cpu)289 static int get_cpu_data_idx(struct cs_etm_auxtrace *etm, int cpu)
290 {
291 int i;
292
293 for (i = 0; i < etm->num_cpu; i++) {
294 if (etm->metadata[i][CS_ETM_CPU] == (u64)cpu) {
295 return i;
296 }
297 }
298
299 return -1;
300 }
301
302 /*
303 * Get a metadata for a specific cpu from an array.
304 *
305 */
get_cpu_data(struct cs_etm_auxtrace * etm,int cpu)306 static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu)
307 {
308 int idx = get_cpu_data_idx(etm, cpu);
309
310 return (idx != -1) ? etm->metadata[idx] : NULL;
311 }
312
313 /*
314 * Handle the PERF_RECORD_AUX_OUTPUT_HW_ID event.
315 *
316 * The payload associates the Trace ID and the CPU.
317 * The routine is tolerant of seeing multiple packets with the same association,
318 * but a CPU / Trace ID association changing during a session is an error.
319 */
cs_etm__process_aux_output_hw_id(struct perf_session * session,union perf_event * event)320 static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
321 union perf_event *event)
322 {
323 struct cs_etm_auxtrace *etm;
324 struct perf_sample sample;
325 struct int_node *inode;
326 struct evsel *evsel;
327 u64 *cpu_data;
328 u64 hw_id;
329 int cpu, version, err;
330 u8 trace_chan_id, curr_chan_id;
331
332 /* extract and parse the HW ID */
333 hw_id = event->aux_output_hw_id.hw_id;
334 version = FIELD_GET(CS_AUX_HW_ID_VERSION_MASK, hw_id);
335 trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
336
337 /* check that we can handle this version */
338 if (version > CS_AUX_HW_ID_CURR_VERSION) {
339 pr_err("CS ETM Trace: PERF_RECORD_AUX_OUTPUT_HW_ID version %d not supported. Please update Perf.\n",
340 version);
341 return -EINVAL;
342 }
343
344 /* get access to the etm metadata */
345 etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace);
346 if (!etm || !etm->metadata)
347 return -EINVAL;
348
349 /* parse the sample to get the CPU */
350 evsel = evlist__event2evsel(session->evlist, event);
351 if (!evsel)
352 return -EINVAL;
353 err = evsel__parse_sample(evsel, event, &sample);
354 if (err)
355 return err;
356 cpu = sample.cpu;
357 if (cpu == -1) {
358 /* no CPU in the sample - possibly recorded with an old version of perf */
359 pr_err("CS_ETM: no CPU AUX_OUTPUT_HW_ID sample. Use compatible perf to record.");
360 return -EINVAL;
361 }
362
363 /* See if the ID is mapped to a CPU, and it matches the current CPU */
364 inode = intlist__find(traceid_list, trace_chan_id);
365 if (inode) {
366 cpu_data = inode->priv;
367 if ((int)cpu_data[CS_ETM_CPU] != cpu) {
368 pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
369 return -EINVAL;
370 }
371
372 /* check that the mapped ID matches */
373 err = cs_etm__metadata_get_trace_id(&curr_chan_id, cpu_data);
374 if (err)
375 return err;
376 if (curr_chan_id != trace_chan_id) {
377 pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
378 return -EINVAL;
379 }
380
381 /* mapped and matched - return OK */
382 return 0;
383 }
384
385 cpu_data = get_cpu_data(etm, cpu);
386 if (cpu_data == NULL)
387 return err;
388
389 /* not one we've seen before - lets map it */
390 err = cs_etm__map_trace_id(trace_chan_id, cpu_data);
391 if (err)
392 return err;
393
394 /*
395 * if we are picking up the association from the packet, need to plug
396 * the correct trace ID into the metadata for setting up decoders later.
397 */
398 err = cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
399 return err;
400 }
401
/* Flag @trace_chan_id as the channel with a pending timestamp on @etmq. */
void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
					      u8 trace_chan_id)
{
	/*
	 * When a timestamp packet is encountered the backend code
	 * is stopped so that the front end has time to process packets
	 * that were accumulated in the traceID queue. Since there can
	 * be more than one channel per cs_etm_queue, we need to specify
	 * what traceID queue needs servicing.
	 */
	etmq->pending_timestamp_chan_id = trace_chan_id;
}
414
cs_etm__etmq_get_timestamp(struct cs_etm_queue * etmq,u8 * trace_chan_id)415 static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
416 u8 *trace_chan_id)
417 {
418 struct cs_etm_packet_queue *packet_queue;
419
420 if (!etmq->pending_timestamp_chan_id)
421 return 0;
422
423 if (trace_chan_id)
424 *trace_chan_id = etmq->pending_timestamp_chan_id;
425
426 packet_queue = cs_etm__etmq_get_packet_queue(etmq,
427 etmq->pending_timestamp_chan_id);
428 if (!packet_queue)
429 return 0;
430
431 /* Acknowledge pending status */
432 etmq->pending_timestamp_chan_id = 0;
433
434 /* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
435 return packet_queue->cs_timestamp;
436 }
437
cs_etm__clear_packet_queue(struct cs_etm_packet_queue * queue)438 static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
439 {
440 int i;
441
442 queue->head = 0;
443 queue->tail = 0;
444 queue->packet_count = 0;
445 for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) {
446 queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
447 queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
448 queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
449 queue->packet_buffer[i].instr_count = 0;
450 queue->packet_buffer[i].last_instr_taken_branch = false;
451 queue->packet_buffer[i].last_instr_size = 0;
452 queue->packet_buffer[i].last_instr_type = 0;
453 queue->packet_buffer[i].last_instr_subtype = 0;
454 queue->packet_buffer[i].last_instr_cond = 0;
455 queue->packet_buffer[i].flags = 0;
456 queue->packet_buffer[i].exception_number = UINT32_MAX;
457 queue->packet_buffer[i].trace_chan_id = UINT8_MAX;
458 queue->packet_buffer[i].cpu = INT_MIN;
459 }
460 }
461
cs_etm__clear_all_packet_queues(struct cs_etm_queue * etmq)462 static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq)
463 {
464 int idx;
465 struct int_node *inode;
466 struct cs_etm_traceid_queue *tidq;
467 struct intlist *traceid_queues_list = etmq->traceid_queues_list;
468
469 intlist__for_each_entry(inode, traceid_queues_list) {
470 idx = (int)(intptr_t)inode->priv;
471 tidq = etmq->traceid_queues[idx];
472 cs_etm__clear_packet_queue(&tidq->packet_queue);
473 }
474 }
475
/*
 * Initialise a freshly-allocated traceid_queue: reset its packet ring,
 * attach the thread matching the auxtrace queue's TID and allocate the
 * packet/branch/event buffers. On any allocation failure everything
 * allocated so far is freed (zfree() tolerates NULL) and -ENOMEM is
 * returned.
 */
static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
				      struct cs_etm_traceid_queue *tidq,
				      u8 trace_chan_id)
{
	int rc = -ENOMEM;
	struct auxtrace_queue *queue;
	struct cs_etm_auxtrace *etm = etmq->etm;

	cs_etm__clear_packet_queue(&tidq->packet_queue);

	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
	tidq->trace_chan_id = trace_chan_id;
	/* Exception level is unknown until the decoder reports it. */
	tidq->el = tidq->prev_packet_el = ocsd_EL_unknown;
	tidq->thread = machine__findnew_thread(&etm->session->machines.host, -1,
					       queue->tid);
	tidq->prev_packet_thread = machine__idle_thread(&etm->session->machines.host);

	tidq->packet = zalloc(sizeof(struct cs_etm_packet));
	if (!tidq->packet)
		goto out;

	tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet));
	if (!tidq->prev_packet)
		goto out_free;

	if (etm->synth_opts.last_branch) {
		/* Stack header plus last_branch_sz entries. */
		size_t sz = sizeof(struct branch_stack);

		sz += etm->synth_opts.last_branch_sz *
		      sizeof(struct branch_entry);
		tidq->last_branch = zalloc(sz);
		if (!tidq->last_branch)
			goto out_free;
		tidq->last_branch_rb = zalloc(sz);
		if (!tidq->last_branch_rb)
			goto out_free;
	}

	tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
	if (!tidq->event_buf)
		goto out_free;

	return 0;

out_free:
	zfree(&tidq->last_branch_rb);
	zfree(&tidq->last_branch);
	zfree(&tidq->prev_packet);
	zfree(&tidq->packet);
out:
	return rc;
}
528
529 static struct cs_etm_traceid_queue
cs_etm__etmq_get_traceid_queue(struct cs_etm_queue * etmq,u8 trace_chan_id)530 *cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
531 {
532 int idx;
533 struct int_node *inode;
534 struct intlist *traceid_queues_list;
535 struct cs_etm_traceid_queue *tidq, **traceid_queues;
536 struct cs_etm_auxtrace *etm = etmq->etm;
537
538 if (etm->per_thread_decoding)
539 trace_chan_id = CS_ETM_PER_THREAD_TRACEID;
540
541 traceid_queues_list = etmq->traceid_queues_list;
542
543 /*
544 * Check if the traceid_queue exist for this traceID by looking
545 * in the queue list.
546 */
547 inode = intlist__find(traceid_queues_list, trace_chan_id);
548 if (inode) {
549 idx = (int)(intptr_t)inode->priv;
550 return etmq->traceid_queues[idx];
551 }
552
553 /* We couldn't find a traceid_queue for this traceID, allocate one */
554 tidq = malloc(sizeof(*tidq));
555 if (!tidq)
556 return NULL;
557
558 memset(tidq, 0, sizeof(*tidq));
559
560 /* Get a valid index for the new traceid_queue */
561 idx = intlist__nr_entries(traceid_queues_list);
562 /* Memory for the inode is free'ed in cs_etm_free_traceid_queues () */
563 inode = intlist__findnew(traceid_queues_list, trace_chan_id);
564 if (!inode)
565 goto out_free;
566
567 /* Associate this traceID with this index */
568 inode->priv = (void *)(intptr_t)idx;
569
570 if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id))
571 goto out_free;
572
573 /* Grow the traceid_queues array by one unit */
574 traceid_queues = etmq->traceid_queues;
575 traceid_queues = reallocarray(traceid_queues,
576 idx + 1,
577 sizeof(*traceid_queues));
578
579 /*
580 * On failure reallocarray() returns NULL and the original block of
581 * memory is left untouched.
582 */
583 if (!traceid_queues)
584 goto out_free;
585
586 traceid_queues[idx] = tidq;
587 etmq->traceid_queues = traceid_queues;
588
589 return etmq->traceid_queues[idx];
590
591 out_free:
592 /*
593 * Function intlist__remove() removes the inode from the list
594 * and delete the memory associated to it.
595 */
596 intlist__remove(traceid_queues_list, inode);
597 free(tidq);
598
599 return NULL;
600 }
601
602 struct cs_etm_packet_queue
cs_etm__etmq_get_packet_queue(struct cs_etm_queue * etmq,u8 trace_chan_id)603 *cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
604 {
605 struct cs_etm_traceid_queue *tidq;
606
607 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
608 if (tidq)
609 return &tidq->packet_queue;
610
611 return NULL;
612 }
613
cs_etm__packet_swap(struct cs_etm_auxtrace * etm,struct cs_etm_traceid_queue * tidq)614 static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
615 struct cs_etm_traceid_queue *tidq)
616 {
617 struct cs_etm_packet *tmp;
618
619 if (etm->synth_opts.branches || etm->synth_opts.last_branch ||
620 etm->synth_opts.instructions) {
621 /*
622 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
623 * the next incoming packet.
624 *
625 * Threads and exception levels are also tracked for both the
626 * previous and current packets. This is because the previous
627 * packet is used for the 'from' IP for branch samples, so the
628 * thread at that time must also be assigned to that sample.
629 * Across discontinuity packets the thread can change, so by
630 * tracking the thread for the previous packet the branch sample
631 * will have the correct info.
632 */
633 tmp = tidq->packet;
634 tidq->packet = tidq->prev_packet;
635 tidq->prev_packet = tmp;
636 tidq->prev_packet_el = tidq->el;
637 thread__put(tidq->prev_packet_thread);
638 tidq->prev_packet_thread = thread__get(tidq->thread);
639 }
640 }
641
cs_etm__packet_dump(const char * pkt_string)642 static void cs_etm__packet_dump(const char *pkt_string)
643 {
644 const char *color = PERF_COLOR_BLUE;
645 int len = strlen(pkt_string);
646
647 if (len && (pkt_string[len-1] == '\n'))
648 color_fprintf(stdout, color, " %s", pkt_string);
649 else
650 color_fprintf(stdout, color, " %s\n", pkt_string);
651
652 fflush(stdout);
653 }
654
/* Fill decoder trace parameters for an ETMv3/PTM source from metadata. */
static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params,
					  struct cs_etm_auxtrace *etm, int t_idx,
					  int m_idx, u32 etmidr)
{
	u64 *data = etm->metadata[m_idx];
	struct cs_etm_trace_params *params = &t_params[t_idx];

	params->protocol = cs_etm__get_v7_protocol_version(etmidr);
	params->etmv3.reg_ctrl = data[CS_ETM_ETMCR];
	params->etmv3.reg_trc_id = data[CS_ETM_ETMTRACEIDR];
}
665
cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params * t_params,struct cs_etm_auxtrace * etm,int t_idx,int m_idx)666 static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
667 struct cs_etm_auxtrace *etm, int t_idx,
668 int m_idx)
669 {
670 u64 **metadata = etm->metadata;
671
672 t_params[t_idx].protocol = CS_ETM_PROTO_ETMV4i;
673 t_params[t_idx].etmv4.reg_idr0 = metadata[m_idx][CS_ETMV4_TRCIDR0];
674 t_params[t_idx].etmv4.reg_idr1 = metadata[m_idx][CS_ETMV4_TRCIDR1];
675 t_params[t_idx].etmv4.reg_idr2 = metadata[m_idx][CS_ETMV4_TRCIDR2];
676 t_params[t_idx].etmv4.reg_idr8 = metadata[m_idx][CS_ETMV4_TRCIDR8];
677 t_params[t_idx].etmv4.reg_configr = metadata[m_idx][CS_ETMV4_TRCCONFIGR];
678 t_params[t_idx].etmv4.reg_traceidr = metadata[m_idx][CS_ETMV4_TRCTRACEIDR];
679 }
680
cs_etm__set_trace_param_ete(struct cs_etm_trace_params * t_params,struct cs_etm_auxtrace * etm,int t_idx,int m_idx)681 static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
682 struct cs_etm_auxtrace *etm, int t_idx,
683 int m_idx)
684 {
685 u64 **metadata = etm->metadata;
686
687 t_params[t_idx].protocol = CS_ETM_PROTO_ETE;
688 t_params[t_idx].ete.reg_idr0 = metadata[m_idx][CS_ETE_TRCIDR0];
689 t_params[t_idx].ete.reg_idr1 = metadata[m_idx][CS_ETE_TRCIDR1];
690 t_params[t_idx].ete.reg_idr2 = metadata[m_idx][CS_ETE_TRCIDR2];
691 t_params[t_idx].ete.reg_idr8 = metadata[m_idx][CS_ETE_TRCIDR8];
692 t_params[t_idx].ete.reg_configr = metadata[m_idx][CS_ETE_TRCCONFIGR];
693 t_params[t_idx].ete.reg_traceidr = metadata[m_idx][CS_ETE_TRCTRACEIDR];
694 t_params[t_idx].ete.reg_devarch = metadata[m_idx][CS_ETE_TRCDEVARCH];
695 }
696
/*
 * Populate one set of trace parameters per decoder. Formatted data uses
 * one decoder (and metadata block) per CPU; unformatted data has a
 * single decoder fed from the metadata of the sampled CPU.
 */
static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
				     struct cs_etm_auxtrace *etm,
				     bool formatted,
				     int sample_cpu,
				     int decoders)
{
	int t_idx;

	for (t_idx = 0; t_idx < decoders; t_idx++) {
		int m_idx;
		u32 etmidr;

		if (formatted) {
			m_idx = t_idx;
		} else {
			m_idx = get_cpu_data_idx(etm, sample_cpu);
			if (m_idx == -1) {
				pr_warning("CS_ETM: unknown CPU, falling back to first metadata\n");
				m_idx = 0;
			}
		}

		switch (etm->metadata[m_idx][CS_ETM_MAGIC]) {
		case __perf_cs_etmv3_magic:
			etmidr = etm->metadata[m_idx][CS_ETM_ETMIDR];
			cs_etm__set_trace_param_etmv3(t_params, etm, t_idx, m_idx, etmidr);
			break;
		case __perf_cs_etmv4_magic:
			cs_etm__set_trace_param_etmv4(t_params, etm, t_idx, m_idx);
			break;
		case __perf_cs_ete_magic:
			cs_etm__set_trace_param_ete(t_params, etm, t_idx, m_idx);
			break;
		default:
			return -EINVAL;
		}
	}

	return 0;
}
738
/*
 * Populate the common decoder parameters for @etmq. Returns -EINVAL
 * when @mode is outside the known operation range, 0 otherwise.
 */
static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
				       struct cs_etm_queue *etmq,
				       enum cs_etm_decoder_operation mode,
				       bool formatted)
{
	if (mode >= CS_ETM_OPERATION_MAX)
		return -EINVAL;

	d_params->packet_printer = cs_etm__packet_dump;
	d_params->operation = mode;
	d_params->data = etmq;
	d_params->formatted = formatted;
	d_params->fsyncs = false;
	d_params->hsyncs = false;
	d_params->frame_aligned = true;

	return 0;
}
761
/*
 * Raw-dump mode: run the decoder over @buffer purely so the packet
 * printer emits each packet; then reset the decoder so the dump leaves
 * no decode state behind.
 */
static void cs_etm__dump_event(struct cs_etm_queue *etmq,
			       struct auxtrace_buffer *buffer)
{
	int ret;
	const char *color = PERF_COLOR_BLUE;
	size_t buffer_used = 0;

	fprintf(stdout, "\n");
	color_fprintf(stdout, color,
		     ". ... CoreSight %s Trace data: size %#zx bytes\n",
		     cs_etm_decoder__get_name(etmq->decoder), buffer->size);

	/* Feed the buffer to the decoder until consumed or it errors out. */
	do {
		size_t consumed;

		ret = cs_etm_decoder__process_data_block(
				etmq->decoder, buffer->offset,
				&((u8 *)buffer->data)[buffer_used],
				buffer->size - buffer_used, &consumed);
		if (ret)
			break;

		buffer_used += consumed;
	} while (buffer_used < buffer->size);

	cs_etm_decoder__reset(etmq->decoder);
}
789
cs_etm__flush_events(struct perf_session * session,struct perf_tool * tool)790 static int cs_etm__flush_events(struct perf_session *session,
791 struct perf_tool *tool)
792 {
793 struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
794 struct cs_etm_auxtrace,
795 auxtrace);
796 if (dump_trace)
797 return 0;
798
799 if (!tool->ordered_events)
800 return -EINVAL;
801
802 if (etm->timeless_decoding) {
803 /*
804 * Pass tid = -1 to process all queues. But likely they will have
805 * already been processed on PERF_RECORD_EXIT anyway.
806 */
807 return cs_etm__process_timeless_queues(etm, -1);
808 }
809
810 return cs_etm__process_timestamped_queues(etm);
811 }
812
/*
 * Release every traceid_queue belonging to @etmq: per-queue buffers and
 * thread references first, then each RB-tree node, then the tree and
 * the lookup array themselves.
 */
static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
{
	int idx;
	uintptr_t priv;
	struct int_node *inode, *tmp;
	struct cs_etm_traceid_queue *tidq;
	struct intlist *traceid_queues_list = etmq->traceid_queues_list;

	/* Safe iteration: nodes are removed from the list as we go. */
	intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) {
		priv = (uintptr_t)inode->priv;
		idx = priv;

		/* Free this traceid_queue from the array */
		tidq = etmq->traceid_queues[idx];
		thread__zput(tidq->thread);
		thread__zput(tidq->prev_packet_thread);
		zfree(&tidq->event_buf);
		zfree(&tidq->last_branch);
		zfree(&tidq->last_branch_rb);
		zfree(&tidq->prev_packet);
		zfree(&tidq->packet);
		zfree(&tidq);

		/*
		 * Function intlist__remove() removes the inode from the list
		 * and delete the memory associated to it.
		 */
		intlist__remove(traceid_queues_list, inode);
	}

	/* Then the RB tree itself */
	intlist__delete(traceid_queues_list);
	etmq->traceid_queues_list = NULL;

	/* finally free the traceid_queues array */
	zfree(&etmq->traceid_queues);
}
850
cs_etm__free_queue(void * priv)851 static void cs_etm__free_queue(void *priv)
852 {
853 struct cs_etm_queue *etmq = priv;
854
855 if (!etmq)
856 return;
857
858 cs_etm_decoder__free(etmq->decoder);
859 cs_etm__free_traceid_queues(etmq);
860 free(etmq);
861 }
862
cs_etm__free_events(struct perf_session * session)863 static void cs_etm__free_events(struct perf_session *session)
864 {
865 unsigned int i;
866 struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
867 struct cs_etm_auxtrace,
868 auxtrace);
869 struct auxtrace_queues *queues = &aux->queues;
870
871 for (i = 0; i < queues->nr_queues; i++) {
872 cs_etm__free_queue(queues->queue_array[i].priv);
873 queues->queue_array[i].priv = NULL;
874 }
875
876 auxtrace_queues__free(queues);
877 }
878
/*
 * Full session teardown: queues, the traceID RB tree, the per-CPU
 * metadata blocks and the cs_etm_auxtrace structure itself.
 */
static void cs_etm__free(struct perf_session *session)
{
	int i;
	struct int_node *inode, *tmp;
	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	cs_etm__free_events(session);
	session->auxtrace = NULL;

	/* First remove all traceID/metadata nodes for the RB tree */
	intlist__for_each_entry_safe(inode, tmp, traceid_list)
		intlist__remove(traceid_list, inode);
	/* Then the RB tree itself */
	intlist__delete(traceid_list);

	for (i = 0; i < aux->num_cpu; i++)
		zfree(&aux->metadata[i]);

	zfree(&aux->metadata);
	zfree(&aux);
}
901
cs_etm__evsel_is_auxtrace(struct perf_session * session,struct evsel * evsel)902 static bool cs_etm__evsel_is_auxtrace(struct perf_session *session,
903 struct evsel *evsel)
904 {
905 struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
906 struct cs_etm_auxtrace,
907 auxtrace);
908
909 return evsel->core.attr.type == aux->pmu_type;
910 }
911
/*
 * Pick the machine (host or default guest) whose maps should be used
 * to resolve addresses traced at exception level @el.
 */
static struct machine *cs_etm__get_machine(struct cs_etm_queue *etmq,
					   ocsd_ex_level el)
{
	enum cs_etm_pid_fmt pid_fmt = cs_etm__get_pid_fmt(etmq);

	/*
	 * For any virtualisation based on nVHE (e.g. pKVM), or host kernels
	 * running at EL1 assume everything is the host.
	 */
	if (pid_fmt == CS_ETM_PIDFMT_CTXTID)
		return &etmq->etm->session->machines.host;

	/*
	 * Not perfect, but otherwise assume anything in EL1 is the default
	 * guest, and everything else is the host. Distinguishing between guest
	 * and host userspaces isn't currently supported either. Neither is
	 * multiple guest support. All this does is reduce the likeliness of
	 * decode errors where we look into the host kernel maps when it should
	 * have been the guest maps.
	 */
	switch (el) {
	case ocsd_EL1:
		return machines__find_guest(&etmq->etm->session->machines,
					    DEFAULT_GUEST_KERNEL_ID);
	case ocsd_EL3:
	case ocsd_EL2:
	case ocsd_EL0:
	case ocsd_EL_unknown:
	default:
		return &etmq->etm->session->machines.host;
	}
}
944
cs_etm__cpu_mode(struct cs_etm_queue * etmq,u64 address,ocsd_ex_level el)945 static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address,
946 ocsd_ex_level el)
947 {
948 struct machine *machine = cs_etm__get_machine(etmq, el);
949
950 if (address >= machine__kernel_start(machine)) {
951 if (machine__is_host(machine))
952 return PERF_RECORD_MISC_KERNEL;
953 else
954 return PERF_RECORD_MISC_GUEST_KERNEL;
955 } else {
956 if (machine__is_host(machine))
957 return PERF_RECORD_MISC_USER;
958 else {
959 /*
960 * Can't really happen at the moment because
961 * cs_etm__get_machine() will always return
962 * machines.host for any non EL1 trace.
963 */
964 return PERF_RECORD_MISC_GUEST_USER;
965 }
966 }
967 }
968
/*
 * Decoder memory-access callback: read up to @size bytes of program
 * image at virtual @address for channel @trace_chan_id into @buffer.
 * Returns the number of bytes read, or 0 when the address can't be
 * resolved or the backing DSO data isn't available.
 */
static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
			      u64 address, size_t size, u8 *buffer,
			      const ocsd_mem_space_acc_t mem_space)
{
	u8  cpumode;
	u64 offset;
	int len;
	struct addr_location al;
	struct dso *dso;
	struct cs_etm_traceid_queue *tidq;
	int ret = 0;

	if (!etmq)
		return 0;

	/* al must be exited on every path after this point. */
	addr_location__init(&al);
	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
	if (!tidq)
		goto out;

	/*
	 * We've already tracked EL along side the PID in cs_etm__set_thread()
	 * so double check that it matches what OpenCSD thinks as well. It
	 * doesn't distinguish between EL0 and EL1 for this mem access callback
	 * so we had to do the extra tracking. Skip validation if it's any of
	 * the 'any' values.
	 */
	if (!(mem_space == OCSD_MEM_SPACE_ANY ||
	      mem_space == OCSD_MEM_SPACE_N || mem_space == OCSD_MEM_SPACE_S)) {
		if (mem_space & OCSD_MEM_SPACE_EL1N) {
			/* Includes both non secure EL1 and EL0 */
			assert(tidq->el == ocsd_EL1 || tidq->el == ocsd_EL0);
		} else if (mem_space & OCSD_MEM_SPACE_EL2)
			assert(tidq->el == ocsd_EL2);
		else if (mem_space & OCSD_MEM_SPACE_EL3)
			assert(tidq->el == ocsd_EL3);
	}

	cpumode = cs_etm__cpu_mode(etmq, address, tidq->el);

	if (!thread__find_map(tidq->thread, cpumode, address, &al))
		goto out;

	dso = map__dso(al.map);
	if (!dso)
		goto out;

	/* Don't retry DSOs that already failed during this decode. */
	if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR &&
	    dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE))
		goto out;

	offset = map__map_ip(al.map, address);

	map__load(al.map);

	len = dso__data_read_offset(dso, maps__machine(thread__maps(tidq->thread)),
				    offset, buffer, size);

	if (len <= 0) {
		/* Warn once globally, then once per DSO with the details. */
		ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n"
				 "              Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n");
		if (!dso__auxtrace_warned(dso)) {
			pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n",
			       address,
			       dso__long_name(dso) ? dso__long_name(dso) : "Unknown");
			dso__set_auxtrace_warned(dso);
		}
		goto out;
	}
	ret = len;
out:
	addr_location__exit(&al);
	return ret;
}
1043
/*
 * Allocate and initialise a cs_etm_queue: its traceid queues list and its
 * OpenCSD decoder(s), with the memory access callback registered over the
 * whole address space.
 *
 * Returns the new queue, or NULL on allocation/initialisation failure.
 * Fix: 't_params' was previously leaked on the error paths taken after its
 * allocation succeeded; it is now freed on both success and failure.
 */
static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
						bool formatted, int sample_cpu)
{
	struct cs_etm_decoder_params d_params;
	struct cs_etm_trace_params *t_params = NULL;
	struct cs_etm_queue *etmq;
	/*
	 * Each queue can only contain data from one CPU when unformatted, so only one decoder is
	 * needed.
	 */
	int decoders = formatted ? etm->num_cpu : 1;

	etmq = zalloc(sizeof(*etmq));
	if (!etmq)
		return NULL;

	etmq->traceid_queues_list = intlist__new(NULL);
	if (!etmq->traceid_queues_list)
		goto out_free;

	/* Use metadata to fill in trace parameters for trace decoder */
	t_params = zalloc(sizeof(*t_params) * decoders);

	if (!t_params)
		goto out_free;

	if (cs_etm__init_trace_params(t_params, etm, formatted, sample_cpu, decoders))
		goto out_free;

	/* Set decoder parameters to decode trace packets */
	if (cs_etm__init_decoder_params(&d_params, etmq,
					dump_trace ? CS_ETM_OPERATION_PRINT :
						     CS_ETM_OPERATION_DECODE,
					formatted))
		goto out_free;

	etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
					    t_params);

	if (!etmq->decoder)
		goto out_free;

	/*
	 * Register a function to handle all memory accesses required by
	 * the trace decoder library.
	 */
	if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
					      0x0L, ((u64) -1L),
					      cs_etm__mem_access))
		goto out_free_decoder;

	zfree(&t_params);
	return etmq;

out_free_decoder:
	cs_etm_decoder__free(etmq->decoder);
out_free:
	zfree(&t_params);
	intlist__delete(etmq->traceid_queues_list);
	free(etmq);

	return NULL;
}
1106
/*
 * Lazily create the cs_etm_queue backing an auxtrace queue. Queues with no
 * data, or that were already set up, are left untouched.
 */
static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
			       struct auxtrace_queue *queue,
			       unsigned int queue_nr,
			       bool formatted,
			       int sample_cpu)
{
	struct cs_etm_queue *etmq;

	if (queue->priv || list_empty(&queue->head))
		return 0;

	etmq = cs_etm__alloc_queue(etm, formatted, sample_cpu);
	if (!etmq)
		return -ENOMEM;

	queue->priv = etmq;
	etmq->etm = etm;
	etmq->queue_nr = queue_nr;
	etmq->offset = 0;

	return 0;
}
1130
/*
 * Decode from the start of this queue's trace until the first CS timestamp
 * is found and push it onto the auxtrace min heap, so that CPU-wide decoding
 * can later proceed across queues in chronological order.
 *
 * Returns 0 on success (or when the queue has no more data), < 0 on error.
 */
static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm,
					    struct cs_etm_queue *etmq,
					    unsigned int queue_nr)
{
	int ret = 0;
	unsigned int cs_queue_nr;
	u8 trace_chan_id;
	u64 cs_timestamp;

	/*
	 * We are under a CPU-wide trace scenario. As such we need to know
	 * when the code that generated the traces started to execute so that
	 * it can be correlated with execution on other CPUs. So we get a
	 * handle on the beginning of traces and decode until we find a
	 * timestamp. The timestamp is then added to the auxtrace min heap
	 * in order to know what nibble (of all the etmqs) to decode first.
	 */
	while (1) {
		/*
		 * Fetch an aux_buffer from this etmq. Bail if no more
		 * blocks or an error has been encountered.
		 */
		ret = cs_etm__get_data_block(etmq);
		if (ret <= 0)
			goto out;

		/*
		 * Run decoder on the trace block. The decoder will stop when
		 * encountering a CS timestamp, a full packet queue or the end of
		 * trace for that block.
		 */
		ret = cs_etm__decode_data_block(etmq);
		if (ret)
			goto out;

		/*
		 * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
		 * the timestamp calculation for us.
		 */
		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);

		/* We found a timestamp, no need to continue. */
		if (cs_timestamp)
			break;

		/*
		 * We didn't find a timestamp so empty all the traceid packet
		 * queues before looking for another timestamp packet, either
		 * in the current data block or a new one. Packets that were
		 * just decoded are useless since no timestamp has been
		 * associated with them. As such simply discard them.
		 */
		cs_etm__clear_all_packet_queues(etmq);
	}

	/*
	 * We have a timestamp. Add it to the min heap to reflect when
	 * instructions conveyed by the range packets of this traceID queue
	 * started to execute. Once the same has been done for all the traceID
	 * queues of each etmq, rendering and decoding can start in
	 * chronological order.
	 *
	 * Note that packets decoded above are still in the traceID's packet
	 * queue and will be processed in cs_etm__process_timestamped_queues().
	 */
	cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
	ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
out:
	return ret;
}
1201
/*
 * Flatten the circular last-branch buffer (tidq->last_branch_rb) into the
 * linear branch stack (tidq->last_branch) that gets attached to synthesized
 * samples, preserving reverse chronological order (most recent first).
 */
static inline
void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
				 struct cs_etm_traceid_queue *tidq)
{
	struct branch_stack *bs_src = tidq->last_branch_rb;
	struct branch_stack *bs_dst = tidq->last_branch;
	size_t nr = 0;

	/*
	 * Set the number of records before early exit: ->nr is used to
	 * determine how many branches to copy from ->entries.
	 */
	bs_dst->nr = bs_src->nr;

	/*
	 * Early exit when there is nothing to copy.
	 */
	if (!bs_src->nr)
		return;

	/*
	 * As bs_src->entries is a circular buffer, we need to copy from it in
	 * two steps. First, copy the branches from the most recently inserted
	 * branch ->last_branch_pos until the end of bs_src->entries buffer.
	 */
	nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos;
	memcpy(&bs_dst->entries[0],
	       &bs_src->entries[tidq->last_branch_pos],
	       sizeof(struct branch_entry) * nr);

	/*
	 * If we wrapped around at least once, the branches from the beginning
	 * of the bs_src->entries buffer and until the ->last_branch_pos element
	 * are older valid branches: copy them over. The total number of
	 * branches copied over will be equal to the number of branches asked by
	 * the user in last_branch_sz.
	 */
	if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
		memcpy(&bs_dst->entries[nr],
		       &bs_src->entries[0],
		       sizeof(struct branch_entry) * tidq->last_branch_pos);
	}
}
1245
1246 static inline
cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue * tidq)1247 void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq)
1248 {
1249 tidq->last_branch_pos = 0;
1250 tidq->last_branch_rb->nr = 0;
1251 }
1252
/* Return the size in bytes (2 or 4) of the T32 instruction at 'addr'. */
static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
					 u8 trace_chan_id, u64 addr)
{
	u8 instr_bytes[2];

	cs_etm__mem_access(etmq, trace_chan_id, addr, ARRAY_SIZE(instr_bytes),
			   instr_bytes, 0);
	/*
	 * T32 instruction size is indicated by bits[15:11] of the first
	 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
	 * denote a 32-bit instruction.
	 */
	if ((instr_bytes[1] & 0xF8) >= 0xE8)
		return 4;

	return 2;
}
1267
cs_etm__first_executed_instr(struct cs_etm_packet * packet)1268 static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
1269 {
1270 /* Returns 0 for the CS_ETM_DISCONTINUITY packet */
1271 if (packet->sample_type == CS_ETM_DISCONTINUITY)
1272 return 0;
1273
1274 return packet->start_addr;
1275 }
1276
1277 static inline
cs_etm__last_executed_instr(const struct cs_etm_packet * packet)1278 u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
1279 {
1280 /* Returns 0 for the CS_ETM_DISCONTINUITY packet */
1281 if (packet->sample_type == CS_ETM_DISCONTINUITY)
1282 return 0;
1283
1284 return packet->end_addr - packet->last_instr_size;
1285 }
1286
/*
 * Compute the address of the instruction 'offset' instructions into the
 * packet's range. A32/A64 instructions are a fixed 4 bytes; T32 ones must be
 * walked one by one since they can be 2 or 4 bytes.
 */
static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
				     u64 trace_chan_id,
				     const struct cs_etm_packet *packet,
				     u64 offset)
{
	u64 addr = packet->start_addr;
	u64 i;

	if (packet->isa != CS_ETM_ISA_T32)
		return addr + offset * 4;

	for (i = 0; i < offset; i++)
		addr += cs_etm__t32_instr_size(etmq, trace_chan_id, addr);

	return addr;
}
1306
/*
 * Record the branch implied by prev_packet -> packet into the circular
 * last-branch buffer. Entries are written backwards from the end of the
 * buffer so that a linear read yields reverse chronological order.
 */
static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq,
					  struct cs_etm_traceid_queue *tidq)
{
	struct branch_stack *bs = tidq->last_branch_rb;
	struct branch_entry *be;

	/*
	 * The branches are recorded in a circular buffer in reverse
	 * chronological order: we start recording from the last element of the
	 * buffer down. After writing the first element of the stack, move the
	 * insert position back to the end of the buffer.
	 */
	if (!tidq->last_branch_pos)
		tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;

	tidq->last_branch_pos -= 1;

	be = &bs->entries[tidq->last_branch_pos];
	be->from = cs_etm__last_executed_instr(tidq->prev_packet);
	be->to = cs_etm__first_executed_instr(tidq->packet);
	/* No support for mispredict */
	be->flags.mispred = 0;
	be->flags.predicted = 1;

	/*
	 * Increment bs->nr until reaching the number of last branches asked by
	 * the user on the command line.
	 */
	if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
		bs->nr += 1;
}
1338
/* Size the event header for 'sample' and serialise the sample into 'event'. */
static int cs_etm__inject_event(union perf_event *event,
				struct perf_sample *sample, u64 type)
{
	size_t sz = perf_event__sample_event_size(sample, type, 0);

	event->header.size = sz;
	return perf_event__synthesize_sample(event, type, 0, sample);
}
1345
1346
/*
 * Advance this queue to its next auxtrace buffer, dropping the data of the
 * previous one, and point etmq->buf/buf_len at the new payload (loading it
 * from the perf data file if needed).
 *
 * Returns the number of bytes now available, 0 when the queue is exhausted,
 * or -ENOMEM if the buffer data could not be loaded.
 */
static int
cs_etm__get_trace(struct cs_etm_queue *etmq)
{
	struct auxtrace_buffer *aux_buffer = etmq->buffer;
	struct auxtrace_buffer *old_buffer = aux_buffer;
	struct auxtrace_queue *queue;

	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];

	aux_buffer = auxtrace_buffer__next(queue, aux_buffer);

	/* If no more data, drop the previous auxtrace_buffer and return */
	if (!aux_buffer) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		etmq->buf_len = 0;
		return 0;
	}

	etmq->buffer = aux_buffer;

	/* If the aux_buffer doesn't have data associated, try to load it */
	if (!aux_buffer->data) {
		/* get the file desc associated with the perf data file */
		int fd = perf_data__fd(etmq->etm->session->data);

		aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd);
		if (!aux_buffer->data)
			return -ENOMEM;
	}

	/* If valid, drop the previous buffer */
	if (old_buffer)
		auxtrace_buffer__drop_data(old_buffer);

	etmq->buf_used = 0;
	etmq->buf_len = aux_buffer->size;
	etmq->buf = aux_buffer->data;

	return etmq->buf_len;
}
1388
/*
 * Bind the traceid queue to the thread for 'tid' on the machine matching
 * exception level 'el', falling back to that machine's idle thread when the
 * tid is unknown or -1. Also records 'el' for later validation.
 */
static void cs_etm__set_thread(struct cs_etm_queue *etmq,
			       struct cs_etm_traceid_queue *tidq, pid_t tid,
			       ocsd_ex_level el)
{
	struct machine *mach = cs_etm__get_machine(etmq, el);

	if (tid != -1) {
		/* Drop the previous reference before looking up the new thread. */
		thread__zput(tidq->thread);
		tidq->thread = machine__find_thread(mach, -1, tid);
	}

	if (!tidq->thread)
		tidq->thread = machine__idle_thread(mach);

	tidq->el = el;
}
1406
/*
 * Public entry point used by the decoder to update the thread and exception
 * level associated with a trace channel ID.
 */
int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid,
			    u8 trace_chan_id, ocsd_ex_level el)
{
	struct cs_etm_traceid_queue *tidq =
		cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);

	if (!tidq)
		return -EINVAL;

	cs_etm__set_thread(etmq, tidq, tid, el);
	return 0;
}
1419
cs_etm__etmq_is_timeless(struct cs_etm_queue * etmq)1420 bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq)
1421 {
1422 return !!etmq->etm->timeless_decoding;
1423 }
1424
/*
 * Fill sample->insn/insn_len with the bytes of the instruction at sample->ip
 * taken from the packet's ISA. Discontinuity packets get a zero length.
 */
static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
			      u64 trace_chan_id,
			      const struct cs_etm_packet *packet,
			      struct perf_sample *sample)
{
	/*
	 * There is nothing to read for a CS_ETM_DISCONTINUITY packet, so
	 * report a zero-length instruction and bail out.
	 */
	if (packet->sample_type == CS_ETM_DISCONTINUITY) {
		sample->insn_len = 0;
		return;
	}

	/*
	 * A32/A64 instructions are always 4 bytes; T32 ones can be 2 or 4
	 * bytes and must be sized by inspecting their encoding.
	 */
	sample->insn_len = (packet->isa == CS_ETM_ISA_T32) ?
		cs_etm__t32_instr_size(etmq, trace_chan_id, sample->ip) : 4;

	cs_etm__mem_access(etmq, trace_chan_id, sample->ip, sample->insn_len,
			   (void *)sample->insn, 0);
}
1453
/*
 * Convert a raw CS timestamp to perf time when the trace carries
 * virtual/kernel timestamps; otherwise pass it through unchanged.
 */
u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp)
{
	struct cs_etm_auxtrace *etm = etmq->etm;

	if (!etm->has_virtual_ts)
		return cs_timestamp;

	return tsc_to_perf_time(cs_timestamp, &etm->tc);
}
1463
cs_etm__resolve_sample_time(struct cs_etm_queue * etmq,struct cs_etm_traceid_queue * tidq)1464 static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq,
1465 struct cs_etm_traceid_queue *tidq)
1466 {
1467 struct cs_etm_auxtrace *etm = etmq->etm;
1468 struct cs_etm_packet_queue *packet_queue = &tidq->packet_queue;
1469
1470 if (!etm->timeless_decoding && etm->has_virtual_ts)
1471 return packet_queue->cs_timestamp;
1472 else
1473 return etm->latest_kernel_timestamp;
1474 }
1475
/*
 * Synthesize a PERF_RECORD_SAMPLE for the instructions event at 'addr',
 * covering 'period' instructions, and deliver it to the session (optionally
 * injecting it back into the event stream first).
 *
 * Returns 0 on success or the delivery/injection error code.
 */
static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
					    struct cs_etm_traceid_queue *tidq,
					    u64 addr, u64 period)
{
	int ret = 0;
	struct cs_etm_auxtrace *etm = etmq->etm;
	union perf_event *event = tidq->event_buf;
	struct perf_sample sample = {.ip = 0,};

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->el);
	event->sample.header.size = sizeof(struct perf_event_header);

	/* Set time field based on etm auxtrace config. */
	sample.time = cs_etm__resolve_sample_time(etmq, tidq);

	sample.ip = addr;
	sample.pid = thread__pid(tidq->thread);
	sample.tid = thread__tid(tidq->thread);
	sample.id = etmq->etm->instructions_id;
	sample.stream_id = etmq->etm->instructions_id;
	sample.period = period;
	sample.cpu = tidq->packet->cpu;
	sample.flags = tidq->prev_packet->flags;
	sample.cpumode = event->sample.header.misc;

	cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample);

	if (etm->synth_opts.last_branch)
		sample.branch_stack = tidq->last_branch;

	if (etm->synth_opts.inject) {
		ret = cs_etm__inject_event(event, &sample,
					   etm->instructions_sample_type);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(etm->session, event, &sample);

	if (ret)
		pr_err(
			"CS ETM Trace: failed to deliver instruction event, error %d\n",
			ret);

	return ret;
}
1523
1524 /*
1525 * The cs etm packet encodes an instruction range between a branch target
1526 * and the next taken branch. Generate sample accordingly.
1527 */
cs_etm__synth_branch_sample(struct cs_etm_queue * etmq,struct cs_etm_traceid_queue * tidq)1528 static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
1529 struct cs_etm_traceid_queue *tidq)
1530 {
1531 int ret = 0;
1532 struct cs_etm_auxtrace *etm = etmq->etm;
1533 struct perf_sample sample = {.ip = 0,};
1534 union perf_event *event = tidq->event_buf;
1535 struct dummy_branch_stack {
1536 u64 nr;
1537 u64 hw_idx;
1538 struct branch_entry entries;
1539 } dummy_bs;
1540 u64 ip;
1541
1542 ip = cs_etm__last_executed_instr(tidq->prev_packet);
1543
1544 event->sample.header.type = PERF_RECORD_SAMPLE;
1545 event->sample.header.misc = cs_etm__cpu_mode(etmq, ip,
1546 tidq->prev_packet_el);
1547 event->sample.header.size = sizeof(struct perf_event_header);
1548
1549 /* Set time field based on etm auxtrace config. */
1550 sample.time = cs_etm__resolve_sample_time(etmq, tidq);
1551
1552 sample.ip = ip;
1553 sample.pid = thread__pid(tidq->prev_packet_thread);
1554 sample.tid = thread__tid(tidq->prev_packet_thread);
1555 sample.addr = cs_etm__first_executed_instr(tidq->packet);
1556 sample.id = etmq->etm->branches_id;
1557 sample.stream_id = etmq->etm->branches_id;
1558 sample.period = 1;
1559 sample.cpu = tidq->packet->cpu;
1560 sample.flags = tidq->prev_packet->flags;
1561 sample.cpumode = event->sample.header.misc;
1562
1563 cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->prev_packet,
1564 &sample);
1565
1566 /*
1567 * perf report cannot handle events without a branch stack
1568 */
1569 if (etm->synth_opts.last_branch) {
1570 dummy_bs = (struct dummy_branch_stack){
1571 .nr = 1,
1572 .hw_idx = -1ULL,
1573 .entries = {
1574 .from = sample.ip,
1575 .to = sample.addr,
1576 },
1577 };
1578 sample.branch_stack = (struct branch_stack *)&dummy_bs;
1579 }
1580
1581 if (etm->synth_opts.inject) {
1582 ret = cs_etm__inject_event(event, &sample,
1583 etm->branches_sample_type);
1584 if (ret)
1585 return ret;
1586 }
1587
1588 ret = perf_session__deliver_synth_event(etm->session, event, &sample);
1589
1590 if (ret)
1591 pr_err(
1592 "CS ETM Trace: failed to deliver instruction event, error %d\n",
1593 ret);
1594
1595 return ret;
1596 }
1597
/*
 * Ties a dummy perf_tool to its session so cs_etm__event_synth() can recover
 * the session from the tool pointer via container_of().
 */
struct cs_etm_synth {
	struct perf_tool dummy_tool;
	struct perf_session *session;
};
1602
/*
 * perf_event__synthesize_attr() callback: recover the session from the
 * embedding cs_etm_synth and deliver the synthesized event to it.
 */
static int cs_etm__event_synth(struct perf_tool *tool,
			       union perf_event *event,
			       struct perf_sample *sample __maybe_unused,
			       struct machine *machine __maybe_unused)
{
	struct cs_etm_synth *synth =
		container_of(tool, struct cs_etm_synth, dummy_tool);

	return perf_session__deliver_synth_event(synth->session, event, NULL);
}
1614
/* Synthesize an attribute event for 'attr'/'id' and deliver it to the session. */
static int cs_etm__synth_event(struct perf_session *session,
			       struct perf_event_attr *attr, u64 id)
{
	/* Designated initializer zeroes dummy_tool, matching the old memset. */
	struct cs_etm_synth cs_etm_synth = { .session = session };

	return perf_event__synthesize_attr(&cs_etm_synth.dummy_tool, attr, 1,
					   &id, cs_etm__event_synth);
}
1626
/*
 * Create the synthetic events (branches and/or instructions, per the itrace
 * options) that decoded samples will be attributed to. The attribute is
 * cloned from the first CoreSight evsel in the session; the resulting sample
 * types and event IDs are recorded in 'etm' for later sample synthesis.
 *
 * Returns 0 on success (including when no CoreSight events are selected),
 * or the error from event synthesis.
 */
static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
				struct perf_session *session)
{
	struct evlist *evlist = session->evlist;
	struct evsel *evsel;
	struct perf_event_attr attr;
	bool found = false;
	u64 id;
	int err;

	/* Locate the evsel that carries the CoreSight trace data. */
	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == etm->pmu_type) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_debug("No selected events with CoreSight Trace data\n");
		return 0;
	}

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.size = sizeof(struct perf_event_attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD;
	/* Timeless traces have no timestamps to put in samples. */
	if (etm->timeless_decoding)
		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	else
		attr.sample_type |= PERF_SAMPLE_TIME;

	attr.exclude_user = evsel->core.attr.exclude_user;
	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
	attr.exclude_hv = evsel->core.attr.exclude_hv;
	attr.exclude_host = evsel->core.attr.exclude_host;
	attr.exclude_guest = evsel->core.attr.exclude_guest;
	attr.sample_id_all = evsel->core.attr.sample_id_all;
	attr.read_format = evsel->core.attr.read_format;

	/* create new id val to be a fixed offset from evsel id */
	id = evsel->core.id[0] + 1000000000;

	if (!id)
		id = 1;

	if (etm->synth_opts.branches) {
		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
		attr.sample_period = 1;
		attr.sample_type |= PERF_SAMPLE_ADDR;
		err = cs_etm__synth_event(session, &attr, id);
		if (err)
			return err;
		etm->branches_sample_type = attr.sample_type;
		etm->branches_id = id;
		id += 1;
		/* PERF_SAMPLE_ADDR is only meaningful for branch samples. */
		attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
	}

	if (etm->synth_opts.last_branch) {
		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
		/*
		 * We don't use the hardware index, but the sample generation
		 * code uses the new format branch_stack with this field,
		 * so the event attributes must indicate that it's present.
		 */
		attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
	}

	if (etm->synth_opts.instructions) {
		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
		attr.sample_period = etm->synth_opts.period;
		etm->instructions_sample_period = attr.sample_period;
		err = cs_etm__synth_event(session, &attr, id);
		if (err)
			return err;
		etm->instructions_sample_type = attr.sample_type;
		etm->instructions_id = id;
		id += 1;
	}

	return 0;
}
1711
/*
 * Process the freshly decoded packet in 'tidq': update the last-branch
 * buffer, emit periodic instruction samples, emit a branch sample where
 * applicable, then swap packet/prev_packet for the next round.
 *
 * Returns 0 on success or the first sample-synthesis error.
 */
static int cs_etm__sample(struct cs_etm_queue *etmq,
			  struct cs_etm_traceid_queue *tidq)
{
	struct cs_etm_auxtrace *etm = etmq->etm;
	int ret;
	u8 trace_chan_id = tidq->trace_chan_id;
	u64 instrs_prev;

	/* Get instructions remainder from previous packet */
	instrs_prev = tidq->period_instructions;

	tidq->period_instructions += tidq->packet->instr_count;

	/*
	 * Record a branch when the last instruction in
	 * PREV_PACKET is a branch.
	 */
	if (etm->synth_opts.last_branch &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE &&
	    tidq->prev_packet->last_instr_taken_branch)
		cs_etm__update_last_branch_rb(etmq, tidq);

	if (etm->synth_opts.instructions &&
	    tidq->period_instructions >= etm->instructions_sample_period) {
		/*
		 * Emit instruction sample periodically
		 * TODO: allow period to be defined in cycles and clock time
		 */

		/*
		 * Below diagram demonstrates the instruction samples
		 * generation flows:
		 *
		 *    Instrs     Instrs       Instrs       Instrs
		 *   Sample(n)  Sample(n+1)  Sample(n+2)  Sample(n+3)
		 *    |            |            |            |
		 *    V            V            V            V
		 *   --------------------------------------------------
		 *            ^                                  ^
		 *            |                                  |
		 *         Period                             Period
		 *    instructions(Pi)                   instructions(Pi')
		 *
		 *            |                                  |
		 *            \---------------- -----------------/
		 *                             V
		 *                 tidq->packet->instr_count
		 *
		 * Instrs Sample(n...) are the synthesised samples occurring
		 * every etm->instructions_sample_period instructions - as
		 * defined on the perf command line.  Sample(n) is being the
		 * last sample before the current etm packet, n+1 to n+3
		 * samples are generated from the current etm packet.
		 *
		 * tidq->packet->instr_count represents the number of
		 * instructions in the current etm packet.
		 *
		 * Period instructions (Pi) contains the number of
		 * instructions executed after the sample point(n) from the
		 * previous etm packet.  This will always be less than
		 * etm->instructions_sample_period.
		 *
		 * When generate new samples, it combines with two parts
		 * instructions, one is the tail of the old packet and another
		 * is the head of the new coming packet, to generate
		 * sample(n+1); sample(n+2) and sample(n+3) consume the
		 * instructions with sample period.  After sample(n+3), the rest
		 * instructions will be used by later packet and it is assigned
		 * to tidq->period_instructions for next round calculation.
		 */

		/*
		 * Get the initial offset into the current packet instructions;
		 * entry conditions ensure that instrs_prev is less than
		 * etm->instructions_sample_period.
		 */
		u64 offset = etm->instructions_sample_period - instrs_prev;
		u64 addr;

		/* Prepare last branches for instruction sample */
		if (etm->synth_opts.last_branch)
			cs_etm__copy_last_branch_rb(etmq, tidq);

		while (tidq->period_instructions >=
				etm->instructions_sample_period) {
			/*
			 * Calculate the address of the sampled instruction (-1
			 * as sample is reported as though instruction has just
			 * been executed, but PC has not advanced to next
			 * instruction)
			 */
			addr = cs_etm__instr_addr(etmq, trace_chan_id,
						  tidq->packet, offset - 1);
			ret = cs_etm__synth_instruction_sample(
				etmq, tidq, addr,
				etm->instructions_sample_period);
			if (ret)
				return ret;

			offset += etm->instructions_sample_period;
			tidq->period_instructions -=
				etm->instructions_sample_period;
		}
	}

	if (etm->synth_opts.branches) {
		bool generate_sample = false;

		/* Generate sample for tracing on packet */
		if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY)
			generate_sample = true;

		/* Generate sample for branch taken packet */
		if (tidq->prev_packet->sample_type == CS_ETM_RANGE &&
		    tidq->prev_packet->last_instr_taken_branch)
			generate_sample = true;

		if (generate_sample) {
			ret = cs_etm__synth_branch_sample(etmq, tidq);
			if (ret)
				return ret;
		}
	}

	cs_etm__packet_swap(etm, tidq);

	return 0;
}
1840
cs_etm__exception(struct cs_etm_traceid_queue * tidq)1841 static int cs_etm__exception(struct cs_etm_traceid_queue *tidq)
1842 {
1843 /*
1844 * When the exception packet is inserted, whether the last instruction
1845 * in previous range packet is taken branch or not, we need to force
1846 * to set 'prev_packet->last_instr_taken_branch' to true. This ensures
1847 * to generate branch sample for the instruction range before the
1848 * exception is trapped to kernel or before the exception returning.
1849 *
1850 * The exception packet includes the dummy address values, so don't
1851 * swap PACKET with PREV_PACKET. This keeps PREV_PACKET to be useful
1852 * for generating instruction and branch samples.
1853 */
1854 if (tidq->prev_packet->sample_type == CS_ETM_RANGE)
1855 tidq->prev_packet->last_instr_taken_branch = true;
1856
1857 return 0;
1858 }
1859
/*
 * Emit the samples still pending for prev_packet (a last-branch instruction
 * sample and/or a branch sample), then swap packets and reset the last-branch
 * buffer. Used when a trace block/stream boundary is reached.
 *
 * Returns 0 on success or the first sample-synthesis error.
 */
static int cs_etm__flush(struct cs_etm_queue *etmq,
			 struct cs_etm_traceid_queue *tidq)
{
	int err = 0;
	struct cs_etm_auxtrace *etm = etmq->etm;

	/* Handle start tracing packet */
	if (tidq->prev_packet->sample_type == CS_ETM_EMPTY)
		goto swap_packet;

	if (etmq->etm->synth_opts.last_branch &&
	    etmq->etm->synth_opts.instructions &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
		u64 addr;

		/* Prepare last branches for instruction sample */
		cs_etm__copy_last_branch_rb(etmq, tidq);

		/*
		 * Generate a last branch event for the branches left in the
		 * circular buffer at the end of the trace.
		 *
		 * Use the address of the end of the last reported execution
		 * range
		 */
		addr = cs_etm__last_executed_instr(tidq->prev_packet);

		err = cs_etm__synth_instruction_sample(
			etmq, tidq, addr,
			tidq->period_instructions);
		if (err)
			return err;

		tidq->period_instructions = 0;

	}

	if (etm->synth_opts.branches &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
		err = cs_etm__synth_branch_sample(etmq, tidq);
		if (err)
			return err;
	}

swap_packet:
	cs_etm__packet_swap(etm, tidq);

	/* Reset last branches after flush the trace */
	if (etm->synth_opts.last_branch)
		cs_etm__reset_last_branch_rb(tidq);

	return err;
}
1913
/*
 * End-of-trace handling: flush only the last-branch buffer as one final
 * instruction sample. Unlike cs_etm__flush(), no branch sample is generated
 * since 'packet' at this point is stale.
 */
static int cs_etm__end_block(struct cs_etm_queue *etmq,
			     struct cs_etm_traceid_queue *tidq)
{
	int err;

	/*
	 * It has no new packet coming and 'etmq->packet' contains the stale
	 * packet which was set at the previous time with packets swapping;
	 * so skip to generate branch sample to avoid stale packet.
	 *
	 * For this case only flush branch stack and generate a last branch
	 * event for the branches left in the circular buffer at the end of
	 * the trace.
	 */
	if (etmq->etm->synth_opts.last_branch &&
	    etmq->etm->synth_opts.instructions &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
		u64 addr;

		/* Prepare last branches for instruction sample */
		cs_etm__copy_last_branch_rb(etmq, tidq);

		/*
		 * Use the address of the end of the last reported execution
		 * range.
		 */
		addr = cs_etm__last_executed_instr(tidq->prev_packet);

		err = cs_etm__synth_instruction_sample(
			etmq, tidq, addr,
			tidq->period_instructions);
		if (err)
			return err;

		tidq->period_instructions = 0;
	}

	return 0;
}
1953 /*
1954 * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue
1955 * if need be.
1956 * Returns: < 0 if error
1957 * = 0 if no more auxtrace_buffer to read
1958 * > 0 if the current buffer isn't empty yet
1959 */
cs_etm__get_data_block(struct cs_etm_queue * etmq)1960 static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
1961 {
1962 int ret;
1963
1964 if (!etmq->buf_len) {
1965 ret = cs_etm__get_trace(etmq);
1966 if (ret <= 0)
1967 return ret;
1968 /*
1969 * We cannot assume consecutive blocks in the data file
1970 * are contiguous, reset the decoder to force re-sync.
1971 */
1972 ret = cs_etm_decoder__reset(etmq->decoder);
1973 if (ret)
1974 return ret;
1975 }
1976
1977 return etmq->buf_len;
1978 }
1979
cs_etm__is_svc_instr(struct cs_etm_queue * etmq,u8 trace_chan_id,struct cs_etm_packet * packet,u64 end_addr)1980 static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id,
1981 struct cs_etm_packet *packet,
1982 u64 end_addr)
1983 {
1984 /* Initialise to keep compiler happy */
1985 u16 instr16 = 0;
1986 u32 instr32 = 0;
1987 u64 addr;
1988
1989 switch (packet->isa) {
1990 case CS_ETM_ISA_T32:
1991 /*
1992 * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247:
1993 *
1994 * b'15 b'8
1995 * +-----------------+--------+
1996 * | 1 1 0 1 1 1 1 1 | imm8 |
1997 * +-----------------+--------+
1998 *
1999 * According to the specification, it only defines SVC for T32
2000 * with 16 bits instruction and has no definition for 32bits;
2001 * so below only read 2 bytes as instruction size for T32.
2002 */
2003 addr = end_addr - 2;
2004 cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr16),
2005 (u8 *)&instr16, 0);
2006 if ((instr16 & 0xFF00) == 0xDF00)
2007 return true;
2008
2009 break;
2010 case CS_ETM_ISA_A32:
2011 /*
2012 * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247:
2013 *
2014 * b'31 b'28 b'27 b'24
2015 * +---------+---------+-------------------------+
2016 * | !1111 | 1 1 1 1 | imm24 |
2017 * +---------+---------+-------------------------+
2018 */
2019 addr = end_addr - 4;
2020 cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
2021 (u8 *)&instr32, 0);
2022 if ((instr32 & 0x0F000000) == 0x0F000000 &&
2023 (instr32 & 0xF0000000) != 0xF0000000)
2024 return true;
2025
2026 break;
2027 case CS_ETM_ISA_A64:
2028 /*
2029 * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294:
2030 *
2031 * b'31 b'21 b'4 b'0
2032 * +-----------------------+---------+-----------+
2033 * | 1 1 0 1 0 1 0 0 0 0 0 | imm16 | 0 0 0 0 1 |
2034 * +-----------------------+---------+-----------+
2035 */
2036 addr = end_addr - 4;
2037 cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
2038 (u8 *)&instr32, 0);
2039 if ((instr32 & 0xFFE0001F) == 0xd4000001)
2040 return true;
2041
2042 break;
2043 case CS_ETM_ISA_UNKNOWN:
2044 default:
2045 break;
2046 }
2047
2048 return false;
2049 }
2050
cs_etm__is_syscall(struct cs_etm_queue * etmq,struct cs_etm_traceid_queue * tidq,u64 magic)2051 static bool cs_etm__is_syscall(struct cs_etm_queue *etmq,
2052 struct cs_etm_traceid_queue *tidq, u64 magic)
2053 {
2054 u8 trace_chan_id = tidq->trace_chan_id;
2055 struct cs_etm_packet *packet = tidq->packet;
2056 struct cs_etm_packet *prev_packet = tidq->prev_packet;
2057
2058 if (magic == __perf_cs_etmv3_magic)
2059 if (packet->exception_number == CS_ETMV3_EXC_SVC)
2060 return true;
2061
2062 /*
2063 * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and
2064 * HVC cases; need to check if it's SVC instruction based on
2065 * packet address.
2066 */
2067 if (magic == __perf_cs_etmv4_magic) {
2068 if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2069 cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
2070 prev_packet->end_addr))
2071 return true;
2072 }
2073
2074 return false;
2075 }
2076
cs_etm__is_async_exception(struct cs_etm_traceid_queue * tidq,u64 magic)2077 static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq,
2078 u64 magic)
2079 {
2080 struct cs_etm_packet *packet = tidq->packet;
2081
2082 if (magic == __perf_cs_etmv3_magic)
2083 if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT ||
2084 packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT ||
2085 packet->exception_number == CS_ETMV3_EXC_PE_RESET ||
2086 packet->exception_number == CS_ETMV3_EXC_IRQ ||
2087 packet->exception_number == CS_ETMV3_EXC_FIQ)
2088 return true;
2089
2090 if (magic == __perf_cs_etmv4_magic)
2091 if (packet->exception_number == CS_ETMV4_EXC_RESET ||
2092 packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT ||
2093 packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR ||
2094 packet->exception_number == CS_ETMV4_EXC_INST_DEBUG ||
2095 packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG ||
2096 packet->exception_number == CS_ETMV4_EXC_IRQ ||
2097 packet->exception_number == CS_ETMV4_EXC_FIQ)
2098 return true;
2099
2100 return false;
2101 }
2102
cs_etm__is_sync_exception(struct cs_etm_queue * etmq,struct cs_etm_traceid_queue * tidq,u64 magic)2103 static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq,
2104 struct cs_etm_traceid_queue *tidq,
2105 u64 magic)
2106 {
2107 u8 trace_chan_id = tidq->trace_chan_id;
2108 struct cs_etm_packet *packet = tidq->packet;
2109 struct cs_etm_packet *prev_packet = tidq->prev_packet;
2110
2111 if (magic == __perf_cs_etmv3_magic)
2112 if (packet->exception_number == CS_ETMV3_EXC_SMC ||
2113 packet->exception_number == CS_ETMV3_EXC_HYP ||
2114 packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE ||
2115 packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR ||
2116 packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT ||
2117 packet->exception_number == CS_ETMV3_EXC_DATA_FAULT ||
2118 packet->exception_number == CS_ETMV3_EXC_GENERIC)
2119 return true;
2120
2121 if (magic == __perf_cs_etmv4_magic) {
2122 if (packet->exception_number == CS_ETMV4_EXC_TRAP ||
2123 packet->exception_number == CS_ETMV4_EXC_ALIGNMENT ||
2124 packet->exception_number == CS_ETMV4_EXC_INST_FAULT ||
2125 packet->exception_number == CS_ETMV4_EXC_DATA_FAULT)
2126 return true;
2127
2128 /*
2129 * For CS_ETMV4_EXC_CALL, except SVC other instructions
2130 * (SMC, HVC) are taken as sync exceptions.
2131 */
2132 if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2133 !cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
2134 prev_packet->end_addr))
2135 return true;
2136
2137 /*
2138 * ETMv4 has 5 bits for exception number; if the numbers
2139 * are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ]
2140 * they are implementation defined exceptions.
2141 *
2142 * For this case, simply take it as sync exception.
2143 */
2144 if (packet->exception_number > CS_ETMV4_EXC_FIQ &&
2145 packet->exception_number <= CS_ETMV4_EXC_END)
2146 return true;
2147 }
2148
2149 return false;
2150 }
2151
/*
 * cs_etm__set_sample_flags: derive perf branch flags (PERF_IP_FLAG_*) for
 * the current packet and, where the packet's meaning only becomes clear in
 * hindsight, also calibrate the previous packet's flags.
 *
 * Must be called before the packet/prev_packet swap since it inspects the
 * current packet's start/end addresses.
 *
 * Returns 0 on success, a negative error code otherwise.
 */
static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
				    struct cs_etm_traceid_queue *tidq)
{
	struct cs_etm_packet *packet = tidq->packet;
	struct cs_etm_packet *prev_packet = tidq->prev_packet;
	u8 trace_chan_id = tidq->trace_chan_id;
	u64 magic;
	int ret;

	switch (packet->sample_type) {
	case CS_ETM_RANGE:
		/*
		 * Immediate branch instruction with neither link nor
		 * return flag: a normal branch instruction within
		 * the function.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR &&
		    packet->last_instr_subtype == OCSD_S_INSTR_NONE) {
			packet->flags = PERF_IP_FLAG_BRANCH;

			if (packet->last_instr_cond)
				packet->flags |= PERF_IP_FLAG_CONDITIONAL;
		}

		/*
		 * Immediate branch instruction with link (e.g. BL), this is
		 * branch instruction for function call.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR &&
		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL;

		/*
		 * Indirect branch instruction with link (e.g. BLR), this is
		 * branch instruction for function call.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL;

		/*
		 * Indirect branch instruction with subtype of
		 * OCSD_S_INSTR_V7_IMPLIED_RET, this is explicit hint for
		 * function return for A32/T32.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_RETURN;

		/*
		 * Indirect branch instruction without link (e.g. BR), usually
		 * this is used for function return, especially for functions
		 * within dynamic link lib.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_NONE)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_RETURN;

		/* Return instruction (e.g. A64 RET) for function return. */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_V8_RET)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_RETURN;

		/*
		 * Decoder might insert a discontinuity in the middle of
		 * instruction packets, fixup prev_packet with flag
		 * PERF_IP_FLAG_TRACE_BEGIN to indicate restarting trace.
		 */
		if (prev_packet->sample_type == CS_ETM_DISCONTINUITY)
			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
					      PERF_IP_FLAG_TRACE_BEGIN;

		/*
		 * If the previous packet is an exception return packet
		 * and the return address just follows an SVC instruction,
		 * calibrate the previous packet's sample flags to
		 * PERF_IP_FLAG_SYSCALLRET (it was a syscall return, not a
		 * plain interrupt return).
		 */
		if (prev_packet->flags == (PERF_IP_FLAG_BRANCH |
					   PERF_IP_FLAG_RETURN |
					   PERF_IP_FLAG_INTERRUPT) &&
		    cs_etm__is_svc_instr(etmq, trace_chan_id,
					 packet, packet->start_addr))
			prev_packet->flags = PERF_IP_FLAG_BRANCH |
					     PERF_IP_FLAG_RETURN |
					     PERF_IP_FLAG_SYSCALLRET;
		break;
	case CS_ETM_DISCONTINUITY:
		/*
		 * The trace is discontinuous, if the previous packet is
		 * instruction packet, set flag PERF_IP_FLAG_TRACE_END
		 * for previous packet.
		 */
		if (prev_packet->sample_type == CS_ETM_RANGE)
			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
					      PERF_IP_FLAG_TRACE_END;
		break;
	case CS_ETM_EXCEPTION:
		/* Magic selects the ETMv3 vs ETMv4 exception numbering */
		ret = cs_etm__get_magic(packet->trace_chan_id, &magic);
		if (ret)
			return ret;

		/* The exception is for system call. */
		if (cs_etm__is_syscall(etmq, tidq, magic))
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL |
					PERF_IP_FLAG_SYSCALLRET;
		/*
		 * The exceptions are triggered by external signals from bus,
		 * interrupt controller, debug module, PE reset or halt.
		 */
		else if (cs_etm__is_async_exception(tidq, magic))
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL |
					PERF_IP_FLAG_ASYNC |
					PERF_IP_FLAG_INTERRUPT;
		/*
		 * Otherwise, exception is caused by trap, instruction &
		 * data fault, or alignment errors.
		 */
		else if (cs_etm__is_sync_exception(etmq, tidq, magic))
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL |
					PERF_IP_FLAG_INTERRUPT;

		/*
		 * When the exception packet is inserted, since exception
		 * packet is not used standalone for generating samples
		 * and it's affiliation to the previous instruction range
		 * packet; so set previous range packet flags to tell perf
		 * it is an exception taken branch.
		 */
		if (prev_packet->sample_type == CS_ETM_RANGE)
			prev_packet->flags = packet->flags;
		break;
	case CS_ETM_EXCEPTION_RET:
		/*
		 * When the exception return packet is inserted, since
		 * exception return packet is not used standalone for
		 * generating samples and it's affiliation to the previous
		 * instruction range packet; so set previous range packet
		 * flags to tell perf it is an exception return branch.
		 *
		 * The exception return can be for either system call or
		 * other exception types; unfortunately the packet doesn't
		 * contain exception type related info so we cannot decide
		 * the exception type purely based on exception return packet.
		 * If we record the exception number from exception packet and
		 * reuse it for exception return packet, this is not reliable
		 * because the trace can be discontinuous or the interrupt can
		 * be nested, thus the recorded exception number cannot be
		 * used for exception return packet for these two cases.
		 *
		 * For exception return packet, we only need to distinguish the
		 * packet is for system call or for other types. Thus the
		 * decision can be deferred when receive the next packet which
		 * contains the return address, based on the return address we
		 * can read out the previous instruction and check if it's a
		 * system call instruction and then calibrate the sample flag
		 * as needed.
		 */
		if (prev_packet->sample_type == CS_ETM_RANGE)
			prev_packet->flags = PERF_IP_FLAG_BRANCH |
					     PERF_IP_FLAG_RETURN |
					     PERF_IP_FLAG_INTERRUPT;
		break;
	case CS_ETM_EMPTY:
	default:
		break;
	}

	return 0;
}
2330
cs_etm__decode_data_block(struct cs_etm_queue * etmq)2331 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq)
2332 {
2333 int ret = 0;
2334 size_t processed = 0;
2335
2336 /*
2337 * Packets are decoded and added to the decoder's packet queue
2338 * until the decoder packet processing callback has requested that
2339 * processing stops or there is nothing left in the buffer. Normal
2340 * operations that stop processing are a timestamp packet or a full
2341 * decoder buffer queue.
2342 */
2343 ret = cs_etm_decoder__process_data_block(etmq->decoder,
2344 etmq->offset,
2345 &etmq->buf[etmq->buf_used],
2346 etmq->buf_len,
2347 &processed);
2348 if (ret)
2349 goto out;
2350
2351 etmq->offset += processed;
2352 etmq->buf_used += processed;
2353 etmq->buf_len -= processed;
2354
2355 out:
2356 return ret;
2357 }
2358
cs_etm__process_traceid_queue(struct cs_etm_queue * etmq,struct cs_etm_traceid_queue * tidq)2359 static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq,
2360 struct cs_etm_traceid_queue *tidq)
2361 {
2362 int ret;
2363 struct cs_etm_packet_queue *packet_queue;
2364
2365 packet_queue = &tidq->packet_queue;
2366
2367 /* Process each packet in this chunk */
2368 while (1) {
2369 ret = cs_etm_decoder__get_packet(packet_queue,
2370 tidq->packet);
2371 if (ret <= 0)
2372 /*
2373 * Stop processing this chunk on
2374 * end of data or error
2375 */
2376 break;
2377
2378 /*
2379 * Since packet addresses are swapped in packet
2380 * handling within below switch() statements,
2381 * thus setting sample flags must be called
2382 * prior to switch() statement to use address
2383 * information before packets swapping.
2384 */
2385 ret = cs_etm__set_sample_flags(etmq, tidq);
2386 if (ret < 0)
2387 break;
2388
2389 switch (tidq->packet->sample_type) {
2390 case CS_ETM_RANGE:
2391 /*
2392 * If the packet contains an instruction
2393 * range, generate instruction sequence
2394 * events.
2395 */
2396 cs_etm__sample(etmq, tidq);
2397 break;
2398 case CS_ETM_EXCEPTION:
2399 case CS_ETM_EXCEPTION_RET:
2400 /*
2401 * If the exception packet is coming,
2402 * make sure the previous instruction
2403 * range packet to be handled properly.
2404 */
2405 cs_etm__exception(tidq);
2406 break;
2407 case CS_ETM_DISCONTINUITY:
2408 /*
2409 * Discontinuity in trace, flush
2410 * previous branch stack
2411 */
2412 cs_etm__flush(etmq, tidq);
2413 break;
2414 case CS_ETM_EMPTY:
2415 /*
2416 * Should not receive empty packet,
2417 * report error.
2418 */
2419 pr_err("CS ETM Trace: empty packet\n");
2420 return -EINVAL;
2421 default:
2422 break;
2423 }
2424 }
2425
2426 return ret;
2427 }
2428
cs_etm__clear_all_traceid_queues(struct cs_etm_queue * etmq)2429 static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq)
2430 {
2431 int idx;
2432 struct int_node *inode;
2433 struct cs_etm_traceid_queue *tidq;
2434 struct intlist *traceid_queues_list = etmq->traceid_queues_list;
2435
2436 intlist__for_each_entry(inode, traceid_queues_list) {
2437 idx = (int)(intptr_t)inode->priv;
2438 tidq = etmq->traceid_queues[idx];
2439
2440 /* Ignore return value */
2441 cs_etm__process_traceid_queue(etmq, tidq);
2442
2443 /*
2444 * Generate an instruction sample with the remaining
2445 * branchstack entries.
2446 */
2447 cs_etm__flush(etmq, tidq);
2448 }
2449 }
2450
cs_etm__run_per_thread_timeless_decoder(struct cs_etm_queue * etmq)2451 static int cs_etm__run_per_thread_timeless_decoder(struct cs_etm_queue *etmq)
2452 {
2453 int err = 0;
2454 struct cs_etm_traceid_queue *tidq;
2455
2456 tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID);
2457 if (!tidq)
2458 return -EINVAL;
2459
2460 /* Go through each buffer in the queue and decode them one by one */
2461 while (1) {
2462 err = cs_etm__get_data_block(etmq);
2463 if (err <= 0)
2464 return err;
2465
2466 /* Run trace decoder until buffer consumed or end of trace */
2467 do {
2468 err = cs_etm__decode_data_block(etmq);
2469 if (err)
2470 return err;
2471
2472 /*
2473 * Process each packet in this chunk, nothing to do if
2474 * an error occurs other than hoping the next one will
2475 * be better.
2476 */
2477 err = cs_etm__process_traceid_queue(etmq, tidq);
2478
2479 } while (etmq->buf_len);
2480
2481 if (err == 0)
2482 /* Flush any remaining branch stack entries */
2483 err = cs_etm__end_block(etmq, tidq);
2484 }
2485
2486 return err;
2487 }
2488
cs_etm__run_per_cpu_timeless_decoder(struct cs_etm_queue * etmq)2489 static int cs_etm__run_per_cpu_timeless_decoder(struct cs_etm_queue *etmq)
2490 {
2491 int idx, err = 0;
2492 struct cs_etm_traceid_queue *tidq;
2493 struct int_node *inode;
2494
2495 /* Go through each buffer in the queue and decode them one by one */
2496 while (1) {
2497 err = cs_etm__get_data_block(etmq);
2498 if (err <= 0)
2499 return err;
2500
2501 /* Run trace decoder until buffer consumed or end of trace */
2502 do {
2503 err = cs_etm__decode_data_block(etmq);
2504 if (err)
2505 return err;
2506
2507 /*
2508 * cs_etm__run_per_thread_timeless_decoder() runs on a
2509 * single traceID queue because each TID has a separate
2510 * buffer. But here in per-cpu mode we need to iterate
2511 * over each channel instead.
2512 */
2513 intlist__for_each_entry(inode,
2514 etmq->traceid_queues_list) {
2515 idx = (int)(intptr_t)inode->priv;
2516 tidq = etmq->traceid_queues[idx];
2517 cs_etm__process_traceid_queue(etmq, tidq);
2518 }
2519 } while (etmq->buf_len);
2520
2521 intlist__for_each_entry(inode, etmq->traceid_queues_list) {
2522 idx = (int)(intptr_t)inode->priv;
2523 tidq = etmq->traceid_queues[idx];
2524 /* Flush any remaining branch stack entries */
2525 err = cs_etm__end_block(etmq, tidq);
2526 if (err)
2527 return err;
2528 }
2529 }
2530
2531 return err;
2532 }
2533
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
					   pid_t tid)
{
	struct auxtrace_queues *queues = &etm->queues;
	unsigned int i;

	/* Decode each populated queue with the mode-appropriate decoder */
	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &queues->queue_array[i];
		struct cs_etm_queue *etmq = queue->priv;
		struct cs_etm_traceid_queue *tidq;

		if (!etmq)
			continue;

		if (!etm->per_thread_decoding) {
			cs_etm__run_per_cpu_timeless_decoder(etmq);
			continue;
		}

		tidq = cs_etm__etmq_get_traceid_queue(etmq,
						      CS_ETM_PER_THREAD_TRACEID);
		if (!tidq)
			continue;

		/* tid == -1 selects every thread */
		if (tid == -1 || thread__tid(tidq->thread) == tid)
			cs_etm__run_per_thread_timeless_decoder(etmq);
	}

	return 0;
}
2563
/*
 * cs_etm__process_timestamped_queues: decode all queues in timestamp order.
 * A min heap (etm->heap) keyed on each queue/traceID's next CoreSight
 * timestamp determines which queue is serviced next, so that samples from
 * different queues are synthesized in time order.
 *
 * Returns 0 on success, a negative error code otherwise.
 */
static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm)
{
	int ret = 0;
	unsigned int cs_queue_nr, queue_nr, i;
	u8 trace_chan_id;
	u64 cs_timestamp;
	struct auxtrace_queue *queue;
	struct cs_etm_queue *etmq;
	struct cs_etm_traceid_queue *tidq;

	/*
	 * Pre-populate the heap with one entry from each queue so that we can
	 * start processing in time order across all queues.
	 */
	for (i = 0; i < etm->queues.nr_queues; i++) {
		etmq = etm->queues.queue_array[i].priv;
		if (!etmq)
			continue;

		ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i);
		if (ret)
			return ret;
	}

	/* Service queues until the heap runs dry */
	while (1) {
		if (!etm->heap.heap_cnt)
			goto out;

		/* Take the entry at the top of the min heap */
		cs_queue_nr = etm->heap.heap_array[0].queue_nr;
		/* cs_queue_nr packs queue number and trace channel ID */
		queue_nr = TO_QUEUE_NR(cs_queue_nr);
		trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr);
		queue = &etm->queues.queue_array[queue_nr];
		etmq = queue->priv;

		/*
		 * Remove the top entry from the heap since we are about
		 * to process it.
		 */
		auxtrace_heap__pop(&etm->heap);

		tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
		if (!tidq) {
			/*
			 * No traceID queue has been allocated for this traceID,
			 * which means something somewhere went very wrong. No
			 * other choice than simply exit.
			 */
			ret = -EINVAL;
			goto out;
		}

		/*
		 * Packets associated with this timestamp are already in
		 * the etmq's traceID queue, so process them.
		 */
		ret = cs_etm__process_traceid_queue(etmq, tidq);
		if (ret < 0)
			goto out;

		/*
		 * Packets for this timestamp have been processed, time to
		 * move on to the next timestamp, fetching a new auxtrace_buffer
		 * if need be.
		 */
refetch:
		ret = cs_etm__get_data_block(etmq);
		if (ret < 0)
			goto out;

		/*
		 * No more auxtrace_buffers to process in this etmq, simply
		 * move on to another entry in the auxtrace_heap.
		 */
		if (!ret)
			continue;

		ret = cs_etm__decode_data_block(etmq);
		if (ret)
			goto out;

		/* A zero timestamp means the buffer held no timestamp */
		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);

		if (!cs_timestamp) {
			/*
			 * Function cs_etm__decode_data_block() returns when
			 * there is no more traces to decode in the current
			 * auxtrace_buffer OR when a timestamp has been
			 * encountered on any of the traceID queues. Since we
			 * did not get a timestamp, there is no more traces to
			 * process in this auxtrace_buffer. As such empty and
			 * flush all traceID queues.
			 */
			cs_etm__clear_all_traceid_queues(etmq);

			/* Fetch another auxtrace_buffer for this etmq */
			goto refetch;
		}

		/*
		 * Add to the min heap the timestamp for packets that have
		 * just been decoded. They will be processed and synthesized
		 * during the next call to cs_etm__process_traceid_queue() for
		 * this queue/traceID.
		 */
		cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
		ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
	}

out:
	return ret;
}
2676
cs_etm__process_itrace_start(struct cs_etm_auxtrace * etm,union perf_event * event)2677 static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm,
2678 union perf_event *event)
2679 {
2680 struct thread *th;
2681
2682 if (etm->timeless_decoding)
2683 return 0;
2684
2685 /*
2686 * Add the tid/pid to the log so that we can get a match when we get a
2687 * contextID from the decoder. Only track for the host: only kernel
2688 * trace is supported for guests which wouldn't need pids so this should
2689 * be fine.
2690 */
2691 th = machine__findnew_thread(&etm->session->machines.host,
2692 event->itrace_start.pid,
2693 event->itrace_start.tid);
2694 if (!th)
2695 return -ENOMEM;
2696
2697 thread__put(th);
2698
2699 return 0;
2700 }
2701
cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace * etm,union perf_event * event)2702 static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm,
2703 union perf_event *event)
2704 {
2705 struct thread *th;
2706 bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
2707
2708 /*
2709 * Context switch in per-thread mode are irrelevant since perf
2710 * will start/stop tracing as the process is scheduled.
2711 */
2712 if (etm->timeless_decoding)
2713 return 0;
2714
2715 /*
2716 * SWITCH_IN events carry the next process to be switched out while
2717 * SWITCH_OUT events carry the process to be switched in. As such
2718 * we don't care about IN events.
2719 */
2720 if (!out)
2721 return 0;
2722
2723 /*
2724 * Add the tid/pid to the log so that we can get a match when we get a
2725 * contextID from the decoder. Only track for the host: only kernel
2726 * trace is supported for guests which wouldn't need pids so this should
2727 * be fine.
2728 */
2729 th = machine__findnew_thread(&etm->session->machines.host,
2730 event->context_switch.next_prev_pid,
2731 event->context_switch.next_prev_tid);
2732 if (!th)
2733 return -ENOMEM;
2734
2735 thread__put(th);
2736
2737 return 0;
2738 }
2739
cs_etm__process_event(struct perf_session * session,union perf_event * event,struct perf_sample * sample,struct perf_tool * tool)2740 static int cs_etm__process_event(struct perf_session *session,
2741 union perf_event *event,
2742 struct perf_sample *sample,
2743 struct perf_tool *tool)
2744 {
2745 struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2746 struct cs_etm_auxtrace,
2747 auxtrace);
2748
2749 if (dump_trace)
2750 return 0;
2751
2752 if (!tool->ordered_events) {
2753 pr_err("CoreSight ETM Trace requires ordered events\n");
2754 return -EINVAL;
2755 }
2756
2757 switch (event->header.type) {
2758 case PERF_RECORD_EXIT:
2759 /*
2760 * Don't need to wait for cs_etm__flush_events() in per-thread mode to
2761 * start the decode because we know there will be no more trace from
2762 * this thread. All this does is emit samples earlier than waiting for
2763 * the flush in other modes, but with timestamps it makes sense to wait
2764 * for flush so that events from different threads are interleaved
2765 * properly.
2766 */
2767 if (etm->per_thread_decoding && etm->timeless_decoding)
2768 return cs_etm__process_timeless_queues(etm,
2769 event->fork.tid);
2770 break;
2771
2772 case PERF_RECORD_ITRACE_START:
2773 return cs_etm__process_itrace_start(etm, event);
2774
2775 case PERF_RECORD_SWITCH_CPU_WIDE:
2776 return cs_etm__process_switch_cpu_wide(etm, event);
2777
2778 case PERF_RECORD_AUX:
2779 /*
2780 * Record the latest kernel timestamp available in the header
2781 * for samples so that synthesised samples occur from this point
2782 * onwards.
2783 */
2784 if (sample->time && (sample->time != (u64)-1))
2785 etm->latest_kernel_timestamp = sample->time;
2786 break;
2787
2788 default:
2789 break;
2790 }
2791
2792 return 0;
2793 }
2794
dump_queued_data(struct cs_etm_auxtrace * etm,struct perf_record_auxtrace * event)2795 static void dump_queued_data(struct cs_etm_auxtrace *etm,
2796 struct perf_record_auxtrace *event)
2797 {
2798 struct auxtrace_buffer *buf;
2799 unsigned int i;
2800 /*
2801 * Find all buffers with same reference in the queues and dump them.
2802 * This is because the queues can contain multiple entries of the same
2803 * buffer that were split on aux records.
2804 */
2805 for (i = 0; i < etm->queues.nr_queues; ++i)
2806 list_for_each_entry(buf, &etm->queues.queue_array[i].head, list)
2807 if (buf->reference == event->reference)
2808 cs_etm__dump_event(etm->queues.queue_array[i].priv, buf);
2809 }
2810
static int cs_etm__process_auxtrace_event(struct perf_session *session,
					  union perf_event *event,
					  struct perf_tool *tool __maybe_unused)
{
	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	struct auxtrace_buffer *buffer;
	off_t data_offset;
	int fd, err;
	int idx;

	if (etm->data_queued) {
		/* Already queued elsewhere: only dump when asked */
		if (dump_trace)
			dump_queued_data(etm, &event->auxtrace);
		return 0;
	}

	fd = perf_data__fd(session->data);
	idx = event->auxtrace.idx;

	if (perf_data__is_pipe(session->data)) {
		data_offset = 0;
	} else {
		data_offset = lseek(fd, 0, SEEK_CUR);
		if (data_offset == -1)
			return -errno;
	}

	err = auxtrace_queues__add_event(&etm->queues, session, event,
					 data_offset, &buffer);
	if (err)
		return err;

	/*
	 * Knowing if the trace is formatted or not requires a lookup of
	 * the aux record so only works in non-piped mode where data is
	 * queued in cs_etm__queue_aux_records(). Always assume
	 * formatted in piped mode (true).
	 */
	err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
				  idx, true, -1);
	if (err)
		return err;

	if (dump_trace && auxtrace_buffer__get_data(buffer, fd)) {
		cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer);
		auxtrace_buffer__put_data(buffer);
	}

	return 0;
}
2860
cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace * etm)2861 static int cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace *etm)
2862 {
2863 struct evsel *evsel;
2864 struct evlist *evlist = etm->session->evlist;
2865
2866 /* Override timeless mode with user input from --itrace=Z */
2867 if (etm->synth_opts.timeless_decoding) {
2868 etm->timeless_decoding = true;
2869 return 0;
2870 }
2871
2872 /*
2873 * Find the cs_etm evsel and look at what its timestamp setting was
2874 */
2875 evlist__for_each_entry(evlist, evsel)
2876 if (cs_etm__evsel_is_auxtrace(etm->session, evsel)) {
2877 etm->timeless_decoding =
2878 !(evsel->core.attr.config & BIT(ETM_OPT_TS));
2879 return 0;
2880 }
2881
2882 pr_err("CS ETM: Couldn't find ETM evsel\n");
2883 return -EINVAL;
2884 }
2885
2886 /*
2887 * Read a single cpu parameter block from the auxtrace_info priv block.
2888 *
2889 * For version 1 there is a per cpu nr_params entry. If we are handling
2890 * version 1 file, then there may be less, the same, or more params
2891 * indicated by this value than the compile time number we understand.
2892 *
2893 * For a version 0 info block, there are a fixed number, and we need to
2894 * fill out the nr_param value in the metadata we create.
2895 */
static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset,
				    int out_blk_size, int nr_params_v0)
{
	u64 *metadata = NULL;
	int hdr_version;
	int nr_in_params, nr_out_params, nr_cmn_params;
	int i, k;

	/* Zeroed output block: any params not copied below stay 0 */
	metadata = zalloc(sizeof(*metadata) * out_blk_size);
	if (!metadata)
		return NULL;

	/* read block current index & version */
	i = *buff_in_offset;
	hdr_version = buff_in[CS_HEADER_VERSION];

	if (!hdr_version) {
	/* read version 0 info block into a version 1 metadata block */
		nr_in_params = nr_params_v0;
		metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC];
		metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU];
		/* v0 has no nr_params field, so synthesize it here */
		metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params;
		/* remaining block params at offset +1 from source */
		for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++)
			metadata[k + 1] = buff_in[i + k];
		/* version 0 has 2 common params */
		nr_cmn_params = 2;
	} else {
		/* read version 1 info block - input and output nr_params may differ */
		/* version 1 has 3 common params */
		nr_cmn_params = 3;
		nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS];

		/* if input has more params than output - skip excess */
		nr_out_params = nr_in_params + nr_cmn_params;
		if (nr_out_params > out_blk_size)
			nr_out_params = out_blk_size;

		for (k = CS_ETM_MAGIC; k < nr_out_params; k++)
			metadata[k] = buff_in[i + k];

		/* record the actual nr params we copied */
		metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params;
	}

	/* adjust in offset by number of in params used */
	i += nr_in_params + nr_cmn_params;
	*buff_in_offset = i;
	return metadata;
}
2946
2947 /**
2948 * Puts a fragment of an auxtrace buffer into the auxtrace queues based
2949 * on the bounds of aux_event, if it matches with the buffer that's at
2950 * file_offset.
2951 *
2952 * Normally, whole auxtrace buffers would be added to the queue. But we
2953 * want to reset the decoder for every PERF_RECORD_AUX event, and the decoder
2954 * is reset across each buffer, so splitting the buffers up in advance has
2955 * the same effect.
2956 */
static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz,
				      struct perf_record_aux *aux_event, struct perf_sample *sample)
{
	int err;
	char buf[PERF_SAMPLE_MAX_SIZE];
	union perf_event *auxtrace_event_union;
	struct perf_record_auxtrace *auxtrace_event;
	union perf_event auxtrace_fragment;
	__u64 aux_offset, aux_size;
	__u32 idx;
	bool formatted;

	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);

	/*
	 * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got
	 * from looping through the auxtrace index.
	 */
	err = perf_session__peek_event(session, file_offset, buf,
				       PERF_SAMPLE_MAX_SIZE, &auxtrace_event_union, NULL);
	if (err)
		return err;
	auxtrace_event = &auxtrace_event_union->auxtrace;
	if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE)
		return -EINVAL;

	/* sanity check: header must be complete and match the indexed size */
	if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) ||
	    auxtrace_event->header.size != sz) {
		return -EINVAL;
	}

	/*
	 * In per-thread mode, auxtrace CPU is set to -1, but TID will be set instead. See
	 * auxtrace_mmap_params__set_idx(). However, the sample AUX event will contain a
	 * CPU as we set this always for the AUX_OUTPUT_HW_ID event.
	 * So now compare only TIDs if auxtrace CPU is -1, and CPUs if auxtrace CPU is not -1.
	 * Return 'not found' if mismatch.
	 */
	if (auxtrace_event->cpu == (__u32) -1) {
		etm->per_thread_decoding = true;
		if (auxtrace_event->tid != sample->tid)
			return 1;
	} else if (auxtrace_event->cpu != sample->cpu) {
		if (etm->per_thread_decoding) {
			/*
			 * Found a per-cpu buffer after a per-thread one was
			 * already found
			 */
			pr_err("CS ETM: Inconsistent per-thread/per-cpu mode.\n");
			return -EINVAL;
		}
		return 1;
	}

	if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) {
		/*
		 * Clamp size in snapshot mode. The buffer size is clamped in
		 * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect
		 * the buffer size.
		 */
		aux_size = min(aux_event->aux_size, auxtrace_event->size);

		/*
		 * In this mode, the head also points to the end of the buffer so aux_offset
		 * needs to have the size subtracted so it points to the beginning as in normal mode
		 */
		aux_offset = aux_event->aux_offset - aux_size;
	} else {
		aux_size = aux_event->aux_size;
		aux_offset = aux_event->aux_offset;
	}

	if (aux_offset >= auxtrace_event->offset &&
	    aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) {
		/*
		 * If this AUX event was inside this buffer somewhere, create a new auxtrace event
		 * based on the sizes of the aux event, and queue that fragment.
		 */
		auxtrace_fragment.auxtrace = *auxtrace_event;
		auxtrace_fragment.auxtrace.size = aux_size;
		auxtrace_fragment.auxtrace.offset = aux_offset;
		/*
		 * file_offset points at the PERF_RECORD_AUXTRACE header;
		 * advance past it and then to where this fragment begins
		 * within the buffer's data.
		 */
		file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size;

		pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64
			  " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu);
		err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment,
						 file_offset, NULL);
		if (err)
			return err;

		idx = auxtrace_event->idx;
		formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
		return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
					   idx, formatted, sample->cpu);
	}

	/* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
	return 1;
}
3058
static int cs_etm__process_aux_hw_id_cb(struct perf_session *session, union perf_event *event,
					u64 offset __maybe_unused, void *data __maybe_unused)
{
	int *nr_found = data;

	/* only PERF_RECORD_AUX_OUTPUT_HW_ID records are of interest here */
	if (event->header.type != PERF_RECORD_AUX_OUTPUT_HW_ID)
		return 0;

	/* bump the caller's found count, then handle the record early so decoders can be set up */
	(*nr_found)++;
	return cs_etm__process_aux_output_hw_id(session, event);
}
3069
/*
 * perf_session__peek_events() callback: for each PERF_RECORD_AUX event in the
 * data stream, parse its sample data and queue the matching fragment of the
 * AUXTRACE buffer located via the auxtrace index.
 */
static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event,
					u64 offset __maybe_unused, void *data __maybe_unused)
{
	struct perf_sample sample;
	int ret;
	struct auxtrace_index_entry *ent;
	struct auxtrace_index *auxtrace_index;
	struct evsel *evsel;
	size_t i;

	/* Don't care about any other events, we're only queuing buffers for AUX events */
	if (event->header.type != PERF_RECORD_AUX)
		return 0;

	if (event->header.size < sizeof(struct perf_record_aux))
		return -EINVAL;

	/* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */
	if (!event->aux.aux_size)
		return 0;

	/*
	 * Parse the sample, we need the sample_id_all data that comes after the event so that the
	 * CPU or PID can be matched to an AUXTRACE buffer's CPU or PID.
	 */
	evsel = evlist__event2evsel(session->evlist, event);
	if (!evsel)
		return -EINVAL;
	ret = evsel__parse_sample(evsel, event, &sample);
	if (ret)
		return ret;

	/*
	 * Loop through the auxtrace index to find the buffer that matches up with this aux event.
	 */
	list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
		for (i = 0; i < auxtrace_index->nr; i++) {
			ent = &auxtrace_index->entries[i];
			ret = cs_etm__queue_aux_fragment(session, ent->file_offset,
							 ent->sz, &event->aux, &sample);
			/*
			 * Stop search on error or successful values. Continue search on
			 * 1 ('not found')
			 */
			if (ret != 1)
				return ret;
		}
	}

	/*
	 * Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but
	 * don't exit with an error because it will still be possible to decode other aux records.
	 */
	pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64
	       " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu);
	return 0;
}
3127
cs_etm__queue_aux_records(struct perf_session * session)3128 static int cs_etm__queue_aux_records(struct perf_session *session)
3129 {
3130 struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index,
3131 struct auxtrace_index, list);
3132 if (index && index->nr > 0)
3133 return perf_session__peek_events(session, session->header.data_offset,
3134 session->header.data_size,
3135 cs_etm__queue_aux_records_cb, NULL);
3136
3137 /*
3138 * We would get here if there are no entries in the index (either no auxtrace
3139 * buffers or no index at all). Fail silently as there is the possibility of
3140 * queueing them in cs_etm__process_auxtrace_event() if etm->data_queued is still
3141 * false.
3142 *
3143 * In that scenario, buffers will not be split by AUX records.
3144 */
3145 return 0;
3146 }
3147
/*
 * True when the metadata block for cpu 'j' does NOT carry the given trace
 * parameter, i.e. its recorded NR_TRC_PARAMS count is too small to include
 * that param's index. Note the sense is inverted relative to the macro name:
 * callers treat a true result as "parameter unavailable".
 */
#define HAS_PARAM(j, type, param) (metadata[(j)][CS_ETM_NR_TRC_PARAMS] <= \
				  (CS_##type##_##param - CS_ETM_COMMON_BLK_MAX_V1))
3150
3151 /*
3152 * Loop through the ETMs and complain if we find at least one where ts_source != 1 (virtual
3153 * timestamps).
3154 */
cs_etm__has_virtual_ts(u64 ** metadata,int num_cpu)3155 static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
3156 {
3157 int j;
3158
3159 for (j = 0; j < num_cpu; j++) {
3160 switch (metadata[j][CS_ETM_MAGIC]) {
3161 case __perf_cs_etmv4_magic:
3162 if (HAS_PARAM(j, ETMV4, TS_SOURCE) || metadata[j][CS_ETMV4_TS_SOURCE] != 1)
3163 return false;
3164 break;
3165 case __perf_cs_ete_magic:
3166 if (HAS_PARAM(j, ETE, TS_SOURCE) || metadata[j][CS_ETE_TS_SOURCE] != 1)
3167 return false;
3168 break;
3169 default:
3170 /* Unknown / unsupported magic number. */
3171 return false;
3172 }
3173 }
3174 return true;
3175 }
3176
3177 /* map trace ids to correct metadata block, from information in metadata */
static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
{
	int cpu;

	for (cpu = 0; cpu < num_cpu; cpu++) {
		u64 magic = metadata[cpu][CS_ETM_MAGIC];
		u8 trace_chan_id;
		int rc;

		if (magic == __perf_cs_etmv3_magic) {
			/* strip any flag bits, keeping only the ID value */
			metadata[cpu][CS_ETM_ETMTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
			trace_chan_id = (u8)(metadata[cpu][CS_ETM_ETMTRACEIDR]);
		} else if (magic == __perf_cs_etmv4_magic ||
			   magic == __perf_cs_ete_magic) {
			metadata[cpu][CS_ETMV4_TRCTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
			trace_chan_id = (u8)(metadata[cpu][CS_ETMV4_TRCTRACEIDR]);
		} else {
			/* unknown magic number */
			return -EINVAL;
		}

		rc = cs_etm__map_trace_id(trace_chan_id, metadata[cpu]);
		if (rc)
			return rc;
	}

	return 0;
}
3206
3207 /*
3208 * If we found AUX_HW_ID packets, then set any metadata marked as unused to the
3209 * unused value to reduce the number of unneeded decoders created.
3210 */
static int cs_etm__clear_unused_trace_ids_metadata(int num_cpu, u64 **metadata)
{
	int cpu;

	for (cpu = 0; cpu < num_cpu; cpu++) {
		u64 *blk = metadata[cpu];

		if (blk[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) {
			/* marked unused - no decoder needed for this ETM */
			if (blk[CS_ETM_ETMTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
				blk[CS_ETM_ETMTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
		} else if (blk[CS_ETM_MAGIC] == __perf_cs_etmv4_magic ||
			   blk[CS_ETM_MAGIC] == __perf_cs_ete_magic) {
			if (blk[CS_ETMV4_TRCTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
				blk[CS_ETMV4_TRCTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
		} else {
			/* unknown magic number */
			return -EINVAL;
		}
	}

	return 0;
}
3235
cs_etm__process_auxtrace_info_full(union perf_event * event,struct perf_session * session)3236 int cs_etm__process_auxtrace_info_full(union perf_event *event,
3237 struct perf_session *session)
3238 {
3239 struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
3240 struct cs_etm_auxtrace *etm = NULL;
3241 struct perf_record_time_conv *tc = &session->time_conv;
3242 int event_header_size = sizeof(struct perf_event_header);
3243 int total_size = auxtrace_info->header.size;
3244 int priv_size = 0;
3245 int num_cpu;
3246 int err = 0;
3247 int aux_hw_id_found;
3248 int i, j;
3249 u64 *ptr = NULL;
3250 u64 **metadata = NULL;
3251
3252 /*
3253 * Create an RB tree for traceID-metadata tuple. Since the conversion
3254 * has to be made for each packet that gets decoded, optimizing access
3255 * in anything other than a sequential array is worth doing.
3256 */
3257 traceid_list = intlist__new(NULL);
3258 if (!traceid_list)
3259 return -ENOMEM;
3260
3261 /* First the global part */
3262 ptr = (u64 *) auxtrace_info->priv;
3263 num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
3264 metadata = zalloc(sizeof(*metadata) * num_cpu);
3265 if (!metadata) {
3266 err = -ENOMEM;
3267 goto err_free_traceid_list;
3268 }
3269
3270 /* Start parsing after the common part of the header */
3271 i = CS_HEADER_VERSION_MAX;
3272
3273 /*
3274 * The metadata is stored in the auxtrace_info section and encodes
3275 * the configuration of the ARM embedded trace macrocell which is
3276 * required by the trace decoder to properly decode the trace due
3277 * to its highly compressed nature.
3278 */
3279 for (j = 0; j < num_cpu; j++) {
3280 if (ptr[i] == __perf_cs_etmv3_magic) {
3281 metadata[j] =
3282 cs_etm__create_meta_blk(ptr, &i,
3283 CS_ETM_PRIV_MAX,
3284 CS_ETM_NR_TRC_PARAMS_V0);
3285 } else if (ptr[i] == __perf_cs_etmv4_magic) {
3286 metadata[j] =
3287 cs_etm__create_meta_blk(ptr, &i,
3288 CS_ETMV4_PRIV_MAX,
3289 CS_ETMV4_NR_TRC_PARAMS_V0);
3290 } else if (ptr[i] == __perf_cs_ete_magic) {
3291 metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1);
3292 } else {
3293 ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". File could be from a newer version of perf.\n",
3294 ptr[i]);
3295 err = -EINVAL;
3296 goto err_free_metadata;
3297 }
3298
3299 if (!metadata[j]) {
3300 err = -ENOMEM;
3301 goto err_free_metadata;
3302 }
3303 }
3304
3305 /*
3306 * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and
3307 * CS_ETMV4_PRIV_MAX mark how many double words are in the
3308 * global metadata, and each cpu's metadata respectively.
3309 * The following tests if the correct number of double words was
3310 * present in the auxtrace info section.
3311 */
3312 priv_size = total_size - event_header_size - INFO_HEADER_SIZE;
3313 if (i * 8 != priv_size) {
3314 err = -EINVAL;
3315 goto err_free_metadata;
3316 }
3317
3318 etm = zalloc(sizeof(*etm));
3319
3320 if (!etm) {
3321 err = -ENOMEM;
3322 goto err_free_metadata;
3323 }
3324
3325 /*
3326 * As all the ETMs run at the same exception level, the system should
3327 * have the same PID format crossing CPUs. So cache the PID format
3328 * and reuse it for sequential decoding.
3329 */
3330 etm->pid_fmt = cs_etm__init_pid_fmt(metadata[0]);
3331
3332 err = auxtrace_queues__init(&etm->queues);
3333 if (err)
3334 goto err_free_etm;
3335
3336 if (session->itrace_synth_opts->set) {
3337 etm->synth_opts = *session->itrace_synth_opts;
3338 } else {
3339 itrace_synth_opts__set_default(&etm->synth_opts,
3340 session->itrace_synth_opts->default_no_sample);
3341 etm->synth_opts.callchain = false;
3342 }
3343
3344 etm->session = session;
3345
3346 etm->num_cpu = num_cpu;
3347 etm->pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> 32) & 0xffffffff);
3348 etm->snapshot_mode = (ptr[CS_ETM_SNAPSHOT] != 0);
3349 etm->metadata = metadata;
3350 etm->auxtrace_type = auxtrace_info->type;
3351
3352 if (etm->synth_opts.use_timestamp)
3353 /*
3354 * Prior to Armv8.4, Arm CPUs don't support FEAT_TRF feature,
3355 * therefore the decoder cannot know if the timestamp trace is
3356 * same with the kernel time.
3357 *
3358 * If a user has knowledge for the working platform and can
3359 * specify itrace option 'T' to tell decoder to forcely use the
3360 * traced timestamp as the kernel time.
3361 */
3362 etm->has_virtual_ts = true;
3363 else
3364 /* Use virtual timestamps if all ETMs report ts_source = 1 */
3365 etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu);
3366
3367 if (!etm->has_virtual_ts)
3368 ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n"
3369 "The time field of the samples will not be set accurately.\n"
3370 "For Arm CPUs prior to Armv8.4 or without support FEAT_TRF,\n"
3371 "you can specify the itrace option 'T' for timestamp decoding\n"
3372 "if the Coresight timestamp on the platform is same with the kernel time.\n\n");
3373
3374 etm->auxtrace.process_event = cs_etm__process_event;
3375 etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
3376 etm->auxtrace.flush_events = cs_etm__flush_events;
3377 etm->auxtrace.free_events = cs_etm__free_events;
3378 etm->auxtrace.free = cs_etm__free;
3379 etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace;
3380 session->auxtrace = &etm->auxtrace;
3381
3382 err = cs_etm__setup_timeless_decoding(etm);
3383 if (err)
3384 return err;
3385
3386 etm->tc.time_shift = tc->time_shift;
3387 etm->tc.time_mult = tc->time_mult;
3388 etm->tc.time_zero = tc->time_zero;
3389 if (event_contains(*tc, time_cycles)) {
3390 etm->tc.time_cycles = tc->time_cycles;
3391 etm->tc.time_mask = tc->time_mask;
3392 etm->tc.cap_user_time_zero = tc->cap_user_time_zero;
3393 etm->tc.cap_user_time_short = tc->cap_user_time_short;
3394 }
3395 err = cs_etm__synth_events(etm, session);
3396 if (err)
3397 goto err_free_queues;
3398
3399 /*
3400 * Map Trace ID values to CPU metadata.
3401 *
3402 * Trace metadata will always contain Trace ID values from the legacy algorithm. If the
3403 * files has been recorded by a "new" perf updated to handle AUX_HW_ID then the metadata
3404 * ID value will also have the CORESIGHT_TRACE_ID_UNUSED_FLAG set.
3405 *
3406 * The updated kernel drivers that use AUX_HW_ID to sent Trace IDs will attempt to use
3407 * the same IDs as the old algorithm as far as is possible, unless there are clashes
3408 * in which case a different value will be used. This means an older perf may still
3409 * be able to record and read files generate on a newer system.
3410 *
3411 * For a perf able to interpret AUX_HW_ID packets we first check for the presence of
3412 * those packets. If they are there then the values will be mapped and plugged into
3413 * the metadata. We then set any remaining metadata values with the used flag to a
3414 * value CORESIGHT_TRACE_ID_UNUSED_VAL - which indicates no decoder is required.
3415 *
3416 * If no AUX_HW_ID packets are present - which means a file recorded on an old kernel
3417 * then we map Trace ID values to CPU directly from the metadata - clearing any unused
3418 * flags if present.
3419 */
3420
3421 /* first scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
3422 aux_hw_id_found = 0;
3423 err = perf_session__peek_events(session, session->header.data_offset,
3424 session->header.data_size,
3425 cs_etm__process_aux_hw_id_cb, &aux_hw_id_found);
3426 if (err)
3427 goto err_free_queues;
3428
3429 /* if HW ID found then clear any unused metadata ID values */
3430 if (aux_hw_id_found)
3431 err = cs_etm__clear_unused_trace_ids_metadata(num_cpu, metadata);
3432 /* otherwise, this is a file with metadata values only, map from metadata */
3433 else
3434 err = cs_etm__map_trace_ids_metadata(num_cpu, metadata);
3435
3436 if (err)
3437 goto err_free_queues;
3438
3439 err = cs_etm__queue_aux_records(session);
3440 if (err)
3441 goto err_free_queues;
3442
3443 etm->data_queued = etm->queues.populated;
3444 return 0;
3445
3446 err_free_queues:
3447 auxtrace_queues__free(&etm->queues);
3448 session->auxtrace = NULL;
3449 err_free_etm:
3450 zfree(&etm);
3451 err_free_metadata:
3452 /* No need to check @metadata[j], free(NULL) is supported */
3453 for (j = 0; j < num_cpu; j++)
3454 zfree(&metadata[j]);
3455 zfree(&metadata);
3456 err_free_traceid_list:
3457 intlist__delete(traceid_list);
3458 return err;
3459 }
3460