1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright(C) 2015-2018 Linaro Limited.
4 *
5 * Author: Tor Jeremiassen <tor@ti.com>
6 * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
7 */
8
9 #include <linux/bitfield.h>
10 #include <linux/bitops.h>
11 #include <linux/coresight-pmu.h>
12 #include <linux/err.h>
13 #include <linux/log2.h>
14 #include <linux/types.h>
15 #include <linux/zalloc.h>
16
17 #include <stdlib.h>
18
19 #include "auxtrace.h"
20 #include "color.h"
21 #include "cs-etm.h"
22 #include "cs-etm-decoder/cs-etm-decoder.h"
23 #include "debug.h"
24 #include "dso.h"
25 #include "evlist.h"
26 #include "intlist.h"
27 #include "machine.h"
28 #include "map.h"
29 #include "perf.h"
30 #include "session.h"
31 #include "map_symbol.h"
32 #include "branch.h"
33 #include "symbol.h"
34 #include "tool.h"
35 #include "thread.h"
36 #include "thread-stack.h"
37 #include "tsc.h"
38 #include <tools/libc_compat.h>
39 #include "util/synthetic-events.h"
40 #include "util/util.h"
41
/*
 * Per-session CoreSight ETM decode state. Embedded in, and recovered from,
 * perf_session->auxtrace via container_of().
 */
struct cs_etm_auxtrace {
	struct auxtrace auxtrace;
	struct auxtrace_queues queues;
	struct auxtrace_heap heap;	/* Orders queues by next timestamp. */
	struct itrace_synth_opts synth_opts;
	struct perf_session *session;
	struct perf_tsc_conversion tc;

	/*
	 * Timeless has no timestamps in the trace so overlapping mmap lookups
	 * are less accurate but produces smaller trace data. We use context IDs
	 * in the trace instead of matching timestamps with fork records so
	 * they're not really needed in the general case. Overlapping mmaps
	 * happen in cases like between a fork and an exec.
	 */
	bool timeless_decoding;

	/*
	 * Per-thread ignores the trace channel ID and instead assumes that
	 * everything in a buffer comes from the same process regardless of
	 * which CPU it ran on. It also implies no context IDs so the TID is
	 * taken from the auxtrace buffer.
	 */
	bool per_thread_decoding;
	bool snapshot_mode;
	bool data_queued;
	bool has_virtual_ts; /* Virtual/Kernel timestamps in the trace. */

	int num_cpu;			/* Number of entries in metadata[]. */
	u64 latest_kernel_timestamp;
	u32 auxtrace_type;
	u64 branches_sample_type;	/* Attr config for synthesized branch samples. */
	u64 branches_id;
	u64 instructions_sample_type;	/* Attr config for synthesized instruction samples. */
	u64 instructions_sample_period;
	u64 instructions_id;
	u64 **metadata;			/* Per-CPU metadata arrays, [idx][CS_ETM*_...]. */
	unsigned int pmu_type;
	enum cs_etm_pid_fmt pid_fmt;	/* Cached by cs_etm__init_pid_fmt(). */
};
82
/*
 * Decode state for one trace ID within a cs_etm_queue. A queue holding
 * formatted data can multiplex several trace sources, each getting its
 * own traceid_queue (see cs_etm__etmq_get_traceid_queue()).
 */
struct cs_etm_traceid_queue {
	u8 trace_chan_id;		/* Trace ID this state belongs to. */
	u64 period_instructions;	/* Instruction count toward the sample period. */
	size_t last_branch_pos;
	union perf_event *event_buf;	/* Scratch buffer for synthesizing samples. */
	struct thread *thread;
	struct thread *prev_packet_thread;	/* Thread current when prev_packet was seen. */
	ocsd_ex_level prev_packet_el;
	ocsd_ex_level el;		/* Exception level of the current packet. */
	struct branch_stack *last_branch;
	struct branch_stack *last_branch_rb;	/* Ring buffer behind last_branch. */
	struct cs_etm_packet *prev_packet;
	struct cs_etm_packet *packet;
	struct cs_etm_packet_queue packet_queue;
};
98
/* Whether the AUX data went through the CoreSight frame formatter. */
enum cs_etm_format {
	UNSET,		/* Not determined yet. */
	FORMATTED,
	UNFORMATTED
};
104
/* Decode state for one auxtrace queue (one CPU, or one thread in per-thread mode). */
struct cs_etm_queue {
	struct cs_etm_auxtrace *etm;	/* Back pointer to the session state. */
	struct cs_etm_decoder *decoder;
	struct auxtrace_buffer *buffer;	/* Buffer currently being decoded. */
	unsigned int queue_nr;
	u8 pending_timestamp_chan_id;	/* Trace ID awaiting servicing, 0 if none. */
	enum cs_etm_format format;
	u64 offset;
	const unsigned char *buf;	/* Raw trace bytes of the current buffer. */
	size_t buf_len, buf_used;
	/* Conversion between traceID and index in traceid_queues array */
	struct intlist *traceid_queues_list;
	struct cs_etm_traceid_queue **traceid_queues;
	/* Conversion between traceID and metadata pointers */
	struct intlist *traceid_list;
	/*
	 * Same as traceid_list, but traceid_list may be a reference to another
	 * queue's which has a matching sink ID.
	 */
	struct intlist *own_traceid_list;
	u32 sink_id;	/* Sink the trace came through, SINK_UNSET until known. */
};
127
128 static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm);
129 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
130 pid_t tid);
131 static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
132 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);
133 static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata);
134 static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu);
135 static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata);
136
/* PTMs ETMIDR [11:8] set to b0011 */
#define ETMIDR_PTM_VERSION 0x00000300

/*
 * A struct auxtrace_heap_item only has a queue_nr and a timestamp to
 * work with. One option is to modify to auxtrace_heap_XYZ() API or simply
 * encode the etm queue number as the upper 16 bit and the channel as
 * the lower 16 bit.
 *
 * Arguments are fully parenthesized so that expression arguments expand
 * with the intended precedence (CERT PRE01-C).
 */
#define TO_CS_QUEUE_NR(queue_nr, trace_chan_id) \
	(((queue_nr) << 16) | (trace_chan_id))
#define TO_QUEUE_NR(cs_queue_nr) ((cs_queue_nr) >> 16)
#define TO_TRACE_CHAN_ID(cs_queue_nr) ((cs_queue_nr) & 0x0000ffff)
#define SINK_UNSET ((u32) -1)
151
/*
 * Derive the ETMv3-generation protocol from ETMIDR: PTM when bits [11:8]
 * read b0011, plain ETMv3 otherwise.
 */
static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
{
	return (etmidr & ETMIDR_PTM_VERSION) == ETMIDR_PTM_VERSION ?
	       CS_ETM_PROTO_PTM : CS_ETM_PROTO_ETMV3;
}
161
/* Look up the metadata magic value for @trace_chan_id on this queue. */
static int cs_etm__get_magic(struct cs_etm_queue *etmq, u8 trace_chan_id, u64 *magic)
{
	struct int_node *inode = intlist__find(etmq->traceid_list, trace_chan_id);

	if (!inode)
		return -EINVAL;

	*magic = ((u64 *)inode->priv)[CS_ETM_MAGIC];
	return 0;
}
175
/* Resolve the CPU associated with @trace_chan_id on this queue. */
int cs_etm__get_cpu(struct cs_etm_queue *etmq, u8 trace_chan_id, int *cpu)
{
	struct int_node *inode = intlist__find(etmq->traceid_list, trace_chan_id);

	if (!inode)
		return -EINVAL;

	*cpu = (int)((u64 *)inode->priv)[CS_ETM_CPU];
	return 0;
}
189
190 /*
191 * The returned PID format is presented as an enum:
192 *
193 * CS_ETM_PIDFMT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced.
194 * CS_ETM_PIDFMT_CTXTID2: CONTEXTIDR_EL2 is traced.
195 * CS_ETM_PIDFMT_NONE: No context IDs
196 *
197 * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2
198 * are enabled at the same time when the session runs on an EL2 kernel.
199 * This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be
200 * recorded in the trace data, the tool will selectively use
201 * CONTEXTIDR_EL2 as PID.
202 *
203 * The result is cached in etm->pid_fmt so this function only needs to be called
204 * when processing the aux info.
205 */
cs_etm__init_pid_fmt(u64 * metadata)206 static enum cs_etm_pid_fmt cs_etm__init_pid_fmt(u64 *metadata)
207 {
208 u64 val;
209
210 if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) {
211 val = metadata[CS_ETM_ETMCR];
212 /* CONTEXTIDR is traced */
213 if (val & BIT(ETM_OPT_CTXTID))
214 return CS_ETM_PIDFMT_CTXTID;
215 } else {
216 val = metadata[CS_ETMV4_TRCCONFIGR];
217 /* CONTEXTIDR_EL2 is traced */
218 if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT)))
219 return CS_ETM_PIDFMT_CTXTID2;
220 /* CONTEXTIDR_EL1 is traced */
221 else if (val & BIT(ETM4_CFG_BIT_CTXTID))
222 return CS_ETM_PIDFMT_CTXTID;
223 }
224
225 return CS_ETM_PIDFMT_NONE;
226 }
227
cs_etm__get_pid_fmt(struct cs_etm_queue * etmq)228 enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq)
229 {
230 return etmq->etm->pid_fmt;
231 }
232
/*
 * Associate @trace_chan_id with @cpu_metadata in this queue's traceid_list.
 * Seeing the same association again is fine; mapping an already-known ID to
 * a different CPU/metadata pair is an error.
 */
static int cs_etm__insert_trace_id_node(struct cs_etm_queue *etmq,
					u8 trace_chan_id, u64 *cpu_metadata)
{
	struct int_node *inode = intlist__findnew(etmq->traceid_list, trace_chan_id);
	u64 *curr_cpu_data;
	u8 curr_chan_id;
	int err;

	/* Something went wrong, no need to continue */
	if (!inode)
		return -ENOMEM;

	/* First sighting of this traceID: record the metadata pointer. */
	if (!inode->priv) {
		inode->priv = cpu_metadata;
		return 0;
	}

	/* Disallow re-mapping a different traceID to metadata pair. */
	curr_cpu_data = inode->priv;
	if (curr_cpu_data[CS_ETM_CPU] != cpu_metadata[CS_ETM_CPU]) {
		/*
		 * With > CORESIGHT_TRACE_IDS_MAX ETMs, overlapping IDs
		 * are expected (but not supported) in per-thread mode,
		 * rather than signifying an error.
		 */
		if (etmq->etm->per_thread_decoding)
			pr_err("CS_ETM: overlapping Trace IDs aren't currently supported in per-thread mode\n");
		else
			pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");

		return -EINVAL;
	}

	/* Same CPU: the trace ID stored in the metadata must match too. */
	err = cs_etm__metadata_get_trace_id(&curr_chan_id, curr_cpu_data);
	if (err)
		return err;

	if (curr_chan_id != trace_chan_id) {
		pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
		return -EINVAL;
	}

	/* Identical mapping already present, nothing else to do. */
	return 0;
}
282
cs_etm__get_queue(struct cs_etm_auxtrace * etm,int cpu)283 static struct cs_etm_queue *cs_etm__get_queue(struct cs_etm_auxtrace *etm, int cpu)
284 {
285 if (etm->per_thread_decoding)
286 return etm->queues.queue_array[0].priv;
287 else
288 return etm->queues.queue_array[cpu].priv;
289 }
290
/* Record a version 0 (global) trace ID to metadata association. */
static int cs_etm__map_trace_id_v0(struct cs_etm_auxtrace *etm, u8 trace_chan_id,
				   u64 *cpu_metadata)
{
	struct cs_etm_queue *etmq = cs_etm__get_queue(etm, cpu_metadata[CS_ETM_CPU]);

	/*
	 * An unformatted queue carries a single trace source, so a single
	 * mapping in that queue is enough and only one decoder is made.
	 */
	if (etmq->format == UNFORMATTED)
		return cs_etm__insert_trace_id_node(etmq, trace_chan_id,
						    cpu_metadata);

	/* Version 0 trace IDs are global: propagate to every queue. */
	for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) {
		int ret;

		ret = cs_etm__insert_trace_id_node(etm->queues.queue_array[i].priv,
						   trace_chan_id, cpu_metadata);
		if (ret)
			return ret;
	}

	return 0;
}
321
/* Handle the version 0 flavour of a HW_ID payload for @cpu. */
static int cs_etm__process_trace_id_v0(struct cs_etm_auxtrace *etm, int cpu,
				       u64 hw_id)
{
	u8 trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
	u64 *cpu_data = get_cpu_data(etm, cpu);
	int err;

	if (!cpu_data)
		return -EINVAL;

	err = cs_etm__map_trace_id_v0(etm, trace_chan_id, cpu_data);
	if (err)
		return err;

	/*
	 * The association was picked up from the packet, so plug the trace ID
	 * into the metadata for setting up decoders later.
	 */
	return cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
}
343
/*
 * Handle the v0.1 HW_ID payload: carries a sink ID as well as a trace ID,
 * so queues that feed the same sink can share one trace ID map.
 */
static int cs_etm__process_trace_id_v0_1(struct cs_etm_auxtrace *etm, int cpu,
					 u64 hw_id)
{
	struct cs_etm_queue *etmq = cs_etm__get_queue(etm, cpu);
	int ret;
	u64 *cpu_data;
	u32 sink_id = FIELD_GET(CS_AUX_HW_ID_SINK_ID_MASK, hw_id);
	u8 trace_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);

	/*
	 * Check sink id hasn't changed in per-cpu mode. In per-thread mode,
	 * let it pass for now until an actual overlapping trace ID is hit. In
	 * most cases IDs won't overlap even if the sink changes.
	 */
	if (!etmq->etm->per_thread_decoding && etmq->sink_id != SINK_UNSET &&
	    etmq->sink_id != sink_id) {
		pr_err("CS_ETM: mismatch between sink IDs\n");
		return -EINVAL;
	}

	etmq->sink_id = sink_id;

	/* Find which other queues use this sink and link their ID maps */
	for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) {
		struct cs_etm_queue *other_etmq = etm->queues.queue_array[i].priv;

		/* Different sinks, skip */
		if (other_etmq->sink_id != etmq->sink_id)
			continue;

		/* Already linked, skip */
		if (other_etmq->traceid_list == etmq->traceid_list)
			continue;

		/* At the point of first linking, this one should be empty */
		if (!intlist__empty(etmq->traceid_list)) {
			pr_err("CS_ETM: Can't link populated trace ID lists\n");
			return -EINVAL;
		}

		/* Drop our own (empty) list and adopt the other queue's. */
		etmq->own_traceid_list = NULL;
		intlist__delete(etmq->traceid_list);
		etmq->traceid_list = other_etmq->traceid_list;
		break;
	}

	/* NOTE(review): get_cpu_data() can return NULL on unknown CPUs — confirm callers guarantee a valid CPU here. */
	cpu_data = get_cpu_data(etm, cpu);
	ret = cs_etm__insert_trace_id_node(etmq, trace_id, cpu_data);
	if (ret)
		return ret;

	/* Write the trace ID back into the metadata used for decoder setup. */
	ret = cs_etm__metadata_set_trace_id(trace_id, cpu_data);
	if (ret)
		return ret;

	return 0;
}
401
/* Read the trace ID currently recorded in a CPU's metadata block. */
static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata)
{
	switch (cpu_metadata[CS_ETM_MAGIC]) {
	case __perf_cs_etmv3_magic:
		*trace_chan_id = (u8)(cpu_metadata[CS_ETM_ETMTRACEIDR] &
				      CORESIGHT_TRACE_ID_VAL_MASK);
		return 0;
	case __perf_cs_etmv4_magic:
	case __perf_cs_ete_magic:
		/* ETE shares the ETMv4 metadata layout for this field. */
		*trace_chan_id = (u8)(cpu_metadata[CS_ETMV4_TRCTRACEIDR] &
				      CORESIGHT_TRACE_ID_VAL_MASK);
		return 0;
	default:
		return -EINVAL;
	}
}
421
422 /*
423 * update metadata trace ID from the value found in the AUX_HW_INFO packet.
424 */
cs_etm__metadata_set_trace_id(u8 trace_chan_id,u64 * cpu_metadata)425 static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
426 {
427 u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
428
429 switch (cs_etm_magic) {
430 case __perf_cs_etmv3_magic:
431 cpu_metadata[CS_ETM_ETMTRACEIDR] = trace_chan_id;
432 break;
433 case __perf_cs_etmv4_magic:
434 case __perf_cs_ete_magic:
435 cpu_metadata[CS_ETMV4_TRCTRACEIDR] = trace_chan_id;
436 break;
437
438 default:
439 return -EINVAL;
440 }
441 return 0;
442 }
443
444 /*
445 * Get a metadata index for a specific cpu from an array.
446 *
447 */
get_cpu_data_idx(struct cs_etm_auxtrace * etm,int cpu)448 static int get_cpu_data_idx(struct cs_etm_auxtrace *etm, int cpu)
449 {
450 int i;
451
452 for (i = 0; i < etm->num_cpu; i++) {
453 if (etm->metadata[i][CS_ETM_CPU] == (u64)cpu) {
454 return i;
455 }
456 }
457
458 return -1;
459 }
460
461 /*
462 * Get a metadata for a specific cpu from an array.
463 *
464 */
get_cpu_data(struct cs_etm_auxtrace * etm,int cpu)465 static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu)
466 {
467 int idx = get_cpu_data_idx(etm, cpu);
468
469 return (idx != -1) ? etm->metadata[idx] : NULL;
470 }
471
472 /*
473 * Handle the PERF_RECORD_AUX_OUTPUT_HW_ID event.
474 *
475 * The payload associates the Trace ID and the CPU.
476 * The routine is tolerant of seeing multiple packets with the same association,
477 * but a CPU / Trace ID association changing during a session is an error.
478 */
cs_etm__process_aux_output_hw_id(struct perf_session * session,union perf_event * event)479 static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
480 union perf_event *event)
481 {
482 struct cs_etm_auxtrace *etm;
483 struct perf_sample sample;
484 struct evsel *evsel;
485 u64 hw_id;
486 int cpu, version, err;
487
488 /* extract and parse the HW ID */
489 hw_id = event->aux_output_hw_id.hw_id;
490 version = FIELD_GET(CS_AUX_HW_ID_MAJOR_VERSION_MASK, hw_id);
491
492 /* check that we can handle this version */
493 if (version > CS_AUX_HW_ID_MAJOR_VERSION) {
494 pr_err("CS ETM Trace: PERF_RECORD_AUX_OUTPUT_HW_ID version %d not supported. Please update Perf.\n",
495 version);
496 return -EINVAL;
497 }
498
499 /* get access to the etm metadata */
500 etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace);
501 if (!etm || !etm->metadata)
502 return -EINVAL;
503
504 /* parse the sample to get the CPU */
505 evsel = evlist__event2evsel(session->evlist, event);
506 if (!evsel)
507 return -EINVAL;
508 perf_sample__init(&sample, /*all=*/false);
509 err = evsel__parse_sample(evsel, event, &sample);
510 if (err)
511 goto out;
512 cpu = sample.cpu;
513 if (cpu == -1) {
514 /* no CPU in the sample - possibly recorded with an old version of perf */
515 pr_err("CS_ETM: no CPU AUX_OUTPUT_HW_ID sample. Use compatible perf to record.");
516 err = -EINVAL;
517 goto out;
518 }
519
520 if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0) {
521 err = cs_etm__process_trace_id_v0(etm, cpu, hw_id);
522 goto out;
523 }
524
525 err = cs_etm__process_trace_id_v0_1(etm, cpu, hw_id);
526 out:
527 perf_sample__exit(&sample);
528 return err;
529 }
530
/*
 * Remember which traceID queue needs servicing. A timestamp packet stops
 * the backend so the front end can drain the packets accumulated so far;
 * because a cs_etm_queue can multiplex several trace channels, the channel
 * that raised the timestamp must be recorded explicitly.
 */
void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
					      u8 trace_chan_id)
{
	etmq->pending_timestamp_chan_id = trace_chan_id;
}
543
cs_etm__etmq_get_timestamp(struct cs_etm_queue * etmq,u8 * trace_chan_id)544 static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
545 u8 *trace_chan_id)
546 {
547 struct cs_etm_packet_queue *packet_queue;
548
549 if (!etmq->pending_timestamp_chan_id)
550 return 0;
551
552 if (trace_chan_id)
553 *trace_chan_id = etmq->pending_timestamp_chan_id;
554
555 packet_queue = cs_etm__etmq_get_packet_queue(etmq,
556 etmq->pending_timestamp_chan_id);
557 if (!packet_queue)
558 return 0;
559
560 /* Acknowledge pending status */
561 etmq->pending_timestamp_chan_id = 0;
562
563 /* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
564 return packet_queue->cs_timestamp;
565 }
566
cs_etm__clear_packet_queue(struct cs_etm_packet_queue * queue)567 static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
568 {
569 int i;
570
571 queue->head = 0;
572 queue->tail = 0;
573 queue->packet_count = 0;
574 for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) {
575 queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
576 queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
577 queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
578 queue->packet_buffer[i].instr_count = 0;
579 queue->packet_buffer[i].last_instr_taken_branch = false;
580 queue->packet_buffer[i].last_instr_size = 0;
581 queue->packet_buffer[i].last_instr_type = 0;
582 queue->packet_buffer[i].last_instr_subtype = 0;
583 queue->packet_buffer[i].last_instr_cond = 0;
584 queue->packet_buffer[i].flags = 0;
585 queue->packet_buffer[i].exception_number = UINT32_MAX;
586 queue->packet_buffer[i].trace_chan_id = UINT8_MAX;
587 queue->packet_buffer[i].cpu = INT_MIN;
588 }
589 }
590
cs_etm__clear_all_packet_queues(struct cs_etm_queue * etmq)591 static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq)
592 {
593 int idx;
594 struct int_node *inode;
595 struct cs_etm_traceid_queue *tidq;
596 struct intlist *traceid_queues_list = etmq->traceid_queues_list;
597
598 intlist__for_each_entry(inode, traceid_queues_list) {
599 idx = (int)(intptr_t)inode->priv;
600 tidq = etmq->traceid_queues[idx];
601 cs_etm__clear_packet_queue(&tidq->packet_queue);
602 }
603 }
604
/*
 * Initialise a freshly allocated traceid_queue: packet buffers, thread
 * references and (optionally) last-branch state.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, in which case
 * everything allocated so far is released again.
 */
static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
				      struct cs_etm_traceid_queue *tidq,
				      u8 trace_chan_id)
{
	int rc = -ENOMEM;
	struct auxtrace_queue *queue;
	struct cs_etm_auxtrace *etm = etmq->etm;

	cs_etm__clear_packet_queue(&tidq->packet_queue);

	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
	tidq->trace_chan_id = trace_chan_id;
	/* Exception level is unknown until the first packet is decoded. */
	tidq->el = tidq->prev_packet_el = ocsd_EL_unknown;
	tidq->thread = machine__findnew_thread(&etm->session->machines.host, -1,
					       queue->tid);
	tidq->prev_packet_thread = machine__idle_thread(&etm->session->machines.host);

	tidq->packet = zalloc(sizeof(struct cs_etm_packet));
	if (!tidq->packet)
		goto out;

	tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet));
	if (!tidq->prev_packet)
		goto out_free;

	if (etm->synth_opts.last_branch) {
		/* Branch stack header followed by last_branch_sz entries. */
		size_t sz = sizeof(struct branch_stack);

		sz += etm->synth_opts.last_branch_sz *
		      sizeof(struct branch_entry);
		tidq->last_branch = zalloc(sz);
		if (!tidq->last_branch)
			goto out_free;
		tidq->last_branch_rb = zalloc(sz);
		if (!tidq->last_branch_rb)
			goto out_free;
	}

	tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
	if (!tidq->event_buf)
		goto out_free;

	return 0;

out_free:
	/* zfree() tolerates members that were never allocated (NULL). */
	zfree(&tidq->last_branch_rb);
	zfree(&tidq->last_branch);
	zfree(&tidq->prev_packet);
	zfree(&tidq->packet);
out:
	return rc;
}
657
/*
 * Look up (or lazily create) the traceid_queue for @trace_chan_id on this
 * queue. In per-thread mode all channels collapse onto a single pseudo ID.
 * Returns NULL on allocation failure.
 */
static struct cs_etm_traceid_queue
*cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
{
	int idx;
	struct int_node *inode;
	struct intlist *traceid_queues_list;
	struct cs_etm_traceid_queue *tidq, **traceid_queues;
	struct cs_etm_auxtrace *etm = etmq->etm;

	if (etm->per_thread_decoding)
		trace_chan_id = CS_ETM_PER_THREAD_TRACEID;

	traceid_queues_list = etmq->traceid_queues_list;

	/*
	 * Check if the traceid_queue exist for this traceID by looking
	 * in the queue list.
	 */
	inode = intlist__find(traceid_queues_list, trace_chan_id);
	if (inode) {
		idx = (int)(intptr_t)inode->priv;
		return etmq->traceid_queues[idx];
	}

	/* We couldn't find a traceid_queue for this traceID, allocate one */
	tidq = malloc(sizeof(*tidq));
	if (!tidq)
		return NULL;

	memset(tidq, 0, sizeof(*tidq));

	/* Get a valid index for the new traceid_queue */
	idx = intlist__nr_entries(traceid_queues_list);
	/* Memory for the inode is free'ed in cs_etm_free_traceid_queues () */
	inode = intlist__findnew(traceid_queues_list, trace_chan_id);
	if (!inode)
		goto out_free;

	/* Associate this traceID with this index */
	inode->priv = (void *)(intptr_t)idx;

	if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id))
		goto out_free;

	/* Grow the traceid_queues array by one unit */
	traceid_queues = etmq->traceid_queues;
	traceid_queues = reallocarray(traceid_queues,
				      idx + 1,
				      sizeof(*traceid_queues));

	/*
	 * On failure reallocarray() returns NULL and the original block of
	 * memory is left untouched.
	 */
	if (!traceid_queues)
		goto out_free;

	traceid_queues[idx] = tidq;
	etmq->traceid_queues = traceid_queues;

	return etmq->traceid_queues[idx];

out_free:
	/*
	 * Function intlist__remove() removes the inode from the list
	 * and delete the memory associated to it.
	 */
	intlist__remove(traceid_queues_list, inode);
	free(tidq);

	return NULL;
}
730
731 struct cs_etm_packet_queue
cs_etm__etmq_get_packet_queue(struct cs_etm_queue * etmq,u8 trace_chan_id)732 *cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
733 {
734 struct cs_etm_traceid_queue *tidq;
735
736 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
737 if (tidq)
738 return &tidq->packet_queue;
739
740 return NULL;
741 }
742
cs_etm__packet_swap(struct cs_etm_auxtrace * etm,struct cs_etm_traceid_queue * tidq)743 static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
744 struct cs_etm_traceid_queue *tidq)
745 {
746 struct cs_etm_packet *tmp;
747
748 if (etm->synth_opts.branches || etm->synth_opts.last_branch ||
749 etm->synth_opts.instructions) {
750 /*
751 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
752 * the next incoming packet.
753 *
754 * Threads and exception levels are also tracked for both the
755 * previous and current packets. This is because the previous
756 * packet is used for the 'from' IP for branch samples, so the
757 * thread at that time must also be assigned to that sample.
758 * Across discontinuity packets the thread can change, so by
759 * tracking the thread for the previous packet the branch sample
760 * will have the correct info.
761 */
762 tmp = tidq->packet;
763 tidq->packet = tidq->prev_packet;
764 tidq->prev_packet = tmp;
765 tidq->prev_packet_el = tidq->el;
766 thread__put(tidq->prev_packet_thread);
767 tidq->prev_packet_thread = thread__get(tidq->thread);
768 }
769 }
770
cs_etm__packet_dump(const char * pkt_string,void * data)771 static void cs_etm__packet_dump(const char *pkt_string, void *data)
772 {
773 const char *color = PERF_COLOR_BLUE;
774 int len = strlen(pkt_string);
775 struct cs_etm_queue *etmq = data;
776 char queue_nr[64];
777
778 if (verbose)
779 snprintf(queue_nr, sizeof(queue_nr), "Qnr:%u; ", etmq->queue_nr);
780 else
781 queue_nr[0] = '\0';
782
783 if (len && (pkt_string[len-1] == '\n'))
784 color_fprintf(stdout, color, " %s%s", queue_nr, pkt_string);
785 else
786 color_fprintf(stdout, color, " %s%s\n", queue_nr, pkt_string);
787
788 fflush(stdout);
789 }
790
/* Fill decoder trace parameters from ETMv3/PTM metadata; ETMIDR selects PTM vs ETMv3. */
static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params,
					  u64 *metadata, u32 etmidr)
{
	t_params->protocol = cs_etm__get_v7_protocol_version(etmidr);
	t_params->etmv3.reg_ctrl = metadata[CS_ETM_ETMCR];
	t_params->etmv3.reg_trc_id = metadata[CS_ETM_ETMTRACEIDR];
}
798
/* Fill decoder trace parameters from ETMv4 metadata (instruction trace). */
static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
					  u64 *metadata)
{
	t_params->protocol = CS_ETM_PROTO_ETMV4i;
	t_params->etmv4.reg_idr0 = metadata[CS_ETMV4_TRCIDR0];
	t_params->etmv4.reg_idr1 = metadata[CS_ETMV4_TRCIDR1];
	t_params->etmv4.reg_idr2 = metadata[CS_ETMV4_TRCIDR2];
	t_params->etmv4.reg_idr8 = metadata[CS_ETMV4_TRCIDR8];
	t_params->etmv4.reg_configr = metadata[CS_ETMV4_TRCCONFIGR];
	t_params->etmv4.reg_traceidr = metadata[CS_ETMV4_TRCTRACEIDR];
}
810
/* Fill decoder trace parameters from ETE metadata (adds TRCDEVARCH over ETMv4). */
static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
					u64 *metadata)
{
	t_params->protocol = CS_ETM_PROTO_ETE;
	t_params->ete.reg_idr0 = metadata[CS_ETE_TRCIDR0];
	t_params->ete.reg_idr1 = metadata[CS_ETE_TRCIDR1];
	t_params->ete.reg_idr2 = metadata[CS_ETE_TRCIDR2];
	t_params->ete.reg_idr8 = metadata[CS_ETE_TRCIDR8];
	t_params->ete.reg_configr = metadata[CS_ETE_TRCCONFIGR];
	t_params->ete.reg_traceidr = metadata[CS_ETE_TRCTRACEIDR];
	t_params->ete.reg_devarch = metadata[CS_ETE_TRCDEVARCH];
}
823
cs_etm__init_trace_params(struct cs_etm_trace_params * t_params,struct cs_etm_queue * etmq)824 static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
825 struct cs_etm_queue *etmq)
826 {
827 struct int_node *inode;
828
829 intlist__for_each_entry(inode, etmq->traceid_list) {
830 u64 *metadata = inode->priv;
831 u64 architecture = metadata[CS_ETM_MAGIC];
832 u32 etmidr;
833
834 switch (architecture) {
835 case __perf_cs_etmv3_magic:
836 etmidr = metadata[CS_ETM_ETMIDR];
837 cs_etm__set_trace_param_etmv3(t_params++, metadata, etmidr);
838 break;
839 case __perf_cs_etmv4_magic:
840 cs_etm__set_trace_param_etmv4(t_params++, metadata);
841 break;
842 case __perf_cs_ete_magic:
843 cs_etm__set_trace_param_ete(t_params++, metadata);
844 break;
845 default:
846 return -EINVAL;
847 }
848 }
849
850 return 0;
851 }
852
cs_etm__init_decoder_params(struct cs_etm_decoder_params * d_params,struct cs_etm_queue * etmq,enum cs_etm_decoder_operation mode)853 static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
854 struct cs_etm_queue *etmq,
855 enum cs_etm_decoder_operation mode)
856 {
857 int ret = -EINVAL;
858
859 if (!(mode < CS_ETM_OPERATION_MAX))
860 goto out;
861
862 d_params->packet_printer = cs_etm__packet_dump;
863 d_params->operation = mode;
864 d_params->data = etmq;
865 d_params->formatted = etmq->format == FORMATTED;
866 d_params->fsyncs = false;
867 d_params->hsyncs = false;
868 d_params->frame_aligned = true;
869
870 ret = 0;
871 out:
872 return ret;
873 }
874
cs_etm__dump_event(struct cs_etm_queue * etmq,struct auxtrace_buffer * buffer)875 static void cs_etm__dump_event(struct cs_etm_queue *etmq,
876 struct auxtrace_buffer *buffer)
877 {
878 int ret;
879 const char *color = PERF_COLOR_BLUE;
880 size_t buffer_used = 0;
881
882 fprintf(stdout, "\n");
883 color_fprintf(stdout, color,
884 ". ... CoreSight %s Trace data: size %#zx bytes\n",
885 cs_etm_decoder__get_name(etmq->decoder), buffer->size);
886
887 do {
888 size_t consumed;
889
890 ret = cs_etm_decoder__process_data_block(
891 etmq->decoder, buffer->offset,
892 &((u8 *)buffer->data)[buffer_used],
893 buffer->size - buffer_used, &consumed);
894 if (ret)
895 break;
896
897 buffer_used += consumed;
898 } while (buffer_used < buffer->size);
899
900 cs_etm_decoder__reset(etmq->decoder);
901 }
902
cs_etm__flush_events(struct perf_session * session,const struct perf_tool * tool)903 static int cs_etm__flush_events(struct perf_session *session,
904 const struct perf_tool *tool)
905 {
906 struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
907 struct cs_etm_auxtrace,
908 auxtrace);
909 if (dump_trace)
910 return 0;
911
912 if (!tool->ordered_events)
913 return -EINVAL;
914
915 if (etm->timeless_decoding) {
916 /*
917 * Pass tid = -1 to process all queues. But likely they will have
918 * already been processed on PERF_RECORD_EXIT anyway.
919 */
920 return cs_etm__process_timeless_queues(etm, -1);
921 }
922
923 return cs_etm__process_timestamped_queues(etm);
924 }
925
/*
 * Tear down every traceid_queue attached to @etmq, the intlist that maps
 * trace IDs to array indices, and finally the array itself.
 */
static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
{
	int idx;
	uintptr_t priv;
	struct int_node *inode, *tmp;
	struct cs_etm_traceid_queue *tidq;
	struct intlist *traceid_queues_list = etmq->traceid_queues_list;

	intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) {
		/* inode->priv holds the index into traceid_queues[]. */
		priv = (uintptr_t)inode->priv;
		idx = priv;

		/* Free this traceid_queue from the array */
		tidq = etmq->traceid_queues[idx];
		thread__zput(tidq->thread);
		thread__zput(tidq->prev_packet_thread);
		zfree(&tidq->event_buf);
		zfree(&tidq->last_branch);
		zfree(&tidq->last_branch_rb);
		zfree(&tidq->prev_packet);
		zfree(&tidq->packet);
		zfree(&tidq);

		/*
		 * Function intlist__remove() removes the inode from the list
		 * and delete the memory associated to it.
		 */
		intlist__remove(traceid_queues_list, inode);
	}

	/* Then the RB tree itself */
	intlist__delete(traceid_queues_list);
	etmq->traceid_queues_list = NULL;

	/* finally free the traceid_queues array */
	zfree(&etmq->traceid_queues);
}
963
/*
 * Destructor for one decode queue: decoder, traceid queues and, when this
 * queue owns its trace ID list (i.e. it wasn't linked to another queue's
 * list), the traceID/metadata intlist too.
 */
static void cs_etm__free_queue(void *priv)
{
	struct int_node *inode, *tmp;
	struct cs_etm_queue *etmq = priv;

	if (!etmq)
		return;

	cs_etm_decoder__free(etmq->decoder);
	cs_etm__free_traceid_queues(etmq);

	if (etmq->own_traceid_list) {
		/* First remove all traceID/metadata nodes for the RB tree */
		intlist__for_each_entry_safe(inode, tmp, etmq->own_traceid_list)
			intlist__remove(etmq->own_traceid_list, inode);

		/* Then the RB tree itself */
		intlist__delete(etmq->own_traceid_list);
	}

	free(etmq);
}
986
cs_etm__free_events(struct perf_session * session)987 static void cs_etm__free_events(struct perf_session *session)
988 {
989 unsigned int i;
990 struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
991 struct cs_etm_auxtrace,
992 auxtrace);
993 struct auxtrace_queues *queues = &aux->queues;
994
995 for (i = 0; i < queues->nr_queues; i++) {
996 cs_etm__free_queue(queues->queue_array[i].priv);
997 queues->queue_array[i].priv = NULL;
998 }
999
1000 auxtrace_queues__free(queues);
1001 }
1002
cs_etm__free(struct perf_session * session)1003 static void cs_etm__free(struct perf_session *session)
1004 {
1005 int i;
1006 struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
1007 struct cs_etm_auxtrace,
1008 auxtrace);
1009 cs_etm__free_events(session);
1010 session->auxtrace = NULL;
1011
1012 for (i = 0; i < aux->num_cpu; i++)
1013 zfree(&aux->metadata[i]);
1014
1015 zfree(&aux->metadata);
1016 zfree(&aux);
1017 }
1018
cs_etm__evsel_is_auxtrace(struct perf_session * session,struct evsel * evsel)1019 static bool cs_etm__evsel_is_auxtrace(struct perf_session *session,
1020 struct evsel *evsel)
1021 {
1022 struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
1023 struct cs_etm_auxtrace,
1024 auxtrace);
1025
1026 return evsel->core.attr.type == aux->pmu_type;
1027 }
1028
/*
 * Pick the machine (host or default guest) whose maps should be used to
 * resolve addresses traced at exception level @el on this queue.
 */
static struct machine *cs_etm__get_machine(struct cs_etm_queue *etmq,
					   ocsd_ex_level el)
{
	enum cs_etm_pid_fmt pid_fmt = cs_etm__get_pid_fmt(etmq);

	/*
	 * For any virtualisation based on nVHE (e.g. pKVM), or host kernels
	 * running at EL1 assume everything is the host.
	 */
	if (pid_fmt == CS_ETM_PIDFMT_CTXTID)
		return &etmq->etm->session->machines.host;

	/*
	 * Not perfect, but otherwise assume anything in EL1 is the default
	 * guest, and everything else is the host. Distinguishing between guest
	 * and host userspaces isn't currently supported either. Neither is
	 * multiple guest support. All this does is reduce the likeliness of
	 * decode errors where we look into the host kernel maps when it should
	 * have been the guest maps.
	 */
	switch (el) {
	case ocsd_EL1:
		return machines__find_guest(&etmq->etm->session->machines,
					    DEFAULT_GUEST_KERNEL_ID);
	case ocsd_EL3:
	case ocsd_EL2:
	case ocsd_EL0:
	case ocsd_EL_unknown:
	default:
		return &etmq->etm->session->machines.host;
	}
}
1061
cs_etm__cpu_mode(struct cs_etm_queue * etmq,u64 address,ocsd_ex_level el)1062 static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address,
1063 ocsd_ex_level el)
1064 {
1065 struct machine *machine = cs_etm__get_machine(etmq, el);
1066
1067 if (address >= machine__kernel_start(machine)) {
1068 if (machine__is_host(machine))
1069 return PERF_RECORD_MISC_KERNEL;
1070 else
1071 return PERF_RECORD_MISC_GUEST_KERNEL;
1072 } else {
1073 if (machine__is_host(machine))
1074 return PERF_RECORD_MISC_USER;
1075 else {
1076 /*
1077 * Can't really happen at the moment because
1078 * cs_etm__get_machine() will always return
1079 * machines.host for any non EL1 trace.
1080 */
1081 return PERF_RECORD_MISC_GUEST_USER;
1082 }
1083 }
1084 }
1085
/*
 * Memory access helper (used as the OpenCSD decoder's code-read callback):
 * copy up to @size bytes of traced program code at @address into @buffer.
 *
 * Returns the number of bytes read, or 0 when the address can't be
 * resolved (unknown traceID queue, no map, DSO data unavailable, ...).
 */
static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
			      u64 address, size_t size, u8 *buffer,
			      const ocsd_mem_space_acc_t mem_space)
{
	u8 cpumode;
	u64 offset;
	int len;
	struct addr_location al;
	struct dso *dso;
	struct cs_etm_traceid_queue *tidq;
	int ret = 0;

	if (!etmq)
		return 0;

	addr_location__init(&al);
	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
	if (!tidq)
		goto out;

	/*
	 * We've already tracked EL along side the PID in cs_etm__set_thread()
	 * so double check that it matches what OpenCSD thinks as well. It
	 * doesn't distinguish between EL0 and EL1 for this mem access callback
	 * so we had to do the extra tracking. Skip validation if it's any of
	 * the 'any' values.
	 */
	if (!(mem_space == OCSD_MEM_SPACE_ANY ||
	      mem_space == OCSD_MEM_SPACE_N || mem_space == OCSD_MEM_SPACE_S)) {
		if (mem_space & OCSD_MEM_SPACE_EL1N) {
			/* Includes both non secure EL1 and EL0 */
			assert(tidq->el == ocsd_EL1 || tidq->el == ocsd_EL0);
		} else if (mem_space & OCSD_MEM_SPACE_EL2)
			assert(tidq->el == ocsd_EL2);
		else if (mem_space & OCSD_MEM_SPACE_EL3)
			assert(tidq->el == ocsd_EL3);
	}

	cpumode = cs_etm__cpu_mode(etmq, address, tidq->el);

	/* Resolve the address to a map in the thread's address space */
	if (!thread__find_map(tidq->thread, cpumode, address, &al))
		goto out;

	dso = map__dso(al.map);
	if (!dso)
		goto out;

	/* Don't keep retrying a DSO whose data already failed for itrace */
	if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR &&
	    dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE))
		goto out;

	offset = map__map_ip(al.map, address);

	map__load(al.map);

	len = dso__data_read_offset(dso, maps__machine(thread__maps(tidq->thread)),
				    offset, buffer, size);

	if (len <= 0) {
		/* Warn once globally, and once per DSO with the address */
		ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n"
				 "              Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n");
		if (!dso__auxtrace_warned(dso)) {
			pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n",
			       address,
			       dso__long_name(dso) ? dso__long_name(dso) : "Unknown");
			dso__set_auxtrace_warned(dso);
		}
		goto out;
	}
	ret = len;
out:
	addr_location__exit(&al);
	return ret;
}
1160
cs_etm__alloc_queue(void)1161 static struct cs_etm_queue *cs_etm__alloc_queue(void)
1162 {
1163 struct cs_etm_queue *etmq = zalloc(sizeof(*etmq));
1164 if (!etmq)
1165 return NULL;
1166
1167 etmq->traceid_queues_list = intlist__new(NULL);
1168 if (!etmq->traceid_queues_list)
1169 goto out_free;
1170
1171 /*
1172 * Create an RB tree for traceID-metadata tuple. Since the conversion
1173 * has to be made for each packet that gets decoded, optimizing access
1174 * in anything other than a sequential array is worth doing.
1175 */
1176 etmq->traceid_list = etmq->own_traceid_list = intlist__new(NULL);
1177 if (!etmq->traceid_list)
1178 goto out_free;
1179
1180 return etmq;
1181
1182 out_free:
1183 intlist__delete(etmq->traceid_queues_list);
1184 free(etmq);
1185
1186 return NULL;
1187 }
1188
cs_etm__setup_queue(struct cs_etm_auxtrace * etm,struct auxtrace_queue * queue,unsigned int queue_nr)1189 static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
1190 struct auxtrace_queue *queue,
1191 unsigned int queue_nr)
1192 {
1193 struct cs_etm_queue *etmq = queue->priv;
1194
1195 if (etmq)
1196 return 0;
1197
1198 etmq = cs_etm__alloc_queue();
1199
1200 if (!etmq)
1201 return -ENOMEM;
1202
1203 queue->priv = etmq;
1204 etmq->etm = etm;
1205 etmq->queue_nr = queue_nr;
1206 queue->cpu = queue_nr; /* Placeholder, may be reset to -1 in per-thread mode */
1207 etmq->offset = 0;
1208 etmq->sink_id = SINK_UNSET;
1209
1210 return 0;
1211 }
1212
/*
 * Decode from the start of @etmq until the first CS timestamp is found,
 * then seed the auxtrace min heap with it so that all queues can later be
 * processed in chronological order.
 *
 * Returns 0 when the queue ran out of data, a negative error code on
 * failure, or the result of auxtrace_heap__add() once a timestamp is found.
 */
static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm,
					    struct cs_etm_queue *etmq,
					    unsigned int queue_nr)
{
	int ret = 0;
	unsigned int cs_queue_nr;
	u8 trace_chan_id;
	u64 cs_timestamp;

	/*
	 * We are under a CPU-wide trace scenario.  As such we need to know
	 * when the code that generated the traces started to execute so that
	 * it can be correlated with execution on other CPUs.  So we get a
	 * handle on the beginning of traces and decode until we find a
	 * timestamp.  The timestamp is then added to the auxtrace min heap
	 * in order to know what nibble (of all the etmqs) to decode first.
	 */
	while (1) {
		/*
		 * Fetch an aux_buffer from this etmq.  Bail if no more
		 * blocks or an error has been encountered.
		 */
		ret = cs_etm__get_data_block(etmq);
		if (ret <= 0)
			goto out;

		/*
		 * Run decoder on the trace block.  The decoder will stop when
		 * encountering a CS timestamp, a full packet queue or the end of
		 * trace for that block.
		 */
		ret = cs_etm__decode_data_block(etmq);
		if (ret)
			goto out;

		/*
		 * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
		 * the timestamp calculation for us.
		 */
		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);

		/* We found a timestamp, no need to continue. */
		if (cs_timestamp)
			break;

		/*
		 * We didn't find a timestamp so empty all the traceid packet
		 * queues before looking for another timestamp packet, either
		 * in the current data block or a new one.  Packets that were
		 * just decoded are useless since no timestamp has been
		 * associated with them.  As such simply discard them.
		 */
		cs_etm__clear_all_packet_queues(etmq);
	}

	/*
	 * We have a timestamp.  Add it to the min heap to reflect when
	 * instructions conveyed by the range packets of this traceID queue
	 * started to execute.  Once the same has been done for all the traceID
	 * queues of each etmq, rendering and decoding can start in
	 * chronological order.
	 *
	 * Note that packets decoded above are still in the traceID's packet
	 * queue and will be processed in cs_etm__process_timestamped_queues().
	 */
	cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
	ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
out:
	return ret;
}
1283
/*
 * Linearise the circular last-branch buffer (tidq->last_branch_rb) into
 * the flat branch stack (tidq->last_branch) attached to samples, most
 * recent branch first.
 */
static inline
void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
				 struct cs_etm_traceid_queue *tidq)
{
	struct branch_stack *bs_src = tidq->last_branch_rb;
	struct branch_stack *bs_dst = tidq->last_branch;
	size_t nr = 0;

	/*
	 * Set the number of records before early exit: ->nr is used to
	 * determine how many branches to copy from ->entries.
	 */
	bs_dst->nr = bs_src->nr;

	/*
	 * Early exit when there is nothing to copy.
	 */
	if (!bs_src->nr)
		return;

	/*
	 * As bs_src->entries is a circular buffer, we need to copy from it in
	 * two steps.  First, copy the branches from the most recently inserted
	 * branch ->last_branch_pos until the end of bs_src->entries buffer.
	 */
	nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos;
	memcpy(&bs_dst->entries[0],
	       &bs_src->entries[tidq->last_branch_pos],
	       sizeof(struct branch_entry) * nr);

	/*
	 * If we wrapped around at least once, the branches from the beginning
	 * of the bs_src->entries buffer and until the ->last_branch_pos element
	 * are older valid branches: copy them over.  The total number of
	 * branches copied over will be equal to the number of branches asked by
	 * the user in last_branch_sz.
	 */
	if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
		memcpy(&bs_dst->entries[nr],
		       &bs_src->entries[0],
		       sizeof(struct branch_entry) * tidq->last_branch_pos);
	}
}
1327
1328 static inline
cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue * tidq)1329 void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq)
1330 {
1331 tidq->last_branch_pos = 0;
1332 tidq->last_branch_rb->nr = 0;
1333 }
1334
/*
 * Return the size in bytes (2 or 4) of the T32 instruction at @addr.
 */
static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
					 u8 trace_chan_id, u64 addr)
{
	/*
	 * Zero-initialise the buffer: cs_etm__mem_access() returns 0 and
	 * leaves it untouched when the read fails, and we would otherwise
	 * inspect uninitialised stack memory below. With a zeroed buffer a
	 * failed read deterministically reports a 16-bit instruction.
	 */
	u8 instrBytes[2] = {0};

	cs_etm__mem_access(etmq, trace_chan_id, addr, ARRAY_SIZE(instrBytes),
			   instrBytes, 0);
	/*
	 * T32 instruction size is indicated by bits[15:11] of the first
	 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
	 * denote a 32-bit instruction.
	 */
	return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
}
1349
cs_etm__first_executed_instr(struct cs_etm_packet * packet)1350 static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
1351 {
1352 /*
1353 * Return 0 for packets that have no addresses so that CS_ETM_INVAL_ADDR doesn't
1354 * appear in samples.
1355 */
1356 if (packet->sample_type == CS_ETM_DISCONTINUITY ||
1357 packet->sample_type == CS_ETM_EXCEPTION)
1358 return 0;
1359
1360 return packet->start_addr;
1361 }
1362
1363 static inline
cs_etm__last_executed_instr(const struct cs_etm_packet * packet)1364 u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
1365 {
1366 /* Returns 0 for the CS_ETM_DISCONTINUITY packet */
1367 if (packet->sample_type == CS_ETM_DISCONTINUITY)
1368 return 0;
1369
1370 return packet->end_addr - packet->last_instr_size;
1371 }
1372
/*
 * Compute the address of the instruction @offset instructions into
 * @packet's range. A32/A64 instructions are fixed 4 bytes; T32 must be
 * walked instruction by instruction since sizes vary (2 or 4 bytes).
 */
static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
				     u64 trace_chan_id,
				     const struct cs_etm_packet *packet,
				     u64 offset)
{
	u64 addr = packet->start_addr;
	u64 i;

	/* Fixed 4 byte instruction size (A32/A64) */
	if (packet->isa != CS_ETM_ISA_T32)
		return addr + offset * 4;

	/* T32: step through each variable-length instruction */
	for (i = 0; i < offset; i++)
		addr += cs_etm__t32_instr_size(etmq, trace_chan_id, addr);

	return addr;
}
1392
/*
 * Record the branch taken at the end of PREV_PACKET (source) into the
 * start of PACKET (target) in the circular last-branch buffer.
 */
static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq,
					  struct cs_etm_traceid_queue *tidq)
{
	struct branch_stack *bs = tidq->last_branch_rb;
	struct branch_entry *be;

	/*
	 * The branches are recorded in a circular buffer in reverse
	 * chronological order: we start recording from the last element of the
	 * buffer down.  After writing the first element of the stack, move the
	 * insert position back to the end of the buffer.
	 */
	if (!tidq->last_branch_pos)
		tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;

	tidq->last_branch_pos -= 1;

	be = &bs->entries[tidq->last_branch_pos];
	be->from = cs_etm__last_executed_instr(tidq->prev_packet);
	be->to = cs_etm__first_executed_instr(tidq->packet);
	/* No support for mispredict */
	be->flags.mispred = 0;
	be->flags.predicted = 1;

	/*
	 * Increment bs->nr until reaching the number of last branches asked by
	 * the user on the command line.
	 */
	if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
		bs->nr += 1;
}
1424
/*
 * Re-synthesize @sample into @event's body (for 'perf inject'), sizing the
 * header to match the serialized sample.
 */
static int cs_etm__inject_event(union perf_event *event,
				struct perf_sample *sample, u64 type)
{
	size_t sz = perf_event__sample_event_size(sample, type, 0);

	event->header.size = sz;

	return perf_event__synthesize_sample(event, type, 0, sample);
}
1431
1432
/*
 * Advance @etmq to its next auxtrace buffer, loading the buffer's data
 * from the perf.data file if needed and dropping the previous buffer.
 *
 * Returns the new buffer length, 0 when the queue has no more data, or
 * -ENOMEM when the buffer data could not be loaded.
 */
static int
cs_etm__get_trace(struct cs_etm_queue *etmq)
{
	struct auxtrace_buffer *aux_buffer = etmq->buffer;
	struct auxtrace_buffer *old_buffer = aux_buffer;
	struct auxtrace_queue *queue;

	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];

	aux_buffer = auxtrace_buffer__next(queue, aux_buffer);

	/* If no more data, drop the previous auxtrace_buffer and return */
	if (!aux_buffer) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		etmq->buf_len = 0;
		return 0;
	}

	etmq->buffer = aux_buffer;

	/* If the aux_buffer doesn't have data associated, try to load it */
	if (!aux_buffer->data) {
		/* get the file desc associated with the perf data file */
		int fd = perf_data__fd(etmq->etm->session->data);

		aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd);
		if (!aux_buffer->data)
			return -ENOMEM;
	}

	/* If valid, drop the previous buffer */
	if (old_buffer)
		auxtrace_buffer__drop_data(old_buffer);

	/* Reset the read cursor to the start of the new buffer */
	etmq->buf_used = 0;
	etmq->buf_len = aux_buffer->size;
	etmq->buf = aux_buffer->data;

	return etmq->buf_len;
}
1474
/*
 * Bind the thread matching @tid (on the machine implied by @el) to @tidq
 * and remember the exception level. Falls back to the machine's idle
 * thread when no matching thread is known or @tid is -1.
 */
static void cs_etm__set_thread(struct cs_etm_queue *etmq,
			       struct cs_etm_traceid_queue *tidq, pid_t tid,
			       ocsd_ex_level el)
{
	struct machine *m = cs_etm__get_machine(etmq, el);

	if (tid != -1) {
		/* Drop the reference to any previously assigned thread */
		thread__zput(tidq->thread);
		tidq->thread = machine__find_thread(m, -1, tid);
	}

	/* Couldn't find a known thread */
	if (!tidq->thread)
		tidq->thread = machine__idle_thread(m);

	tidq->el = el;
}
1492
/*
 * Public wrapper: look up the traceID queue for @trace_chan_id and assign
 * it the thread for @tid at exception level @el.
 * Returns 0 on success, -EINVAL if the traceID queue can't be obtained.
 */
int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid,
			    u8 trace_chan_id, ocsd_ex_level el)
{
	struct cs_etm_traceid_queue *tidq =
		cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);

	if (!tidq)
		return -EINVAL;

	cs_etm__set_thread(etmq, tidq, tid, el);

	return 0;
}
1505
cs_etm__etmq_is_timeless(struct cs_etm_queue * etmq)1506 bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq)
1507 {
1508 return !!etmq->etm->timeless_decoding;
1509 }
1510
/*
 * Fill in sample->insn_len and copy the instruction bytes at sample->ip
 * into sample->insn for the given packet.
 */
static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
			      u64 trace_chan_id,
			      const struct cs_etm_packet *packet,
			      struct perf_sample *sample)
{
	/*
	 * It's pointless to read instructions for the CS_ETM_DISCONTINUITY
	 * packet, so directly bail out with 'insn_len' = 0.
	 */
	if (packet->sample_type == CS_ETM_DISCONTINUITY) {
		sample->insn_len = 0;
		return;
	}

	/*
	 * T32 instruction size might be 32-bit or 16-bit, decide by calling
	 * cs_etm__t32_instr_size().
	 */
	if (packet->isa == CS_ETM_ISA_T32)
		sample->insn_len = cs_etm__t32_instr_size(etmq, trace_chan_id,
							  sample->ip);
	/* Otherwise, A64 and A32 instruction size are always 32-bit. */
	else
		sample->insn_len = 4;

	cs_etm__mem_access(etmq, trace_chan_id, sample->ip, sample->insn_len,
			   (void *)sample->insn, 0);
}
1539
/*
 * Convert a CoreSight timestamp to perf time when the trace carries
 * virtual/kernel timestamps; otherwise return the raw value unchanged.
 */
u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp)
{
	struct cs_etm_auxtrace *etm = etmq->etm;

	if (!etm->has_virtual_ts)
		return cs_timestamp;

	return tsc_to_perf_time(cs_timestamp, &etm->tc);
}
1549
cs_etm__resolve_sample_time(struct cs_etm_queue * etmq,struct cs_etm_traceid_queue * tidq)1550 static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq,
1551 struct cs_etm_traceid_queue *tidq)
1552 {
1553 struct cs_etm_auxtrace *etm = etmq->etm;
1554 struct cs_etm_packet_queue *packet_queue = &tidq->packet_queue;
1555
1556 if (!etm->timeless_decoding && etm->has_virtual_ts)
1557 return packet_queue->cs_timestamp;
1558 else
1559 return etm->latest_kernel_timestamp;
1560 }
1561
/*
 * Synthesize a PERF_RECORD_SAMPLE instruction event at @addr covering
 * @period instructions and deliver it to the session.
 *
 * Returns 0 on success, or the error from injection/delivery.
 */
static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
					    struct cs_etm_traceid_queue *tidq,
					    u64 addr, u64 period)
{
	int ret = 0;
	struct cs_etm_auxtrace *etm = etmq->etm;
	union perf_event *event = tidq->event_buf;
	struct perf_sample sample;

	perf_sample__init(&sample, /*all=*/true);
	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->el);
	event->sample.header.size = sizeof(struct perf_event_header);

	/* Set time field based on etm auxtrace config. */
	sample.time = cs_etm__resolve_sample_time(etmq, tidq);

	sample.ip = addr;
	sample.pid = thread__pid(tidq->thread);
	sample.tid = thread__tid(tidq->thread);
	sample.id = etmq->etm->instructions_id;
	sample.stream_id = etmq->etm->instructions_id;
	sample.period = period;
	sample.cpu = tidq->packet->cpu;
	sample.flags = tidq->prev_packet->flags;
	sample.cpumode = event->sample.header.misc;

	cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample);

	if (etm->synth_opts.last_branch)
		sample.branch_stack = tidq->last_branch;

	if (etm->synth_opts.inject) {
		ret = cs_etm__inject_event(event, &sample,
					   etm->instructions_sample_type);
		/*
		 * Go through the cleanup path instead of returning directly:
		 * an early return here would skip perf_sample__exit() and
		 * leave the perf_sample__init()/exit() pairing unbalanced.
		 */
		if (ret)
			goto out;
	}

	ret = perf_session__deliver_synth_event(etm->session, event, &sample);

	if (ret)
		pr_err(
			"CS ETM Trace: failed to deliver instruction event, error %d\n",
			ret);

out:
	perf_sample__exit(&sample);
	return ret;
}
1611
1612 /*
1613 * The cs etm packet encodes an instruction range between a branch target
1614 * and the next taken branch. Generate sample accordingly.
1615 */
cs_etm__synth_branch_sample(struct cs_etm_queue * etmq,struct cs_etm_traceid_queue * tidq)1616 static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
1617 struct cs_etm_traceid_queue *tidq)
1618 {
1619 int ret = 0;
1620 struct cs_etm_auxtrace *etm = etmq->etm;
1621 struct perf_sample sample = {.ip = 0,};
1622 union perf_event *event = tidq->event_buf;
1623 struct dummy_branch_stack {
1624 u64 nr;
1625 u64 hw_idx;
1626 struct branch_entry entries;
1627 } dummy_bs;
1628 u64 ip;
1629
1630 ip = cs_etm__last_executed_instr(tidq->prev_packet);
1631
1632 event->sample.header.type = PERF_RECORD_SAMPLE;
1633 event->sample.header.misc = cs_etm__cpu_mode(etmq, ip,
1634 tidq->prev_packet_el);
1635 event->sample.header.size = sizeof(struct perf_event_header);
1636
1637 /* Set time field based on etm auxtrace config. */
1638 sample.time = cs_etm__resolve_sample_time(etmq, tidq);
1639
1640 sample.ip = ip;
1641 sample.pid = thread__pid(tidq->prev_packet_thread);
1642 sample.tid = thread__tid(tidq->prev_packet_thread);
1643 sample.addr = cs_etm__first_executed_instr(tidq->packet);
1644 sample.id = etmq->etm->branches_id;
1645 sample.stream_id = etmq->etm->branches_id;
1646 sample.period = 1;
1647 sample.cpu = tidq->packet->cpu;
1648 sample.flags = tidq->prev_packet->flags;
1649 sample.cpumode = event->sample.header.misc;
1650
1651 cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->prev_packet,
1652 &sample);
1653
1654 /*
1655 * perf report cannot handle events without a branch stack
1656 */
1657 if (etm->synth_opts.last_branch) {
1658 dummy_bs = (struct dummy_branch_stack){
1659 .nr = 1,
1660 .hw_idx = -1ULL,
1661 .entries = {
1662 .from = sample.ip,
1663 .to = sample.addr,
1664 },
1665 };
1666 sample.branch_stack = (struct branch_stack *)&dummy_bs;
1667 }
1668
1669 if (etm->synth_opts.inject) {
1670 ret = cs_etm__inject_event(event, &sample,
1671 etm->branches_sample_type);
1672 if (ret)
1673 return ret;
1674 }
1675
1676 ret = perf_session__deliver_synth_event(etm->session, event, &sample);
1677
1678 if (ret)
1679 pr_err(
1680 "CS ETM Trace: failed to deliver instruction event, error %d\n",
1681 ret);
1682
1683 return ret;
1684 }
1685
/*
 * Create the event attributes and IDs for the samples (branches and/or
 * instructions) that will be synthesized from the decoded trace.  Fields
 * are inherited from the first evsel carrying CoreSight data so the
 * synthesized samples blend in with the rest of the session.
 *
 * Returns 0 on success (including when no CoreSight evsel is present), or
 * a negative error code from delivering the synthetic attr events.
 */
static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
				struct perf_session *session)
{
	struct evlist *evlist = session->evlist;
	struct evsel *evsel;
	struct perf_event_attr attr;
	bool found = false;
	u64 id;
	int err;

	/* Find the evsel that carries CoreSight AUX data */
	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == etm->pmu_type) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_debug("No selected events with CoreSight Trace data\n");
		return 0;
	}

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.size = sizeof(struct perf_event_attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD;
	/* No TIME field when decoding timeless trace */
	if (etm->timeless_decoding)
		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	else
		attr.sample_type |= PERF_SAMPLE_TIME;

	/* Mirror the original event's exclusion/readback settings */
	attr.exclude_user = evsel->core.attr.exclude_user;
	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
	attr.exclude_hv = evsel->core.attr.exclude_hv;
	attr.exclude_host = evsel->core.attr.exclude_host;
	attr.exclude_guest = evsel->core.attr.exclude_guest;
	attr.sample_id_all = evsel->core.attr.sample_id_all;
	attr.read_format = evsel->core.attr.read_format;

	/* create new id val to be a fixed offset from evsel id */
	id = auxtrace_synth_id_range_start(evsel);

	if (etm->synth_opts.branches) {
		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
		attr.sample_period = 1;
		/* ADDR carries the branch target; branches only */
		attr.sample_type |= PERF_SAMPLE_ADDR;
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		etm->branches_sample_type = attr.sample_type;
		etm->branches_id = id;
		id += 1;
		attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
	}

	if (etm->synth_opts.last_branch) {
		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
		/*
		 * We don't use the hardware index, but the sample generation
		 * code uses the new format branch_stack with this field,
		 * so the event attributes must indicate that it's present.
		 */
		attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
	}

	if (etm->synth_opts.instructions) {
		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
		attr.sample_period = etm->synth_opts.period;
		etm->instructions_sample_period = attr.sample_period;
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		etm->instructions_sample_type = attr.sample_type;
		etm->instructions_id = id;
		id += 1;
	}

	return 0;
}
1767
/*
 * Process the packet just decoded for @tidq: update the last-branch
 * buffer, emit periodic instruction samples and branch samples as
 * configured, then swap PACKET/PREV_PACKET for the next round.
 */
static int cs_etm__sample(struct cs_etm_queue *etmq,
			  struct cs_etm_traceid_queue *tidq)
{
	struct cs_etm_auxtrace *etm = etmq->etm;
	int ret;
	u8 trace_chan_id = tidq->trace_chan_id;
	u64 instrs_prev;

	/* Get instructions remainder from previous packet */
	instrs_prev = tidq->period_instructions;

	tidq->period_instructions += tidq->packet->instr_count;

	/*
	 * Record a branch when the last instruction in
	 * PREV_PACKET is a branch.
	 */
	if (etm->synth_opts.last_branch &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE &&
	    tidq->prev_packet->last_instr_taken_branch)
		cs_etm__update_last_branch_rb(etmq, tidq);

	if (etm->synth_opts.instructions &&
	    tidq->period_instructions >= etm->instructions_sample_period) {
		/*
		 * Emit instruction sample periodically
		 * TODO: allow period to be defined in cycles and clock time
		 */

		/*
		 * Below diagram demonstrates the instruction samples
		 * generation flows:
		 *
		 *    Instrs     Instrs       Instrs       Instrs
		 *   Sample(n)  Sample(n+1)  Sample(n+2)  Sample(n+3)
		 *    |            |            |            |
		 *    V            V            V            V
		 *   --------------------------------------------------
		 *            ^                                  ^
		 *            |                                  |
		 *         Period                             Period
		 *    instructions(Pi)                   instructions(Pi')
		 *
		 *            |                                  |
		 *            \---------------- -----------------/
		 *                             V
		 *                 tidq->packet->instr_count
		 *
		 * Instrs Sample(n...) are the synthesised samples occurring
		 * every etm->instructions_sample_period instructions - as
		 * defined on the perf command line.  Sample(n) is being the
		 * last sample before the current etm packet, n+1 to n+3
		 * samples are generated from the current etm packet.
		 *
		 * tidq->packet->instr_count represents the number of
		 * instructions in the current etm packet.
		 *
		 * Period instructions (Pi) contains the number of
		 * instructions executed after the sample point(n) from the
		 * previous etm packet.  This will always be less than
		 * etm->instructions_sample_period.
		 *
		 * When generate new samples, it combines with two parts
		 * instructions, one is the tail of the old packet and another
		 * is the head of the new coming packet, to generate
		 * sample(n+1); sample(n+2) and sample(n+3) consume the
		 * instructions with sample period.  After sample(n+3), the
		 * rest instructions will be used by later packet and it is
		 * assigned to tidq->period_instructions for next round
		 * calculation.
		 */

		/*
		 * Get the initial offset into the current packet instructions;
		 * entry conditions ensure that instrs_prev is less than
		 * etm->instructions_sample_period.
		 */
		u64 offset = etm->instructions_sample_period - instrs_prev;
		u64 addr;

		/* Prepare last branches for instruction sample */
		if (etm->synth_opts.last_branch)
			cs_etm__copy_last_branch_rb(etmq, tidq);

		while (tidq->period_instructions >=
		       etm->instructions_sample_period) {
			/*
			 * Calculate the address of the sampled instruction (-1
			 * as sample is reported as though instruction has just
			 * been executed, but PC has not advanced to next
			 * instruction)
			 */
			addr = cs_etm__instr_addr(etmq, trace_chan_id,
						  tidq->packet, offset - 1);
			ret = cs_etm__synth_instruction_sample(
				etmq, tidq, addr,
				etm->instructions_sample_period);
			if (ret)
				return ret;

			offset += etm->instructions_sample_period;
			tidq->period_instructions -=
				etm->instructions_sample_period;
		}
	}

	if (etm->synth_opts.branches) {
		bool generate_sample = false;

		/* Generate sample for tracing on packet */
		if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY)
			generate_sample = true;

		/* Generate sample for branch taken packet */
		if (tidq->prev_packet->sample_type == CS_ETM_RANGE &&
		    tidq->prev_packet->last_instr_taken_branch)
			generate_sample = true;

		if (generate_sample) {
			ret = cs_etm__synth_branch_sample(etmq, tidq);
			if (ret)
				return ret;
		}
	}

	/* Make the current packet the previous one for the next iteration */
	cs_etm__packet_swap(etm, tidq);

	return 0;
}
1896
cs_etm__exception(struct cs_etm_traceid_queue * tidq)1897 static int cs_etm__exception(struct cs_etm_traceid_queue *tidq)
1898 {
1899 /*
1900 * When the exception packet is inserted, whether the last instruction
1901 * in previous range packet is taken branch or not, we need to force
1902 * to set 'prev_packet->last_instr_taken_branch' to true. This ensures
1903 * to generate branch sample for the instruction range before the
1904 * exception is trapped to kernel or before the exception returning.
1905 *
1906 * The exception packet includes the dummy address values, so don't
1907 * swap PACKET with PREV_PACKET. This keeps PREV_PACKET to be useful
1908 * for generating instruction and branch samples.
1909 */
1910 if (tidq->prev_packet->sample_type == CS_ETM_RANGE)
1911 tidq->prev_packet->last_instr_taken_branch = true;
1912
1913 return 0;
1914 }
1915
/*
 * Drain pending state for @tidq: emit the final instruction sample (with
 * the residual last-branch stack) and a final branch sample for the last
 * range packet, then swap packets and reset the branch buffer.
 * NOTE(review): callers are outside this chunk - presumably invoked at
 * discontinuities and end of data; confirm against the rest of the file.
 */
static int cs_etm__flush(struct cs_etm_queue *etmq,
			 struct cs_etm_traceid_queue *tidq)
{
	int err = 0;
	struct cs_etm_auxtrace *etm = etmq->etm;

	/* Handle start tracing packet */
	if (tidq->prev_packet->sample_type == CS_ETM_EMPTY)
		goto swap_packet;

	if (etmq->etm->synth_opts.last_branch &&
	    etmq->etm->synth_opts.instructions &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
		u64 addr;

		/* Prepare last branches for instruction sample */
		cs_etm__copy_last_branch_rb(etmq, tidq);

		/*
		 * Generate a last branch event for the branches left in the
		 * circular buffer at the end of the trace.
		 *
		 * Use the address of the end of the last reported execution
		 * range
		 */
		addr = cs_etm__last_executed_instr(tidq->prev_packet);

		err = cs_etm__synth_instruction_sample(
			etmq, tidq, addr,
			tidq->period_instructions);
		if (err)
			return err;

		tidq->period_instructions = 0;

	}

	if (etm->synth_opts.branches &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
		err = cs_etm__synth_branch_sample(etmq, tidq);
		if (err)
			return err;
	}

swap_packet:
	cs_etm__packet_swap(etm, tidq);

	/* Reset last branches after flush the trace */
	if (etm->synth_opts.last_branch)
		cs_etm__reset_last_branch_rb(tidq);

	return err;
}
1969
cs_etm__end_block(struct cs_etm_queue * etmq,struct cs_etm_traceid_queue * tidq)1970 static int cs_etm__end_block(struct cs_etm_queue *etmq,
1971 struct cs_etm_traceid_queue *tidq)
1972 {
1973 int err;
1974
1975 /*
1976 * It has no new packet coming and 'etmq->packet' contains the stale
1977 * packet which was set at the previous time with packets swapping;
1978 * so skip to generate branch sample to avoid stale packet.
1979 *
1980 * For this case only flush branch stack and generate a last branch
1981 * event for the branches left in the circular buffer at the end of
1982 * the trace.
1983 */
1984 if (etmq->etm->synth_opts.last_branch &&
1985 etmq->etm->synth_opts.instructions &&
1986 tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1987 u64 addr;
1988
1989 /* Prepare last branches for instruction sample */
1990 cs_etm__copy_last_branch_rb(etmq, tidq);
1991
1992 /*
1993 * Use the address of the end of the last reported execution
1994 * range.
1995 */
1996 addr = cs_etm__last_executed_instr(tidq->prev_packet);
1997
1998 err = cs_etm__synth_instruction_sample(
1999 etmq, tidq, addr,
2000 tidq->period_instructions);
2001 if (err)
2002 return err;
2003
2004 tidq->period_instructions = 0;
2005 }
2006
2007 return 0;
2008 }
2009 /*
2010 * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue
2011 * if need be.
2012 * Returns: < 0 if error
2013 * = 0 if no more auxtrace_buffer to read
2014 * > 0 if the current buffer isn't empty yet
2015 */
cs_etm__get_data_block(struct cs_etm_queue * etmq)2016 static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
2017 {
2018 int ret;
2019
2020 if (!etmq->buf_len) {
2021 ret = cs_etm__get_trace(etmq);
2022 if (ret <= 0)
2023 return ret;
2024 /*
2025 * We cannot assume consecutive blocks in the data file
2026 * are contiguous, reset the decoder to force re-sync.
2027 */
2028 ret = cs_etm_decoder__reset(etmq->decoder);
2029 if (ret)
2030 return ret;
2031 }
2032
2033 return etmq->buf_len;
2034 }
2035
cs_etm__is_svc_instr(struct cs_etm_queue * etmq,u8 trace_chan_id,struct cs_etm_packet * packet,u64 end_addr)2036 static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id,
2037 struct cs_etm_packet *packet,
2038 u64 end_addr)
2039 {
2040 /* Initialise to keep compiler happy */
2041 u16 instr16 = 0;
2042 u32 instr32 = 0;
2043 u64 addr;
2044
2045 switch (packet->isa) {
2046 case CS_ETM_ISA_T32:
2047 /*
2048 * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247:
2049 *
2050 * b'15 b'8
2051 * +-----------------+--------+
2052 * | 1 1 0 1 1 1 1 1 | imm8 |
2053 * +-----------------+--------+
2054 *
2055 * According to the specification, it only defines SVC for T32
2056 * with 16 bits instruction and has no definition for 32bits;
2057 * so below only read 2 bytes as instruction size for T32.
2058 */
2059 addr = end_addr - 2;
2060 cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr16),
2061 (u8 *)&instr16, 0);
2062 if ((instr16 & 0xFF00) == 0xDF00)
2063 return true;
2064
2065 break;
2066 case CS_ETM_ISA_A32:
2067 /*
2068 * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247:
2069 *
2070 * b'31 b'28 b'27 b'24
2071 * +---------+---------+-------------------------+
2072 * | !1111 | 1 1 1 1 | imm24 |
2073 * +---------+---------+-------------------------+
2074 */
2075 addr = end_addr - 4;
2076 cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
2077 (u8 *)&instr32, 0);
2078 if ((instr32 & 0x0F000000) == 0x0F000000 &&
2079 (instr32 & 0xF0000000) != 0xF0000000)
2080 return true;
2081
2082 break;
2083 case CS_ETM_ISA_A64:
2084 /*
2085 * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294:
2086 *
2087 * b'31 b'21 b'4 b'0
2088 * +-----------------------+---------+-----------+
2089 * | 1 1 0 1 0 1 0 0 0 0 0 | imm16 | 0 0 0 0 1 |
2090 * +-----------------------+---------+-----------+
2091 */
2092 addr = end_addr - 4;
2093 cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
2094 (u8 *)&instr32, 0);
2095 if ((instr32 & 0xFFE0001F) == 0xd4000001)
2096 return true;
2097
2098 break;
2099 case CS_ETM_ISA_UNKNOWN:
2100 default:
2101 break;
2102 }
2103
2104 return false;
2105 }
2106
cs_etm__is_syscall(struct cs_etm_queue * etmq,struct cs_etm_traceid_queue * tidq,u64 magic)2107 static bool cs_etm__is_syscall(struct cs_etm_queue *etmq,
2108 struct cs_etm_traceid_queue *tidq, u64 magic)
2109 {
2110 u8 trace_chan_id = tidq->trace_chan_id;
2111 struct cs_etm_packet *packet = tidq->packet;
2112 struct cs_etm_packet *prev_packet = tidq->prev_packet;
2113
2114 if (magic == __perf_cs_etmv3_magic)
2115 if (packet->exception_number == CS_ETMV3_EXC_SVC)
2116 return true;
2117
2118 /*
2119 * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and
2120 * HVC cases; need to check if it's SVC instruction based on
2121 * packet address.
2122 */
2123 if (magic == __perf_cs_etmv4_magic) {
2124 if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2125 cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
2126 prev_packet->end_addr))
2127 return true;
2128 }
2129
2130 return false;
2131 }
2132
cs_etm__is_async_exception(struct cs_etm_traceid_queue * tidq,u64 magic)2133 static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq,
2134 u64 magic)
2135 {
2136 struct cs_etm_packet *packet = tidq->packet;
2137
2138 if (magic == __perf_cs_etmv3_magic)
2139 if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT ||
2140 packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT ||
2141 packet->exception_number == CS_ETMV3_EXC_PE_RESET ||
2142 packet->exception_number == CS_ETMV3_EXC_IRQ ||
2143 packet->exception_number == CS_ETMV3_EXC_FIQ)
2144 return true;
2145
2146 if (magic == __perf_cs_etmv4_magic)
2147 if (packet->exception_number == CS_ETMV4_EXC_RESET ||
2148 packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT ||
2149 packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR ||
2150 packet->exception_number == CS_ETMV4_EXC_INST_DEBUG ||
2151 packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG ||
2152 packet->exception_number == CS_ETMV4_EXC_IRQ ||
2153 packet->exception_number == CS_ETMV4_EXC_FIQ)
2154 return true;
2155
2156 return false;
2157 }
2158
cs_etm__is_sync_exception(struct cs_etm_queue * etmq,struct cs_etm_traceid_queue * tidq,u64 magic)2159 static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq,
2160 struct cs_etm_traceid_queue *tidq,
2161 u64 magic)
2162 {
2163 u8 trace_chan_id = tidq->trace_chan_id;
2164 struct cs_etm_packet *packet = tidq->packet;
2165 struct cs_etm_packet *prev_packet = tidq->prev_packet;
2166
2167 if (magic == __perf_cs_etmv3_magic)
2168 if (packet->exception_number == CS_ETMV3_EXC_SMC ||
2169 packet->exception_number == CS_ETMV3_EXC_HYP ||
2170 packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE ||
2171 packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR ||
2172 packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT ||
2173 packet->exception_number == CS_ETMV3_EXC_DATA_FAULT ||
2174 packet->exception_number == CS_ETMV3_EXC_GENERIC)
2175 return true;
2176
2177 if (magic == __perf_cs_etmv4_magic) {
2178 if (packet->exception_number == CS_ETMV4_EXC_TRAP ||
2179 packet->exception_number == CS_ETMV4_EXC_ALIGNMENT ||
2180 packet->exception_number == CS_ETMV4_EXC_INST_FAULT ||
2181 packet->exception_number == CS_ETMV4_EXC_DATA_FAULT)
2182 return true;
2183
2184 /*
2185 * For CS_ETMV4_EXC_CALL, except SVC other instructions
2186 * (SMC, HVC) are taken as sync exceptions.
2187 */
2188 if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2189 !cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
2190 prev_packet->end_addr))
2191 return true;
2192
2193 /*
2194 * ETMv4 has 5 bits for exception number; if the numbers
2195 * are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ]
2196 * they are implementation defined exceptions.
2197 *
2198 * For this case, simply take it as sync exception.
2199 */
2200 if (packet->exception_number > CS_ETMV4_EXC_FIQ &&
2201 packet->exception_number <= CS_ETMV4_EXC_END)
2202 return true;
2203 }
2204
2205 return false;
2206 }
2207
/*
 * Derive perf branch flags (PERF_IP_FLAG_*) for the packet at the head of
 * the traceid queue and, where the new packet reveals what an earlier branch
 * really was (exception entry/return, trace begin/end), retroactively fix up
 * the previous packet's flags.  Per the caller's contract, this must run
 * before the packets are swapped so both packets' addresses are still valid.
 *
 * Returns 0 on success, or a negative error code if the trace magic for the
 * channel cannot be retrieved.
 */
static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
				    struct cs_etm_traceid_queue *tidq)
{
	struct cs_etm_packet *packet = tidq->packet;
	struct cs_etm_packet *prev_packet = tidq->prev_packet;
	u8 trace_chan_id = tidq->trace_chan_id;
	u64 magic;
	int ret;

	switch (packet->sample_type) {
	case CS_ETM_RANGE:
		/*
		 * Immediate branch instruction without neither link nor
		 * return flag, it's normal branch instruction within
		 * the function.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR &&
		    packet->last_instr_subtype == OCSD_S_INSTR_NONE) {
			packet->flags = PERF_IP_FLAG_BRANCH;

			if (packet->last_instr_cond)
				packet->flags |= PERF_IP_FLAG_CONDITIONAL;
		}

		/*
		 * Immediate branch instruction with link (e.g. BL), this is
		 * branch instruction for function call.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR &&
		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL;

		/*
		 * Indirect branch instruction with link (e.g. BLR), this is
		 * branch instruction for function call.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL;

		/*
		 * Indirect branch instruction with subtype of
		 * OCSD_S_INSTR_V7_IMPLIED_RET, this is explicit hint for
		 * function return for A32/T32.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_RETURN;

		/*
		 * Indirect branch instruction without link (e.g. BR), usually
		 * this is used for function return, especially for functions
		 * within dynamic link lib.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_NONE)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_RETURN;

		/* Return instruction for function return. */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_V8_RET)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_RETURN;

		/*
		 * Decoder might insert a discontinuity in the middle of
		 * instruction packets, fixup prev_packet with flag
		 * PERF_IP_FLAG_TRACE_BEGIN to indicate restarting trace.
		 */
		if (prev_packet->sample_type == CS_ETM_DISCONTINUITY)
			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
					      PERF_IP_FLAG_TRACE_BEGIN;

		/*
		 * If the previous packet is an exception return packet
		 * and the return address just follows SVC instruction,
		 * it needs to calibrate the previous packet sample flags
		 * as PERF_IP_FLAG_SYSCALLRET.
		 */
		if (prev_packet->flags == (PERF_IP_FLAG_BRANCH |
					   PERF_IP_FLAG_RETURN |
					   PERF_IP_FLAG_INTERRUPT) &&
		    cs_etm__is_svc_instr(etmq, trace_chan_id,
					 packet, packet->start_addr))
			prev_packet->flags = PERF_IP_FLAG_BRANCH |
					     PERF_IP_FLAG_RETURN |
					     PERF_IP_FLAG_SYSCALLRET;
		break;
	case CS_ETM_DISCONTINUITY:
		/*
		 * The trace is discontinuous, if the previous packet is
		 * instruction packet, set flag PERF_IP_FLAG_TRACE_END
		 * for previous packet.
		 */
		if (prev_packet->sample_type == CS_ETM_RANGE)
			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
					      PERF_IP_FLAG_TRACE_END;
		break;
	case CS_ETM_EXCEPTION:
		/* Exception classification depends on the trace source type */
		ret = cs_etm__get_magic(etmq, packet->trace_chan_id, &magic);
		if (ret)
			return ret;

		/* The exception is for system call. */
		if (cs_etm__is_syscall(etmq, tidq, magic))
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL |
					PERF_IP_FLAG_SYSCALLRET;
		/*
		 * The exceptions are triggered by external signals from bus,
		 * interrupt controller, debug module, PE reset or halt.
		 */
		else if (cs_etm__is_async_exception(tidq, magic))
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL |
					PERF_IP_FLAG_ASYNC |
					PERF_IP_FLAG_INTERRUPT;
		/*
		 * Otherwise, exception is caused by trap, instruction &
		 * data fault, or alignment errors.
		 */
		else if (cs_etm__is_sync_exception(etmq, tidq, magic))
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL |
					PERF_IP_FLAG_INTERRUPT;

		/*
		 * When the exception packet is inserted, since exception
		 * packet is not used standalone for generating samples
		 * and it's affiliation to the previous instruction range
		 * packet; so set previous range packet flags to tell perf
		 * it is an exception taken branch.
		 */
		if (prev_packet->sample_type == CS_ETM_RANGE)
			prev_packet->flags = packet->flags;
		break;
	case CS_ETM_EXCEPTION_RET:
		/*
		 * When the exception return packet is inserted, since
		 * exception return packet is not used standalone for
		 * generating samples and it's affiliation to the previous
		 * instruction range packet; so set previous range packet
		 * flags to tell perf it is an exception return branch.
		 *
		 * The exception return can be for either system call or
		 * other exception types; unfortunately the packet doesn't
		 * contain exception type related info so we cannot decide
		 * the exception type purely based on exception return packet.
		 * If we record the exception number from exception packet and
		 * reuse it for exception return packet, this is not reliable
		 * due the trace can be discontinuity or the interrupt can
		 * be nested, thus the recorded exception number cannot be
		 * used for exception return packet for these two cases.
		 *
		 * For exception return packet, we only need to distinguish the
		 * packet is for system call or for other types.  Thus the
		 * decision can be deferred when receive the next packet which
		 * contains the return address, based on the return address we
		 * can read out the previous instruction and check if it's a
		 * system call instruction and then calibrate the sample flag
		 * as needed (see the CS_ETM_RANGE case above).
		 */
		if (prev_packet->sample_type == CS_ETM_RANGE)
			prev_packet->flags = PERF_IP_FLAG_BRANCH |
					     PERF_IP_FLAG_RETURN |
					     PERF_IP_FLAG_INTERRUPT;
		break;
	case CS_ETM_EMPTY:
	default:
		break;
	}

	return 0;
}
2386
cs_etm__decode_data_block(struct cs_etm_queue * etmq)2387 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq)
2388 {
2389 int ret = 0;
2390 size_t processed = 0;
2391
2392 /*
2393 * Packets are decoded and added to the decoder's packet queue
2394 * until the decoder packet processing callback has requested that
2395 * processing stops or there is nothing left in the buffer. Normal
2396 * operations that stop processing are a timestamp packet or a full
2397 * decoder buffer queue.
2398 */
2399 ret = cs_etm_decoder__process_data_block(etmq->decoder,
2400 etmq->offset,
2401 &etmq->buf[etmq->buf_used],
2402 etmq->buf_len,
2403 &processed);
2404 if (ret)
2405 goto out;
2406
2407 etmq->offset += processed;
2408 etmq->buf_used += processed;
2409 etmq->buf_len -= processed;
2410
2411 out:
2412 return ret;
2413 }
2414
/*
 * Drain the packet queue of one traceID queue, synthesizing samples as each
 * packet is consumed.  Stops when the decoder's packet queue is empty or an
 * error occurs; an empty packet is treated as a hard error (-EINVAL).
 */
static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq,
					 struct cs_etm_traceid_queue *tidq)
{
	int ret;
	struct cs_etm_packet_queue *packet_queue;

	packet_queue = &tidq->packet_queue;

	/* Process each packet in this chunk */
	while (1) {
		ret = cs_etm_decoder__get_packet(packet_queue,
						 tidq->packet);
		if (ret <= 0)
			/*
			 * Stop processing this chunk on
			 * end of data or error
			 */
			break;

		/*
		 * Since packet addresses are swapped in packet
		 * handling within below switch() statements,
		 * thus setting sample flags must be called
		 * prior to switch() statement to use address
		 * information before packets swapping.
		 */
		ret = cs_etm__set_sample_flags(etmq, tidq);
		if (ret < 0)
			break;

		switch (tidq->packet->sample_type) {
		case CS_ETM_RANGE:
			/*
			 * If the packet contains an instruction
			 * range, generate instruction sequence
			 * events.
			 */
			cs_etm__sample(etmq, tidq);
			break;
		case CS_ETM_EXCEPTION:
		case CS_ETM_EXCEPTION_RET:
			/*
			 * If the exception packet is coming,
			 * make sure the previous instruction
			 * range packet to be handled properly.
			 */
			cs_etm__exception(tidq);
			break;
		case CS_ETM_DISCONTINUITY:
			/*
			 * Discontinuity in trace, flush
			 * previous branch stack
			 */
			cs_etm__flush(etmq, tidq);
			break;
		case CS_ETM_EMPTY:
			/*
			 * Should not receive empty packet,
			 * report error.
			 */
			pr_err("CS ETM Trace: empty packet\n");
			return -EINVAL;
		default:
			break;
		}
	}

	return ret;
}
2484
cs_etm__clear_all_traceid_queues(struct cs_etm_queue * etmq)2485 static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq)
2486 {
2487 int idx;
2488 struct int_node *inode;
2489 struct cs_etm_traceid_queue *tidq;
2490 struct intlist *traceid_queues_list = etmq->traceid_queues_list;
2491
2492 intlist__for_each_entry(inode, traceid_queues_list) {
2493 idx = (int)(intptr_t)inode->priv;
2494 tidq = etmq->traceid_queues[idx];
2495
2496 /* Ignore return value */
2497 cs_etm__process_traceid_queue(etmq, tidq);
2498 }
2499 }
2500
cs_etm__run_per_thread_timeless_decoder(struct cs_etm_queue * etmq)2501 static int cs_etm__run_per_thread_timeless_decoder(struct cs_etm_queue *etmq)
2502 {
2503 int err = 0;
2504 struct cs_etm_traceid_queue *tidq;
2505
2506 tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID);
2507 if (!tidq)
2508 return -EINVAL;
2509
2510 /* Go through each buffer in the queue and decode them one by one */
2511 while (1) {
2512 err = cs_etm__get_data_block(etmq);
2513 if (err <= 0)
2514 return err;
2515
2516 /* Run trace decoder until buffer consumed or end of trace */
2517 do {
2518 err = cs_etm__decode_data_block(etmq);
2519 if (err)
2520 return err;
2521
2522 /*
2523 * Process each packet in this chunk, nothing to do if
2524 * an error occurs other than hoping the next one will
2525 * be better.
2526 */
2527 err = cs_etm__process_traceid_queue(etmq, tidq);
2528
2529 } while (etmq->buf_len);
2530
2531 if (err == 0)
2532 /* Flush any remaining branch stack entries */
2533 err = cs_etm__end_block(etmq, tidq);
2534 }
2535
2536 return err;
2537 }
2538
cs_etm__run_per_cpu_timeless_decoder(struct cs_etm_queue * etmq)2539 static int cs_etm__run_per_cpu_timeless_decoder(struct cs_etm_queue *etmq)
2540 {
2541 int idx, err = 0;
2542 struct cs_etm_traceid_queue *tidq;
2543 struct int_node *inode;
2544
2545 /* Go through each buffer in the queue and decode them one by one */
2546 while (1) {
2547 err = cs_etm__get_data_block(etmq);
2548 if (err <= 0)
2549 return err;
2550
2551 /* Run trace decoder until buffer consumed or end of trace */
2552 do {
2553 err = cs_etm__decode_data_block(etmq);
2554 if (err)
2555 return err;
2556
2557 /*
2558 * cs_etm__run_per_thread_timeless_decoder() runs on a
2559 * single traceID queue because each TID has a separate
2560 * buffer. But here in per-cpu mode we need to iterate
2561 * over each channel instead.
2562 */
2563 intlist__for_each_entry(inode,
2564 etmq->traceid_queues_list) {
2565 idx = (int)(intptr_t)inode->priv;
2566 tidq = etmq->traceid_queues[idx];
2567 cs_etm__process_traceid_queue(etmq, tidq);
2568 }
2569 } while (etmq->buf_len);
2570
2571 intlist__for_each_entry(inode, etmq->traceid_queues_list) {
2572 idx = (int)(intptr_t)inode->priv;
2573 tidq = etmq->traceid_queues[idx];
2574 /* Flush any remaining branch stack entries */
2575 err = cs_etm__end_block(etmq, tidq);
2576 if (err)
2577 return err;
2578 }
2579 }
2580
2581 return err;
2582 }
2583
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
					   pid_t tid)
{
	struct auxtrace_queues *queues = &etm->queues;
	unsigned int i;

	for (i = 0; i < queues->nr_queues; i++) {
		struct cs_etm_queue *etmq = queues->queue_array[i].priv;
		struct cs_etm_traceid_queue *tidq;

		if (!etmq)
			continue;

		if (!etm->per_thread_decoding) {
			cs_etm__run_per_cpu_timeless_decoder(etmq);
			continue;
		}

		tidq = cs_etm__etmq_get_traceid_queue(etmq,
						      CS_ETM_PER_THREAD_TRACEID);
		if (!tidq)
			continue;

		/* tid == -1 means "decode every thread" */
		if (tid == -1 || thread__tid(tidq->thread) == tid)
			cs_etm__run_per_thread_timeless_decoder(etmq);
	}

	return 0;
}
2613
/*
 * Decode all queues in global time order.  A min heap keyed on CoreSight
 * timestamps always yields the queue/traceID with the oldest pending data;
 * its packets are processed, the next block is decoded, and the queue is
 * pushed back with its new timestamp until every queue is exhausted.
 */
static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm)
{
	int ret = 0;
	unsigned int cs_queue_nr, queue_nr, i;
	u8 trace_chan_id;
	u64 cs_timestamp;
	struct auxtrace_queue *queue;
	struct cs_etm_queue *etmq;
	struct cs_etm_traceid_queue *tidq;

	/*
	 * Pre-populate the heap with one entry from each queue so that we can
	 * start processing in time order across all queues.
	 */
	for (i = 0; i < etm->queues.nr_queues; i++) {
		etmq = etm->queues.queue_array[i].priv;
		if (!etmq)
			continue;

		ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i);
		if (ret)
			return ret;
	}

	while (1) {
		if (!etm->heap.heap_cnt)
			break;

		/* Take the entry at the top of the min heap */
		cs_queue_nr = etm->heap.heap_array[0].queue_nr;
		/* cs_queue_nr encodes both the queue number and the channel */
		queue_nr = TO_QUEUE_NR(cs_queue_nr);
		trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr);
		queue = &etm->queues.queue_array[queue_nr];
		etmq = queue->priv;

		/*
		 * Remove the top entry from the heap since we are about
		 * to process it.
		 */
		auxtrace_heap__pop(&etm->heap);

		tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
		if (!tidq) {
			/*
			 * No traceID queue has been allocated for this traceID,
			 * which means something somewhere went very wrong.  No
			 * other choice than simply exit.
			 */
			ret = -EINVAL;
			goto out;
		}

		/*
		 * Packets associated with this timestamp are already in
		 * the etmq's traceID queue, so process them.
		 */
		ret = cs_etm__process_traceid_queue(etmq, tidq);
		if (ret < 0)
			goto out;

		/*
		 * Packets for this timestamp have been processed, time to
		 * move on to the next timestamp, fetching a new auxtrace_buffer
		 * if need be.
		 */
refetch:
		ret = cs_etm__get_data_block(etmq);
		if (ret < 0)
			goto out;

		/*
		 * No more auxtrace_buffers to process in this etmq, simply
		 * move on to another entry in the auxtrace_heap.
		 */
		if (!ret)
			continue;

		ret = cs_etm__decode_data_block(etmq);
		if (ret)
			goto out;

		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);

		if (!cs_timestamp) {
			/*
			 * Function cs_etm__decode_data_block() returns when
			 * there is no more traces to decode in the current
			 * auxtrace_buffer OR when a timestamp has been
			 * encountered on any of the traceID queues.  Since we
			 * did not get a timestamp, there is no more traces to
			 * process in this auxtrace_buffer.  As such empty and
			 * flush all traceID queues.
			 */
			cs_etm__clear_all_traceid_queues(etmq);

			/* Fetch another auxtrace_buffer for this etmq */
			goto refetch;
		}

		/*
		 * Add to the min heap the timestamp for packets that have
		 * just been decoded.  They will be processed and synthesized
		 * during the next call to cs_etm__process_traceid_queue() for
		 * this queue/traceID.
		 *
		 * NOTE(review): a failure of auxtrace_heap__add() is not
		 * checked here; the loop simply continues and the error code
		 * may be overwritten on the next iteration — confirm this is
		 * intentional.
		 */
		cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
		ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
	}

	/* All queues drained: flush the remaining branch stack entries */
	for (i = 0; i < etm->queues.nr_queues; i++) {
		struct int_node *inode;

		etmq = etm->queues.queue_array[i].priv;
		if (!etmq)
			continue;

		intlist__for_each_entry(inode, etmq->traceid_queues_list) {
			int idx = (int)(intptr_t)inode->priv;

			/* Flush any remaining branch stack entries */
			tidq = etmq->traceid_queues[idx];
			ret = cs_etm__end_block(etmq, tidq);
			if (ret)
				return ret;
		}
	}
out:
	return ret;
}
2743
cs_etm__process_itrace_start(struct cs_etm_auxtrace * etm,union perf_event * event)2744 static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm,
2745 union perf_event *event)
2746 {
2747 struct thread *th;
2748
2749 if (etm->timeless_decoding)
2750 return 0;
2751
2752 /*
2753 * Add the tid/pid to the log so that we can get a match when we get a
2754 * contextID from the decoder. Only track for the host: only kernel
2755 * trace is supported for guests which wouldn't need pids so this should
2756 * be fine.
2757 */
2758 th = machine__findnew_thread(&etm->session->machines.host,
2759 event->itrace_start.pid,
2760 event->itrace_start.tid);
2761 if (!th)
2762 return -ENOMEM;
2763
2764 thread__put(th);
2765
2766 return 0;
2767 }
2768
cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace * etm,union perf_event * event)2769 static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm,
2770 union perf_event *event)
2771 {
2772 struct thread *th;
2773 bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
2774
2775 /*
2776 * Context switch in per-thread mode are irrelevant since perf
2777 * will start/stop tracing as the process is scheduled.
2778 */
2779 if (etm->timeless_decoding)
2780 return 0;
2781
2782 /*
2783 * SWITCH_IN events carry the next process to be switched out while
2784 * SWITCH_OUT events carry the process to be switched in. As such
2785 * we don't care about IN events.
2786 */
2787 if (!out)
2788 return 0;
2789
2790 /*
2791 * Add the tid/pid to the log so that we can get a match when we get a
2792 * contextID from the decoder. Only track for the host: only kernel
2793 * trace is supported for guests which wouldn't need pids so this should
2794 * be fine.
2795 */
2796 th = machine__findnew_thread(&etm->session->machines.host,
2797 event->context_switch.next_prev_pid,
2798 event->context_switch.next_prev_tid);
2799 if (!th)
2800 return -ENOMEM;
2801
2802 thread__put(th);
2803
2804 return 0;
2805 }
2806
cs_etm__process_event(struct perf_session * session,union perf_event * event,struct perf_sample * sample,const struct perf_tool * tool)2807 static int cs_etm__process_event(struct perf_session *session,
2808 union perf_event *event,
2809 struct perf_sample *sample,
2810 const struct perf_tool *tool)
2811 {
2812 struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2813 struct cs_etm_auxtrace,
2814 auxtrace);
2815
2816 if (dump_trace)
2817 return 0;
2818
2819 if (!tool->ordered_events) {
2820 pr_err("CoreSight ETM Trace requires ordered events\n");
2821 return -EINVAL;
2822 }
2823
2824 switch (event->header.type) {
2825 case PERF_RECORD_EXIT:
2826 /*
2827 * Don't need to wait for cs_etm__flush_events() in per-thread mode to
2828 * start the decode because we know there will be no more trace from
2829 * this thread. All this does is emit samples earlier than waiting for
2830 * the flush in other modes, but with timestamps it makes sense to wait
2831 * for flush so that events from different threads are interleaved
2832 * properly.
2833 */
2834 if (etm->per_thread_decoding && etm->timeless_decoding)
2835 return cs_etm__process_timeless_queues(etm,
2836 event->fork.tid);
2837 break;
2838
2839 case PERF_RECORD_ITRACE_START:
2840 return cs_etm__process_itrace_start(etm, event);
2841
2842 case PERF_RECORD_SWITCH_CPU_WIDE:
2843 return cs_etm__process_switch_cpu_wide(etm, event);
2844
2845 case PERF_RECORD_AUX:
2846 /*
2847 * Record the latest kernel timestamp available in the header
2848 * for samples so that synthesised samples occur from this point
2849 * onwards.
2850 */
2851 if (sample->time && (sample->time != (u64)-1))
2852 etm->latest_kernel_timestamp = sample->time;
2853 break;
2854
2855 default:
2856 break;
2857 }
2858
2859 return 0;
2860 }
2861
dump_queued_data(struct cs_etm_auxtrace * etm,struct perf_record_auxtrace * event)2862 static void dump_queued_data(struct cs_etm_auxtrace *etm,
2863 struct perf_record_auxtrace *event)
2864 {
2865 struct auxtrace_buffer *buf;
2866 unsigned int i;
2867 /*
2868 * Find all buffers with same reference in the queues and dump them.
2869 * This is because the queues can contain multiple entries of the same
2870 * buffer that were split on aux records.
2871 */
2872 for (i = 0; i < etm->queues.nr_queues; ++i)
2873 list_for_each_entry(buf, &etm->queues.queue_array[i].head, list)
2874 if (buf->reference == event->reference)
2875 cs_etm__dump_event(etm->queues.queue_array[i].priv, buf);
2876 }
2877
static int cs_etm__process_auxtrace_event(struct perf_session *session,
					  union perf_event *event,
					  const struct perf_tool *tool __maybe_unused)
{
	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	struct auxtrace_buffer *buffer;
	off_t data_offset = 0;
	int fd, err;

	/* Data already queued up front: only a dump may be required */
	if (etm->data_queued) {
		if (dump_trace)
			dump_queued_data(etm, &event->auxtrace);
		return 0;
	}

	fd = perf_data__fd(session->data);

	/* A pipe has no meaningful file offset */
	if (!perf_data__is_pipe(session->data)) {
		data_offset = lseek(fd, 0, SEEK_CUR);
		if (data_offset == -1)
			return -errno;
	}

	err = auxtrace_queues__add_event(&etm->queues, session, event,
					 data_offset, &buffer);
	if (err)
		return err;

	if (dump_trace && auxtrace_buffer__get_data(buffer, fd)) {
		cs_etm__dump_event(
			etm->queues.queue_array[event->auxtrace.idx].priv,
			buffer);
		auxtrace_buffer__put_data(buffer);
	}

	return 0;
}
2916
cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace * etm)2917 static int cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace *etm)
2918 {
2919 struct evsel *evsel;
2920 struct evlist *evlist = etm->session->evlist;
2921
2922 /* Override timeless mode with user input from --itrace=Z */
2923 if (etm->synth_opts.timeless_decoding) {
2924 etm->timeless_decoding = true;
2925 return 0;
2926 }
2927
2928 /*
2929 * Find the cs_etm evsel and look at what its timestamp setting was
2930 */
2931 evlist__for_each_entry(evlist, evsel)
2932 if (cs_etm__evsel_is_auxtrace(etm->session, evsel)) {
2933 etm->timeless_decoding =
2934 !(evsel->core.attr.config & BIT(ETM_OPT_TS));
2935 return 0;
2936 }
2937
2938 pr_err("CS ETM: Couldn't find ETM evsel\n");
2939 return -EINVAL;
2940 }
2941
2942 /*
2943 * Read a single cpu parameter block from the auxtrace_info priv block.
2944 *
2945 * For version 1 there is a per cpu nr_params entry. If we are handling
2946 * version 1 file, then there may be less, the same, or more params
2947 * indicated by this value than the compile time number we understand.
2948 *
2949 * For a version 0 info block, there are a fixed number, and we need to
2950 * fill out the nr_param value in the metadata we create.
2951 */
cs_etm__create_meta_blk(u64 * buff_in,int * buff_in_offset,int out_blk_size,int nr_params_v0)2952 static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset,
2953 int out_blk_size, int nr_params_v0)
2954 {
2955 u64 *metadata = NULL;
2956 int hdr_version;
2957 int nr_in_params, nr_out_params, nr_cmn_params;
2958 int i, k;
2959
2960 metadata = zalloc(sizeof(*metadata) * out_blk_size);
2961 if (!metadata)
2962 return NULL;
2963
2964 /* read block current index & version */
2965 i = *buff_in_offset;
2966 hdr_version = buff_in[CS_HEADER_VERSION];
2967
2968 if (!hdr_version) {
2969 /* read version 0 info block into a version 1 metadata block */
2970 nr_in_params = nr_params_v0;
2971 metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC];
2972 metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU];
2973 metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params;
2974 /* remaining block params at offset +1 from source */
2975 for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++)
2976 metadata[k + 1] = buff_in[i + k];
2977 /* version 0 has 2 common params */
2978 nr_cmn_params = 2;
2979 } else {
2980 /* read version 1 info block - input and output nr_params may differ */
2981 /* version 1 has 3 common params */
2982 nr_cmn_params = 3;
2983 nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS];
2984
2985 /* if input has more params than output - skip excess */
2986 nr_out_params = nr_in_params + nr_cmn_params;
2987 if (nr_out_params > out_blk_size)
2988 nr_out_params = out_blk_size;
2989
2990 for (k = CS_ETM_MAGIC; k < nr_out_params; k++)
2991 metadata[k] = buff_in[i + k];
2992
2993 /* record the actual nr params we copied */
2994 metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params;
2995 }
2996
2997 /* adjust in offset by number of in params used */
2998 i += nr_in_params + nr_cmn_params;
2999 *buff_in_offset = i;
3000 return metadata;
3001 }
3002
3003 /**
3004 * Puts a fragment of an auxtrace buffer into the auxtrace queues based
3005 * on the bounds of aux_event, if it matches with the buffer that's at
3006 * file_offset.
3007 *
3008 * Normally, whole auxtrace buffers would be added to the queue. But we
3009 * want to reset the decoder for every PERF_RECORD_AUX event, and the decoder
3010 * is reset across each buffer, so splitting the buffers up in advance has
3011 * the same effect.
3012 */
cs_etm__queue_aux_fragment(struct perf_session * session,off_t file_offset,size_t sz,struct perf_record_aux * aux_event,struct perf_sample * sample)3013 static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz,
3014 struct perf_record_aux *aux_event, struct perf_sample *sample)
3015 {
3016 int err;
3017 char buf[PERF_SAMPLE_MAX_SIZE];
3018 union perf_event *auxtrace_event_union;
3019 struct perf_record_auxtrace *auxtrace_event;
3020 union perf_event auxtrace_fragment;
3021 __u64 aux_offset, aux_size;
3022 enum cs_etm_format format;
3023
3024 struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
3025 struct cs_etm_auxtrace,
3026 auxtrace);
3027
3028 /*
3029 * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got
3030 * from looping through the auxtrace index.
3031 */
3032 err = perf_session__peek_event(session, file_offset, buf,
3033 PERF_SAMPLE_MAX_SIZE, &auxtrace_event_union, NULL);
3034 if (err)
3035 return err;
3036 auxtrace_event = &auxtrace_event_union->auxtrace;
3037 if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE)
3038 return -EINVAL;
3039
3040 if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) ||
3041 auxtrace_event->header.size != sz) {
3042 return -EINVAL;
3043 }
3044
3045 /*
3046 * In per-thread mode, auxtrace CPU is set to -1, but TID will be set instead. See
3047 * auxtrace_mmap_params__set_idx(). However, the sample AUX event will contain a
3048 * CPU as we set this always for the AUX_OUTPUT_HW_ID event.
3049 * So now compare only TIDs if auxtrace CPU is -1, and CPUs if auxtrace CPU is not -1.
3050 * Return 'not found' if mismatch.
3051 */
3052 if (auxtrace_event->cpu == (__u32) -1) {
3053 etm->per_thread_decoding = true;
3054 if (auxtrace_event->tid != sample->tid)
3055 return 1;
3056 } else if (auxtrace_event->cpu != sample->cpu) {
3057 if (etm->per_thread_decoding) {
3058 /*
3059 * Found a per-cpu buffer after a per-thread one was
3060 * already found
3061 */
3062 pr_err("CS ETM: Inconsistent per-thread/per-cpu mode.\n");
3063 return -EINVAL;
3064 }
3065 return 1;
3066 }
3067
3068 if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) {
3069 /*
3070 * Clamp size in snapshot mode. The buffer size is clamped in
3071 * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect
3072 * the buffer size.
3073 */
3074 aux_size = min(aux_event->aux_size, auxtrace_event->size);
3075
3076 /*
3077 * In this mode, the head also points to the end of the buffer so aux_offset
3078 * needs to have the size subtracted so it points to the beginning as in normal mode
3079 */
3080 aux_offset = aux_event->aux_offset - aux_size;
3081 } else {
3082 aux_size = aux_event->aux_size;
3083 aux_offset = aux_event->aux_offset;
3084 }
3085
3086 if (aux_offset >= auxtrace_event->offset &&
3087 aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) {
3088 struct cs_etm_queue *etmq = cs_etm__get_queue(etm, auxtrace_event->cpu);
3089
3090 /*
3091 * If this AUX event was inside this buffer somewhere, create a new auxtrace event
3092 * based on the sizes of the aux event, and queue that fragment.
3093 */
3094 auxtrace_fragment.auxtrace = *auxtrace_event;
3095 auxtrace_fragment.auxtrace.size = aux_size;
3096 auxtrace_fragment.auxtrace.offset = aux_offset;
3097 auxtrace_fragment.auxtrace.idx = etmq->queue_nr;
3098 file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size;
3099
3100 pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64
3101 " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu);
3102 err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment,
3103 file_offset, NULL);
3104 if (err)
3105 return err;
3106
3107 format = (aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW) ?
3108 UNFORMATTED : FORMATTED;
3109 if (etmq->format != UNSET && format != etmq->format) {
3110 pr_err("CS_ETM: mixed formatted and unformatted trace not supported\n");
3111 return -EINVAL;
3112 }
3113 etmq->format = format;
3114 return 0;
3115 }
3116
3117 /* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
3118 return 1;
3119 }
3120
static int cs_etm__process_aux_hw_id_cb(struct perf_session *session, union perf_event *event,
					u64 offset __maybe_unused, void *data __maybe_unused)
{
	int *found = data;

	/*
	 * Handle PERF_RECORD_AUX_OUTPUT_HW_ID early so decoders can be set
	 * up before the main event pass; everything else is ignored.
	 */
	if (event->header.type != PERF_RECORD_AUX_OUTPUT_HW_ID)
		return 0;

	(*found)++; /* increment found count */
	return cs_etm__process_aux_output_hw_id(session, event);
}
3131
/*
 * Callback for perf_session__peek_events(): for each PERF_RECORD_AUX event,
 * find the AUXTRACE buffer it falls into (via the auxtrace index) and queue
 * the matching fragment. Returns 0 on success or if no buffer matched
 * (logged, best effort), negative on error.
 */
static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event,
					u64 offset __maybe_unused, void *data __maybe_unused)
{
	struct perf_sample sample;
	int ret;
	struct auxtrace_index_entry *ent;
	struct auxtrace_index *auxtrace_index;
	struct evsel *evsel;
	size_t i;

	/* Don't care about any other events, we're only queuing buffers for AUX events */
	if (event->header.type != PERF_RECORD_AUX)
		return 0;

	if (event->header.size < sizeof(struct perf_record_aux))
		return -EINVAL;

	/* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */
	if (!event->aux.aux_size)
		return 0;

	/*
	 * Parse the sample, we need the sample_id_all data that comes after the event so that the
	 * CPU or PID can be matched to an AUXTRACE buffer's CPU or PID.
	 */
	evsel = evlist__event2evsel(session->evlist, event);
	if (!evsel)
		return -EINVAL;
	/* sample must be released via perf_sample__exit() on every path below */
	perf_sample__init(&sample, /*all=*/false);
	ret = evsel__parse_sample(evsel, event, &sample);
	if (ret)
		goto out;

	/*
	 * Loop through the auxtrace index to find the buffer that matches up with this aux event.
	 */
	list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
		for (i = 0; i < auxtrace_index->nr; i++) {
			ent = &auxtrace_index->entries[i];
			ret = cs_etm__queue_aux_fragment(session, ent->file_offset,
							 ent->sz, &event->aux, &sample);
			/*
			 * Stop search on error or successful values. Continue search on
			 * 1 ('not found')
			 */
			if (ret != 1)
				goto out;
		}
	}

	/*
	 * Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but
	 * don't exit with an error because it will still be possible to decode other aux records.
	 */
	pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64
	       " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu);
	ret = 0;
out:
	perf_sample__exit(&sample);
	return ret;
}
3193
cs_etm__queue_aux_records(struct perf_session * session)3194 static int cs_etm__queue_aux_records(struct perf_session *session)
3195 {
3196 struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index,
3197 struct auxtrace_index, list);
3198 if (index && index->nr > 0)
3199 return perf_session__peek_events(session, session->header.data_offset,
3200 session->header.data_size,
3201 cs_etm__queue_aux_records_cb, NULL);
3202
3203 /*
3204 * We would get here if there are no entries in the index (either no auxtrace
3205 * buffers or no index at all). Fail silently as there is the possibility of
3206 * queueing them in cs_etm__process_auxtrace_event() if etm->data_queued is still
3207 * false.
3208 *
3209 * In that scenario, buffers will not be split by AUX records.
3210 */
3211 return 0;
3212 }
3213
3214 #define HAS_PARAM(j, type, param) (metadata[(j)][CS_ETM_NR_TRC_PARAMS] <= \
3215 (CS_##type##_##param - CS_ETM_COMMON_BLK_MAX_V1))
3216
3217 /*
3218 * Loop through the ETMs and complain if we find at least one where ts_source != 1 (virtual
3219 * timestamps).
3220 */
cs_etm__has_virtual_ts(u64 ** metadata,int num_cpu)3221 static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
3222 {
3223 int j;
3224
3225 for (j = 0; j < num_cpu; j++) {
3226 switch (metadata[j][CS_ETM_MAGIC]) {
3227 case __perf_cs_etmv4_magic:
3228 if (HAS_PARAM(j, ETMV4, TS_SOURCE) || metadata[j][CS_ETMV4_TS_SOURCE] != 1)
3229 return false;
3230 break;
3231 case __perf_cs_ete_magic:
3232 if (HAS_PARAM(j, ETE, TS_SOURCE) || metadata[j][CS_ETE_TS_SOURCE] != 1)
3233 return false;
3234 break;
3235 default:
3236 /* Unknown / unsupported magic number. */
3237 return false;
3238 }
3239 }
3240 return true;
3241 }
3242
3243 /* map trace ids to correct metadata block, from information in metadata */
cs_etm__map_trace_ids_metadata(struct cs_etm_auxtrace * etm,int num_cpu,u64 ** metadata)3244 static int cs_etm__map_trace_ids_metadata(struct cs_etm_auxtrace *etm, int num_cpu,
3245 u64 **metadata)
3246 {
3247 u64 cs_etm_magic;
3248 u8 trace_chan_id;
3249 int i, err;
3250
3251 for (i = 0; i < num_cpu; i++) {
3252 cs_etm_magic = metadata[i][CS_ETM_MAGIC];
3253 switch (cs_etm_magic) {
3254 case __perf_cs_etmv3_magic:
3255 metadata[i][CS_ETM_ETMTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3256 trace_chan_id = (u8)(metadata[i][CS_ETM_ETMTRACEIDR]);
3257 break;
3258 case __perf_cs_etmv4_magic:
3259 case __perf_cs_ete_magic:
3260 metadata[i][CS_ETMV4_TRCTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3261 trace_chan_id = (u8)(metadata[i][CS_ETMV4_TRCTRACEIDR]);
3262 break;
3263 default:
3264 /* unknown magic number */
3265 return -EINVAL;
3266 }
3267 err = cs_etm__map_trace_id_v0(etm, trace_chan_id, metadata[i]);
3268 if (err)
3269 return err;
3270 }
3271 return 0;
3272 }
3273
3274 /*
3275 * Use the data gathered by the peeks for HW_ID (trace ID mappings) and AUX
3276 * (formatted or not) packets to create the decoders.
3277 */
cs_etm__create_queue_decoders(struct cs_etm_queue * etmq)3278 static int cs_etm__create_queue_decoders(struct cs_etm_queue *etmq)
3279 {
3280 struct cs_etm_decoder_params d_params;
3281 struct cs_etm_trace_params *t_params;
3282 int decoders = intlist__nr_entries(etmq->traceid_list);
3283
3284 if (decoders == 0)
3285 return 0;
3286
3287 /*
3288 * Each queue can only contain data from one CPU when unformatted, so only one decoder is
3289 * needed.
3290 */
3291 if (etmq->format == UNFORMATTED)
3292 assert(decoders == 1);
3293
3294 /* Use metadata to fill in trace parameters for trace decoder */
3295 t_params = zalloc(sizeof(*t_params) * decoders);
3296
3297 if (!t_params)
3298 goto out_free;
3299
3300 if (cs_etm__init_trace_params(t_params, etmq))
3301 goto out_free;
3302
3303 /* Set decoder parameters to decode trace packets */
3304 if (cs_etm__init_decoder_params(&d_params, etmq,
3305 dump_trace ? CS_ETM_OPERATION_PRINT :
3306 CS_ETM_OPERATION_DECODE))
3307 goto out_free;
3308
3309 etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
3310 t_params);
3311
3312 if (!etmq->decoder)
3313 goto out_free;
3314
3315 /*
3316 * Register a function to handle all memory accesses required by
3317 * the trace decoder library.
3318 */
3319 if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
3320 0x0L, ((u64) -1L),
3321 cs_etm__mem_access))
3322 goto out_free_decoder;
3323
3324 zfree(&t_params);
3325 return 0;
3326
3327 out_free_decoder:
3328 cs_etm_decoder__free(etmq->decoder);
3329 out_free:
3330 zfree(&t_params);
3331 return -EINVAL;
3332 }
3333
cs_etm__create_decoders(struct cs_etm_auxtrace * etm)3334 static int cs_etm__create_decoders(struct cs_etm_auxtrace *etm)
3335 {
3336 struct auxtrace_queues *queues = &etm->queues;
3337
3338 for (unsigned int i = 0; i < queues->nr_queues; i++) {
3339 bool empty = list_empty(&queues->queue_array[i].head);
3340 struct cs_etm_queue *etmq = queues->queue_array[i].priv;
3341 int ret;
3342
3343 /*
3344 * Don't create decoders for empty queues, mainly because
3345 * etmq->format is unknown for empty queues.
3346 */
3347 assert(empty || etmq->format != UNSET);
3348 if (empty)
3349 continue;
3350
3351 ret = cs_etm__create_queue_decoders(etmq);
3352 if (ret)
3353 return ret;
3354 }
3355 return 0;
3356 }
3357
/*
 * Parse the CS ETM PERF_RECORD_AUXTRACE_INFO payload: read per-CPU metadata
 * blocks, set up the cs_etm_auxtrace instance and its queues, map trace IDs
 * to metadata (from AUX_OUTPUT_HW_ID records when present, otherwise from
 * the metadata itself) and create the decoders.
 *
 * Returns 0 on success, negative error code otherwise. On failure all
 * allocations made here are released via the err_free_* unwind path.
 */
int cs_etm__process_auxtrace_info_full(union perf_event *event,
				       struct perf_session *session)
{
	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
	struct cs_etm_auxtrace *etm = NULL;
	struct perf_record_time_conv *tc = &session->time_conv;
	int event_header_size = sizeof(struct perf_event_header);
	int total_size = auxtrace_info->header.size;
	int priv_size = 0;
	int num_cpu, max_cpu = 0;
	int err = 0;
	int aux_hw_id_found;
	int i;
	u64 *ptr = NULL;
	u64 **metadata = NULL;

	/* First the global part */
	ptr = (u64 *) auxtrace_info->priv;
	num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
	metadata = zalloc(sizeof(*metadata) * num_cpu);
	if (!metadata)
		return -ENOMEM;

	/* Start parsing after the common part of the header */
	i = CS_HEADER_VERSION_MAX;

	/*
	 * The metadata is stored in the auxtrace_info section and encodes
	 * the configuration of the ARM embedded trace macrocell which is
	 * required by the trace decoder to properly decode the trace due
	 * to its highly compressed nature.
	 */
	for (int j = 0; j < num_cpu; j++) {
		/* Magic number at the current offset selects the block layout. */
		if (ptr[i] == __perf_cs_etmv3_magic) {
			metadata[j] =
				cs_etm__create_meta_blk(ptr, &i,
							CS_ETM_PRIV_MAX,
							CS_ETM_NR_TRC_PARAMS_V0);
		} else if (ptr[i] == __perf_cs_etmv4_magic) {
			metadata[j] =
				cs_etm__create_meta_blk(ptr, &i,
							CS_ETMV4_PRIV_MAX,
							CS_ETMV4_NR_TRC_PARAMS_V0);
		} else if (ptr[i] == __perf_cs_ete_magic) {
			/* ETE only exists in v1+ headers, so no v0 param count */
			metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1);
		} else {
			ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". File could be from a newer version of perf.\n",
				  ptr[i]);
			err = -EINVAL;
			goto err_free_metadata;
		}

		if (!metadata[j]) {
			err = -ENOMEM;
			goto err_free_metadata;
		}

		/* Track the highest CPU number so the queue array can be sized */
		if ((int) metadata[j][CS_ETM_CPU] > max_cpu)
			max_cpu = metadata[j][CS_ETM_CPU];
	}

	/*
	 * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and
	 * CS_ETMV4_PRIV_MAX mark how many double words are in the
	 * global metadata, and each cpu's metadata respectively.
	 * The following tests if the correct number of double words was
	 * present in the auxtrace info section.
	 */
	priv_size = total_size - event_header_size - INFO_HEADER_SIZE;
	if (i * 8 != priv_size) {
		err = -EINVAL;
		goto err_free_metadata;
	}

	etm = zalloc(sizeof(*etm));

	if (!etm) {
		err = -ENOMEM;
		goto err_free_metadata;
	}

	/*
	 * As all the ETMs run at the same exception level, the system should
	 * have the same PID format crossing CPUs. So cache the PID format
	 * and reuse it for sequential decoding.
	 */
	etm->pid_fmt = cs_etm__init_pid_fmt(metadata[0]);

	/* Queues are indexed by CPU number, hence max_cpu + 1 entries */
	err = auxtrace_queues__init_nr(&etm->queues, max_cpu + 1);
	if (err)
		goto err_free_etm;

	for (unsigned int j = 0; j < etm->queues.nr_queues; ++j) {
		err = cs_etm__setup_queue(etm, &etm->queues.queue_array[j], j);
		if (err)
			goto err_free_queues;
	}

	if (session->itrace_synth_opts->set) {
		etm->synth_opts = *session->itrace_synth_opts;
	} else {
		itrace_synth_opts__set_default(&etm->synth_opts,
				session->itrace_synth_opts->default_no_sample);
		etm->synth_opts.callchain = false;
	}

	etm->session = session;

	etm->num_cpu = num_cpu;
	/* PMU type lives in the upper 32 bits of the CS_PMU_TYPE_CPUS word */
	etm->pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> 32) & 0xffffffff);
	etm->snapshot_mode = (ptr[CS_ETM_SNAPSHOT] != 0);
	etm->metadata = metadata;
	etm->auxtrace_type = auxtrace_info->type;

	if (etm->synth_opts.use_timestamp)
		/*
		 * Prior to Armv8.4, Arm CPUs don't support FEAT_TRF feature,
		 * therefore the decoder cannot know if the timestamp trace is
		 * same with the kernel time.
		 *
		 * If a user has knowledge for the working platform and can
		 * specify itrace option 'T' to tell decoder to forcely use the
		 * traced timestamp as the kernel time.
		 */
		etm->has_virtual_ts = true;
	else
		/* Use virtual timestamps if all ETMs report ts_source = 1 */
		etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu);

	if (!etm->has_virtual_ts)
		ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n"
			    "The time field of the samples will not be set accurately.\n\n"
			    "For Arm CPUs prior to Armv8.4 or without support FEAT_TRF,\n"
			    "you can specify the itrace option 'T' for timestamp decoding\n"
			    "if the Coresight timestamp on the platform is same with the kernel time.\n\n");

	etm->auxtrace.process_event = cs_etm__process_event;
	etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
	etm->auxtrace.flush_events = cs_etm__flush_events;
	etm->auxtrace.free_events = cs_etm__free_events;
	etm->auxtrace.free = cs_etm__free;
	etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace;
	session->auxtrace = &etm->auxtrace;

	err = cs_etm__setup_timeless_decoding(etm);
	if (err)
		/*
		 * NOTE(review): returns without running the err_free_* unwind;
		 * session->auxtrace is already set at this point, so cleanup
		 * presumably happens via cs_etm__free at session teardown —
		 * confirm against auxtrace lifecycle.
		 */
		return err;

	/* Cache the time conversion parameters for timestamp synthesis */
	etm->tc.time_shift = tc->time_shift;
	etm->tc.time_mult = tc->time_mult;
	etm->tc.time_zero = tc->time_zero;
	if (event_contains(*tc, time_cycles)) {
		etm->tc.time_cycles = tc->time_cycles;
		etm->tc.time_mask = tc->time_mask;
		etm->tc.cap_user_time_zero = tc->cap_user_time_zero;
		etm->tc.cap_user_time_short = tc->cap_user_time_short;
	}
	err = cs_etm__synth_events(etm, session);
	if (err)
		goto err_free_queues;

	err = cs_etm__queue_aux_records(session);
	if (err)
		goto err_free_queues;

	/*
	 * Map Trace ID values to CPU metadata.
	 *
	 * Trace metadata will always contain Trace ID values from the legacy algorithm
	 * in case it's read by a version of Perf that doesn't know about HW_ID packets
	 * or the kernel doesn't emit them.
	 *
	 * The updated kernel drivers that use AUX_HW_ID to sent Trace IDs will attempt to use
	 * the same IDs as the old algorithm as far as is possible, unless there are clashes
	 * in which case a different value will be used. This means an older perf may still
	 * be able to record and read files generate on a newer system.
	 *
	 * For a perf able to interpret AUX_HW_ID packets we first check for the presence of
	 * those packets. If they are there then the values will be mapped and plugged into
	 * the metadata and decoders are only created for each mapping received.
	 *
	 * If no AUX_HW_ID packets are present - which means a file recorded on an old kernel
	 * then we map Trace ID values to CPU directly from the metadata and create decoders
	 * for all mappings.
	 */

	/* Scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
	aux_hw_id_found = 0;
	err = perf_session__peek_events(session, session->header.data_offset,
					session->header.data_size,
					cs_etm__process_aux_hw_id_cb, &aux_hw_id_found);
	if (err)
		goto err_free_queues;

	/* if no HW ID found this is a file with metadata values only, map from metadata */
	if (!aux_hw_id_found) {
		err = cs_etm__map_trace_ids_metadata(etm, num_cpu, metadata);
		if (err)
			goto err_free_queues;
	}

	err = cs_etm__create_decoders(etm);
	if (err)
		goto err_free_queues;

	/* If nothing was queued, cs_etm__process_auxtrace_event() will queue later */
	etm->data_queued = etm->queues.populated;
	return 0;

err_free_queues:
	auxtrace_queues__free(&etm->queues);
	session->auxtrace = NULL;
err_free_etm:
	zfree(&etm);
err_free_metadata:
	/* No need to check @metadata[j], free(NULL) is supported */
	for (int j = 0; j < num_cpu; j++)
		zfree(&metadata[j]);
	zfree(&metadata);
	return err;
}
3578