// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright(C) 2015-2018 Linaro Limited.
 *
 * Author: Tor Jeremiassen <tor@ti.com>
 * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
 */

#include <linux/kernel.h>
#include <linux/bitfield.h>
#include <linux/bitops.h>
#include <linux/coresight-pmu.h>
#include <linux/err.h>
#include <linux/log2.h>
#include <linux/types.h>
#include <linux/zalloc.h>

#include <stdlib.h>

#include "auxtrace.h"
#include "color.h"
#include "cs-etm.h"
#include "cs-etm-decoder/cs-etm-decoder.h"
#include "debug.h"
#include "dso.h"
#include "evlist.h"
#include "intlist.h"
#include "machine.h"
#include "map.h"
#include "perf.h"
#include "session.h"
#include "map_symbol.h"
#include "branch.h"
#include "symbol.h"
#include "tool.h"
#include "thread.h"
#include "thread-stack.h"
#include "tsc.h"
#include <tools/libc_compat.h>
#include "util/synthetic-events.h"
#include "util/util.h"

struct cs_etm_auxtrace {
	struct auxtrace auxtrace;
	struct auxtrace_queues queues;
	struct auxtrace_heap heap;
	struct itrace_synth_opts synth_opts;
	struct perf_session *session;
	struct perf_tsc_conversion tc;

	/*
	 * Timeless decoding has no timestamps in the trace, so overlapping
	 * mmap lookups are less accurate, but it produces smaller trace data.
	 * We use context IDs in the trace instead of matching timestamps with
	 * fork records, so timestamps aren't really needed in the general
	 * case. Overlapping mmaps happen in cases like between a fork and an
	 * exec.
	 */
	bool timeless_decoding;

	/*
	 * Per-thread ignores the trace channel ID and instead assumes that
	 * everything in a buffer comes from the same process regardless of
	 * which CPU it ran on. It also implies no context IDs so the TID is
	 * taken from the auxtrace buffer.
	 */
	bool per_thread_decoding;
	bool snapshot_mode;
	bool data_queued;
	bool has_virtual_ts; /* Virtual/Kernel timestamps in the trace. */

	int num_cpu;
	u64 latest_kernel_timestamp;
	u32 auxtrace_type;
	u64 branches_sample_type;
	u64 branches_id;
	u64 instructions_sample_type;
	u64 instructions_sample_period;
	u64 instructions_id;
	u64 **metadata;
	unsigned int pmu_type;
	enum cs_etm_pid_fmt pid_fmt;
};

struct cs_etm_traceid_queue {
	u8 trace_chan_id;
	u64 period_instructions;
	size_t last_branch_pos;
	union perf_event *event_buf;
	struct thread *thread;
	struct thread *prev_packet_thread;
	ocsd_ex_level prev_packet_el;
	ocsd_ex_level el;
	struct branch_stack *last_branch;
	struct branch_stack *last_branch_rb;
	struct cs_etm_packet *prev_packet;
	struct cs_etm_packet *packet;
	struct cs_etm_packet_queue packet_queue;
};

enum cs_etm_format {
	UNSET,
	FORMATTED,
	UNFORMATTED
};

struct cs_etm_queue {
	struct cs_etm_auxtrace *etm;
	struct cs_etm_decoder *decoder;
	struct auxtrace_buffer *buffer;
	unsigned int queue_nr;
	u8 pending_timestamp_chan_id;
	enum cs_etm_format format;
	u64 offset;
	const unsigned char *buf;
	size_t buf_len, buf_used;
	/* Conversion between traceID and index in traceid_queues array */
	struct intlist *traceid_queues_list;
	struct cs_etm_traceid_queue **traceid_queues;
	/* Conversion between traceID and metadata pointers */
	struct intlist *traceid_list;
	/*
	 * Same as traceid_list, but traceid_list may be a reference to another
	 * queue's list which has a matching sink ID.
	 */
	struct intlist *own_traceid_list;
	u32 sink_id;
};

static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm);
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
					   pid_t tid);
static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);
static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata);
static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu);
static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata);

/* PTMs ETMIDR [11:8] set to b0011 */
#define ETMIDR_PTM_VERSION 0x00000300

/*
 * A struct auxtrace_heap_item only has a queue_nr and a timestamp to
 * work with. One option is to modify the auxtrace_heap_XYZ() API or simply
 * encode the etm queue number as the upper 16 bits and the channel as
 * the lower 16 bits.
 */
#define TO_CS_QUEUE_NR(queue_nr, trace_chan_id) \
	(queue_nr << 16 | trace_chan_id)
#define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16)
#define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff)
#define SINK_UNSET ((u32) -1)
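
/*
 * Worked example of the encoding above: queue_nr 2 with trace channel ID
 * 0x10 packs to cs_queue_nr 0x20010, so TO_QUEUE_NR(0x20010) == 2 and
 * TO_TRACE_CHAN_ID(0x20010) == 0x10.
 */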

static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
{
	etmidr &= ETMIDR_PTM_VERSION;

	if (etmidr == ETMIDR_PTM_VERSION)
		return CS_ETM_PROTO_PTM;

	return CS_ETM_PROTO_ETMV3;
}

static int cs_etm__get_magic(struct cs_etm_queue *etmq, u8 trace_chan_id, u64 *magic)
{
	struct int_node *inode;
	u64 *metadata;

	inode = intlist__find(etmq->traceid_list, trace_chan_id);
	if (!inode)
		return -EINVAL;

	metadata = inode->priv;
	*magic = metadata[CS_ETM_MAGIC];
	return 0;
}

int cs_etm__get_cpu(struct cs_etm_queue *etmq, u8 trace_chan_id, int *cpu)
{
	struct int_node *inode;
	u64 *metadata;

	inode = intlist__find(etmq->traceid_list, trace_chan_id);
	if (!inode)
		return -EINVAL;

	metadata = inode->priv;
	*cpu = (int)metadata[CS_ETM_CPU];
	return 0;
}

/*
 * The returned PID format is presented as an enum:
 *
 *   CS_ETM_PIDFMT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced.
 *   CS_ETM_PIDFMT_CTXTID2: CONTEXTIDR_EL2 is traced.
 *   CS_ETM_PIDFMT_NONE: No context IDs
 *
 * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2
 * are enabled at the same time when the session runs on an EL2 kernel.
 * In that case both CONTEXTIDR_EL1 and CONTEXTIDR_EL2 will be recorded
 * in the trace data and the tool will preferentially use CONTEXTIDR_EL2
 * as the PID.
 *
 * The result is cached in etm->pid_fmt so this function only needs to be
 * called when processing the aux info.
 */
static enum cs_etm_pid_fmt cs_etm__init_pid_fmt(u64 *metadata)
{
	u64 val;

	if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) {
		val = metadata[CS_ETM_ETMCR];
		/* CONTEXTIDR is traced */
		if (val & BIT(ETM_OPT_CTXTID))
			return CS_ETM_PIDFMT_CTXTID;
	} else {
		val = metadata[CS_ETMV4_TRCCONFIGR];
		/* CONTEXTIDR_EL2 is traced */
		if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT)))
			return CS_ETM_PIDFMT_CTXTID2;
		/* CONTEXTIDR_EL1 is traced */
		else if (val & BIT(ETM4_CFG_BIT_CTXTID))
			return CS_ETM_PIDFMT_CTXTID;
	}

	return CS_ETM_PIDFMT_NONE;
}

enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq)
{
	return etmq->etm->pid_fmt;
}

static int cs_etm__insert_trace_id_node(struct cs_etm_queue *etmq,
					u8 trace_chan_id, u64 *cpu_metadata)
{
	/* Get an RB node for this CPU */
	struct int_node *inode = intlist__findnew(etmq->traceid_list, trace_chan_id);

	/* Something went wrong, no need to continue */
	if (!inode)
		return -ENOMEM;

	/* Disallow re-mapping a traceID to a different metadata pair. */
	if (inode->priv) {
		u64 *curr_cpu_data = inode->priv;
		u8 curr_chan_id;
		int err;

		if (curr_cpu_data[CS_ETM_CPU] != cpu_metadata[CS_ETM_CPU]) {
			/*
			 * With > CORESIGHT_TRACE_IDS_MAX ETMs, overlapping IDs
			 * are expected (but not supported) in per-thread mode,
			 * rather than signifying an error.
			 */
			if (etmq->etm->per_thread_decoding)
				pr_err("CS_ETM: overlapping Trace IDs aren't currently supported in per-thread mode\n");
			else
				pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");

			return -EINVAL;
		}

		/* check that the mapped ID matches */
		err = cs_etm__metadata_get_trace_id(&curr_chan_id, curr_cpu_data);
		if (err)
			return err;

		if (curr_chan_id != trace_chan_id) {
			pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
			return -EINVAL;
		}

		/* Skip re-adding the same mappings if everything matched */
		return 0;
	}

	/* Not one we've seen before, associate the traceID with the metadata pointer */
	inode->priv = cpu_metadata;

	return 0;
}

static struct cs_etm_queue *cs_etm__get_queue(struct cs_etm_auxtrace *etm, int cpu)
{
	if (etm->per_thread_decoding)
		return etm->queues.queue_array[0].priv;
	else
		return etm->queues.queue_array[cpu].priv;
}

static int cs_etm__map_trace_id_v0(struct cs_etm_auxtrace *etm, u8 trace_chan_id,
				   u64 *cpu_metadata)
{
	struct cs_etm_queue *etmq;

	/*
	 * If the queue is unformatted then only save one mapping in the
	 * queue associated with that CPU so only one decoder is made.
	 */
	etmq = cs_etm__get_queue(etm, cpu_metadata[CS_ETM_CPU]);
	if (etmq->format == UNFORMATTED)
		return cs_etm__insert_trace_id_node(etmq, trace_chan_id,
						    cpu_metadata);

	/*
	 * Otherwise, version 0 trace IDs are global so save them into every
	 * queue.
	 */
	for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) {
		int ret;

		etmq = etm->queues.queue_array[i].priv;
		ret = cs_etm__insert_trace_id_node(etmq, trace_chan_id,
						   cpu_metadata);
		if (ret)
			return ret;
	}

	return 0;
}

static int cs_etm__process_trace_id_v0(struct cs_etm_auxtrace *etm, int cpu,
				       u64 hw_id)
{
	int err;
	u64 *cpu_data;
	u8 trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);

	cpu_data = get_cpu_data(etm, cpu);
	if (cpu_data == NULL)
		return -EINVAL;

	err = cs_etm__map_trace_id_v0(etm, trace_chan_id, cpu_data);
	if (err)
		return err;

	/*
	 * If we are picking up the association from the packet, we need to
	 * plug the correct trace ID into the metadata for setting up decoders
	 * later.
	 */
	return cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
}

static int cs_etm__process_trace_id_v0_1(struct cs_etm_auxtrace *etm, int cpu,
					 u64 hw_id)
{
	struct cs_etm_queue *etmq = cs_etm__get_queue(etm, cpu);
	int ret;
	u64 *cpu_data;
	u32 sink_id = FIELD_GET(CS_AUX_HW_ID_SINK_ID_MASK, hw_id);
	u8 trace_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);

	/*
	 * Check the sink ID hasn't changed in per-cpu mode. In per-thread
	 * mode, let it pass for now until an actual overlapping trace ID is
	 * hit. In most cases IDs won't overlap even if the sink changes.
	 */
	if (!etmq->etm->per_thread_decoding && etmq->sink_id != SINK_UNSET &&
	    etmq->sink_id != sink_id) {
		pr_err("CS_ETM: mismatch between sink IDs\n");
		return -EINVAL;
	}

	etmq->sink_id = sink_id;

	/* Find which other queues use this sink and link their ID maps */
	for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) {
		struct cs_etm_queue *other_etmq = etm->queues.queue_array[i].priv;

		/* Different sinks, skip */
		if (other_etmq->sink_id != etmq->sink_id)
			continue;

		/* Already linked, skip */
		if (other_etmq->traceid_list == etmq->traceid_list)
			continue;

		/* At the point of first linking, this one should be empty */
		if (!intlist__empty(etmq->traceid_list)) {
			pr_err("CS_ETM: Can't link populated trace ID lists\n");
			return -EINVAL;
		}

		etmq->own_traceid_list = NULL;
		intlist__delete(etmq->traceid_list);
		etmq->traceid_list = other_etmq->traceid_list;
		break;
	}

	cpu_data = get_cpu_data(etm, cpu);
	ret = cs_etm__insert_trace_id_node(etmq, trace_id, cpu_data);
	if (ret)
		return ret;

	ret = cs_etm__metadata_set_trace_id(trace_id, cpu_data);
	if (ret)
		return ret;

	return 0;
}

static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata)
{
	u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];

	switch (cs_etm_magic) {
	case __perf_cs_etmv3_magic:
		*trace_chan_id = (u8)(cpu_metadata[CS_ETM_ETMTRACEIDR] &
				      CORESIGHT_TRACE_ID_VAL_MASK);
		break;
	case __perf_cs_etmv4_magic:
	case __perf_cs_ete_magic:
		*trace_chan_id = (u8)(cpu_metadata[CS_ETMV4_TRCTRACEIDR] &
				      CORESIGHT_TRACE_ID_VAL_MASK);
		break;
	default:
		return -EINVAL;
	}
	return 0;
}

/*
 * Update the metadata trace ID from the value found in the
 * AUX_OUTPUT_HW_ID packet.
 */
static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
{
	u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];

	switch (cs_etm_magic) {
	case __perf_cs_etmv3_magic:
		cpu_metadata[CS_ETM_ETMTRACEIDR] = trace_chan_id;
		break;
	case __perf_cs_etmv4_magic:
	case __perf_cs_ete_magic:
		cpu_metadata[CS_ETMV4_TRCTRACEIDR] = trace_chan_id;
		break;

	default:
		return -EINVAL;
	}
	return 0;
}
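
/*
 * Note on the masking above: CoreSight trace IDs are 7-bit values (hence
 * CORESIGHT_TRACE_ID_VAL_MASK), and the CoreSight architecture reserves
 * some of that space (ID 0, and IDs 0x70-0x7f), so only the low bits of
 * the metadata word hold a usable ID.
 */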

/*
 * Get the index of a specific CPU's entry in the metadata array.
 */
static int get_cpu_data_idx(struct cs_etm_auxtrace *etm, int cpu)
{
	int i;

	for (i = 0; i < etm->num_cpu; i++) {
		if (etm->metadata[i][CS_ETM_CPU] == (u64)cpu)
			return i;
	}

	return -1;
}

/*
 * Get the metadata for a specific CPU from the metadata array.
 */
static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu)
{
	int idx = get_cpu_data_idx(etm, cpu);

	return (idx != -1) ? etm->metadata[idx] : NULL;
}

/*
 * Handle the PERF_RECORD_AUX_OUTPUT_HW_ID event.
 *
 * The payload associates the Trace ID and the CPU.
 * The routine is tolerant of seeing multiple packets with the same
 * association, but a CPU / Trace ID association changing during a session is
 * an error.
 */
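/*
 * Payload layout, as implied by the field masks used below: every version
 * carries a major/minor version number and a trace ID; minor version 1
 * additionally carries a sink ID, which lets queues that share a sink
 * share a single trace ID map.
 */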
static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
					    union perf_event *event)
{
	struct cs_etm_auxtrace *etm;
	struct perf_sample sample;
	struct evsel *evsel;
	u64 hw_id;
	int cpu, version, err;

	/* extract and parse the HW ID */
	hw_id = event->aux_output_hw_id.hw_id;
	version = FIELD_GET(CS_AUX_HW_ID_MAJOR_VERSION_MASK, hw_id);

	/* check that we can handle this version */
	if (version > CS_AUX_HW_ID_MAJOR_VERSION) {
		pr_err("CS ETM Trace: PERF_RECORD_AUX_OUTPUT_HW_ID version %d not supported. Please update Perf.\n",
		       version);
		return -EINVAL;
	}

	/* get access to the etm metadata */
	etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace);
	if (!etm || !etm->metadata)
		return -EINVAL;

	/* parse the sample to get the CPU */
	evsel = evlist__event2evsel(session->evlist, event);
	if (!evsel)
		return -EINVAL;
	perf_sample__init(&sample, /*all=*/false);
	err = evsel__parse_sample(evsel, event, &sample);
	if (err)
		goto out;
	cpu = sample.cpu;
	if (cpu == -1) {
		/* no CPU in the sample - possibly recorded with an old version of perf */
		pr_err("CS_ETM: no CPU in AUX_OUTPUT_HW_ID sample. Use a compatible perf to record.\n");
		err = -EINVAL;
		goto out;
	}

	if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0) {
		err = cs_etm__process_trace_id_v0(etm, cpu, hw_id);
		goto out;
	}

	err = cs_etm__process_trace_id_v0_1(etm, cpu, hw_id);
out:
	perf_sample__exit(&sample);
	return err;
}

void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
					      u8 trace_chan_id)
{
	/*
	 * When a timestamp packet is encountered the backend code
	 * is stopped so that the front end has time to process packets
	 * that were accumulated in the traceID queue. Since there can
	 * be more than one channel per cs_etm_queue, we need to specify
	 * what traceID queue needs servicing.
	 */
	etmq->pending_timestamp_chan_id = trace_chan_id;
}

static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
				      u8 *trace_chan_id)
{
	struct cs_etm_packet_queue *packet_queue;

	if (!etmq->pending_timestamp_chan_id)
		return 0;

	if (trace_chan_id)
		*trace_chan_id = etmq->pending_timestamp_chan_id;

	packet_queue = cs_etm__etmq_get_packet_queue(etmq,
						     etmq->pending_timestamp_chan_id);
	if (!packet_queue)
		return 0;

	/* Acknowledge pending status */
	etmq->pending_timestamp_chan_id = 0;

	/* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
	return packet_queue->cs_timestamp;
}

static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
{
	int i;

	queue->head = 0;
	queue->tail = 0;
	queue->packet_count = 0;
	for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) {
		queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
		queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
		queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
		queue->packet_buffer[i].instr_count = 0;
		queue->packet_buffer[i].last_instr_taken_branch = false;
		queue->packet_buffer[i].last_instr_size = 0;
		queue->packet_buffer[i].last_instr_type = 0;
		queue->packet_buffer[i].last_instr_subtype = 0;
		queue->packet_buffer[i].last_instr_cond = 0;
		queue->packet_buffer[i].flags = 0;
		queue->packet_buffer[i].exception_number = UINT32_MAX;
		queue->packet_buffer[i].trace_chan_id = UINT8_MAX;
		queue->packet_buffer[i].cpu = INT_MIN;
	}
}

static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq)
{
	int idx;
	struct int_node *inode;
	struct cs_etm_traceid_queue *tidq;
	struct intlist *traceid_queues_list = etmq->traceid_queues_list;

	intlist__for_each_entry(inode, traceid_queues_list) {
		idx = (int)(intptr_t)inode->priv;
		tidq = etmq->traceid_queues[idx];
		cs_etm__clear_packet_queue(&tidq->packet_queue);
	}
}

static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
				      struct cs_etm_traceid_queue *tidq,
				      u8 trace_chan_id)
{
	int rc = -ENOMEM;
	struct auxtrace_queue *queue;
	struct cs_etm_auxtrace *etm = etmq->etm;

	cs_etm__clear_packet_queue(&tidq->packet_queue);

	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
	tidq->trace_chan_id = trace_chan_id;
	tidq->el = tidq->prev_packet_el = ocsd_EL_unknown;
	tidq->thread = machine__findnew_thread(&etm->session->machines.host, -1,
					       queue->tid);
	tidq->prev_packet_thread = machine__idle_thread(&etm->session->machines.host);

	tidq->packet = zalloc(sizeof(struct cs_etm_packet));
	if (!tidq->packet)
		goto out;

	tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet));
	if (!tidq->prev_packet)
		goto out_free;

	if (etm->synth_opts.last_branch) {
		size_t sz = sizeof(struct branch_stack);

		sz += etm->synth_opts.last_branch_sz *
		      sizeof(struct branch_entry);
		tidq->last_branch = zalloc(sz);
		if (!tidq->last_branch)
			goto out_free;
		tidq->last_branch_rb = zalloc(sz);
		if (!tidq->last_branch_rb)
			goto out_free;
	}

	tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
	if (!tidq->event_buf)
		goto out_free;

	return 0;

out_free:
	zfree(&tidq->last_branch_rb);
	zfree(&tidq->last_branch);
	zfree(&tidq->prev_packet);
	zfree(&tidq->packet);
out:
	return rc;
}

static struct cs_etm_traceid_queue
*cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
{
	int idx;
	struct int_node *inode;
	struct intlist *traceid_queues_list;
	struct cs_etm_traceid_queue *tidq, **traceid_queues;
	struct cs_etm_auxtrace *etm = etmq->etm;

	if (etm->per_thread_decoding)
		trace_chan_id = CS_ETM_PER_THREAD_TRACEID;

	traceid_queues_list = etmq->traceid_queues_list;

	/*
	 * Check if a traceid_queue exists for this traceID by looking
	 * in the queue list.
	 */
	inode = intlist__find(traceid_queues_list, trace_chan_id);
	if (inode) {
		idx = (int)(intptr_t)inode->priv;
		return etmq->traceid_queues[idx];
	}

	/* We couldn't find a traceid_queue for this traceID, allocate one */
	tidq = malloc(sizeof(*tidq));
	if (!tidq)
		return NULL;

	memset(tidq, 0, sizeof(*tidq));

	/* Get a valid index for the new traceid_queue */
	idx = intlist__nr_entries(traceid_queues_list);
	/* Memory for the inode is freed in cs_etm__free_traceid_queues() */
	inode = intlist__findnew(traceid_queues_list, trace_chan_id);
	if (!inode)
		goto out_free;

	/* Associate this traceID with this index */
	inode->priv = (void *)(intptr_t)idx;

	if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id))
		goto out_free;

	/* Grow the traceid_queues array by one unit */
	traceid_queues = etmq->traceid_queues;
	traceid_queues = reallocarray(traceid_queues,
				      idx + 1,
				      sizeof(*traceid_queues));

	/*
	 * On failure reallocarray() returns NULL and the original block of
	 * memory is left untouched.
	 */
	if (!traceid_queues)
		goto out_free;

	traceid_queues[idx] = tidq;
	etmq->traceid_queues = traceid_queues;

	return etmq->traceid_queues[idx];

out_free:
	/*
	 * Function intlist__remove() removes the inode from the list
	 * and deletes the memory associated with it.
	 */
	intlist__remove(traceid_queues_list, inode);
	free(tidq);

	return NULL;
}

struct cs_etm_packet_queue
*cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
{
	struct cs_etm_traceid_queue *tidq;

	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
	if (tidq)
		return &tidq->packet_queue;

	return NULL;
}

static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
				struct cs_etm_traceid_queue *tidq)
{
	struct cs_etm_packet *tmp;

	if (etm->synth_opts.branches || etm->synth_opts.last_branch ||
	    etm->synth_opts.instructions) {
		/*
		 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
		 * the next incoming packet.
		 *
		 * Threads and exception levels are also tracked for both the
		 * previous and current packets. This is because the previous
		 * packet is used for the 'from' IP for branch samples, so the
		 * thread at that time must also be assigned to that sample.
		 * Across discontinuity packets the thread can change, so by
		 * tracking the thread for the previous packet the branch sample
		 * will have the correct info.
		 */
		tmp = tidq->packet;
		tidq->packet = tidq->prev_packet;
		tidq->prev_packet = tmp;
		tidq->prev_packet_el = tidq->el;
		thread__put(tidq->prev_packet_thread);
		tidq->prev_packet_thread = thread__get(tidq->thread);
	}
}

static void cs_etm__packet_dump(const char *pkt_string, void *data)
{
	const char *color = PERF_COLOR_BLUE;
	int len = strlen(pkt_string);
	struct cs_etm_queue *etmq = data;
	char queue_nr[64];

	if (verbose)
		snprintf(queue_nr, sizeof(queue_nr), "Qnr:%u; ", etmq->queue_nr);
	else
		queue_nr[0] = '\0';

	if (len && (pkt_string[len-1] == '\n'))
		color_fprintf(stdout, color, " %s%s", queue_nr, pkt_string);
	else
		color_fprintf(stdout, color, " %s%s\n", queue_nr, pkt_string);

	fflush(stdout);
}

static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params,
					  u64 *metadata, u32 etmidr)
{
	t_params->protocol = cs_etm__get_v7_protocol_version(etmidr);
	t_params->etmv3.reg_ctrl = metadata[CS_ETM_ETMCR];
	t_params->etmv3.reg_trc_id = metadata[CS_ETM_ETMTRACEIDR];
}

static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
					  u64 *metadata)
{
	t_params->protocol = CS_ETM_PROTO_ETMV4i;
	t_params->etmv4.reg_idr0 = metadata[CS_ETMV4_TRCIDR0];
	t_params->etmv4.reg_idr1 = metadata[CS_ETMV4_TRCIDR1];
	t_params->etmv4.reg_idr2 = metadata[CS_ETMV4_TRCIDR2];
	t_params->etmv4.reg_idr8 = metadata[CS_ETMV4_TRCIDR8];
	t_params->etmv4.reg_configr = metadata[CS_ETMV4_TRCCONFIGR];
	t_params->etmv4.reg_traceidr = metadata[CS_ETMV4_TRCTRACEIDR];
}

static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
					u64 *metadata)
{
	t_params->protocol = CS_ETM_PROTO_ETE;
	t_params->ete.reg_idr0 = metadata[CS_ETE_TRCIDR0];
	t_params->ete.reg_idr1 = metadata[CS_ETE_TRCIDR1];
	t_params->ete.reg_idr2 = metadata[CS_ETE_TRCIDR2];
	t_params->ete.reg_idr8 = metadata[CS_ETE_TRCIDR8];
	t_params->ete.reg_configr = metadata[CS_ETE_TRCCONFIGR];
	t_params->ete.reg_traceidr = metadata[CS_ETE_TRCTRACEIDR];
	t_params->ete.reg_devarch = metadata[CS_ETE_TRCDEVARCH];
}

static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
				     struct cs_etm_queue *etmq)
{
	struct int_node *inode;

	intlist__for_each_entry(inode, etmq->traceid_list) {
		u64 *metadata = inode->priv;
		u64 architecture = metadata[CS_ETM_MAGIC];
		u32 etmidr;

		switch (architecture) {
		case __perf_cs_etmv3_magic:
			etmidr = metadata[CS_ETM_ETMIDR];
			cs_etm__set_trace_param_etmv3(t_params++, metadata, etmidr);
			break;
		case __perf_cs_etmv4_magic:
			cs_etm__set_trace_param_etmv4(t_params++, metadata);
			break;
		case __perf_cs_ete_magic:
			cs_etm__set_trace_param_ete(t_params++, metadata);
			break;
		default:
			return -EINVAL;
		}
	}

	return 0;
}

static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
				       struct cs_etm_queue *etmq,
				       enum cs_etm_decoder_operation mode)
{
	int ret = -EINVAL;

	if (!(mode < CS_ETM_OPERATION_MAX))
		goto out;

	d_params->packet_printer = cs_etm__packet_dump;
	d_params->operation = mode;
	d_params->data = etmq;
	d_params->formatted = etmq->format == FORMATTED;
	d_params->fsyncs = false;
	d_params->hsyncs = false;
	d_params->frame_aligned = true;

	ret = 0;
out:
	return ret;
}

static void cs_etm__dump_event(struct cs_etm_queue *etmq,
			       struct auxtrace_buffer *buffer)
{
	int ret;
	const char *color = PERF_COLOR_BLUE;
	size_t buffer_used = 0;

	fprintf(stdout, "\n");
	color_fprintf(stdout, color,
		      ". ... CoreSight %s Trace data: size %#zx bytes\n",
		      cs_etm_decoder__get_name(etmq->decoder), buffer->size);

	do {
		size_t consumed;

		ret = cs_etm_decoder__process_data_block(
			etmq->decoder, buffer->offset,
			&((u8 *)buffer->data)[buffer_used],
			buffer->size - buffer_used, &consumed);
		if (ret)
			break;

		buffer_used += consumed;
	} while (buffer_used < buffer->size);

	cs_etm_decoder__reset(etmq->decoder);
}

static int cs_etm__flush_events(struct perf_session *session,
				const struct perf_tool *tool)
{
	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	if (dump_trace)
		return 0;

	if (!tool->ordered_events)
		return -EINVAL;

	if (etm->timeless_decoding) {
		/*
		 * Pass tid = -1 to process all queues. But likely they will
		 * have already been processed on PERF_RECORD_EXIT anyway.
		 */
		return cs_etm__process_timeless_queues(etm, -1);
	}

	return cs_etm__process_timestamped_queues(etm);
}

static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
{
	int idx;
	uintptr_t priv;
	struct int_node *inode, *tmp;
	struct cs_etm_traceid_queue *tidq;
	struct intlist *traceid_queues_list = etmq->traceid_queues_list;

	intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) {
		priv = (uintptr_t)inode->priv;
		idx = priv;

		/* Free this traceid_queue from the array */
		tidq = etmq->traceid_queues[idx];
		thread__zput(tidq->thread);
		thread__zput(tidq->prev_packet_thread);
		zfree(&tidq->event_buf);
		zfree(&tidq->last_branch);
		zfree(&tidq->last_branch_rb);
		zfree(&tidq->prev_packet);
		zfree(&tidq->packet);
		zfree(&tidq);

		/*
		 * Function intlist__remove() removes the inode from the list
		 * and deletes the memory associated with it.
		 */
		intlist__remove(traceid_queues_list, inode);
	}

	/* Then the RB tree itself */
	intlist__delete(traceid_queues_list);
	etmq->traceid_queues_list = NULL;

	/* finally free the traceid_queues array */
	zfree(&etmq->traceid_queues);
}

static void cs_etm__free_queue(void *priv)
{
	struct int_node *inode, *tmp;
	struct cs_etm_queue *etmq = priv;

	if (!etmq)
		return;

	cs_etm_decoder__free(etmq->decoder);
	cs_etm__free_traceid_queues(etmq);

	if (etmq->own_traceid_list) {
		/* First remove all traceID/metadata nodes from the RB tree */
		intlist__for_each_entry_safe(inode, tmp, etmq->own_traceid_list)
			intlist__remove(etmq->own_traceid_list, inode);

		/* Then the RB tree itself */
		intlist__delete(etmq->own_traceid_list);
	}

	free(etmq);
}

static void cs_etm__free_events(struct perf_session *session)
{
	unsigned int i;
	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	struct auxtrace_queues *queues = &aux->queues;

	for (i = 0; i < queues->nr_queues; i++) {
		cs_etm__free_queue(queues->queue_array[i].priv);
		queues->queue_array[i].priv = NULL;
	}

	auxtrace_queues__free(queues);
}

static void cs_etm__free(struct perf_session *session)
{
	int i;
	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	cs_etm__free_events(session);
	session->auxtrace = NULL;

	for (i = 0; i < aux->num_cpu; i++)
		zfree(&aux->metadata[i]);

	zfree(&aux->metadata);
	zfree(&aux);
}

static bool cs_etm__evsel_is_auxtrace(struct perf_session *session,
				      struct evsel *evsel)
{
	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);

	return evsel->core.attr.type == aux->pmu_type;
}

static struct machine *cs_etm__get_machine(struct cs_etm_queue *etmq,
					   ocsd_ex_level el)
{
	enum cs_etm_pid_fmt pid_fmt = cs_etm__get_pid_fmt(etmq);

	/*
	 * For any virtualisation based on nVHE (e.g. pKVM), or host kernels
	 * running at EL1, assume everything is the host.
	 */
	if (pid_fmt == CS_ETM_PIDFMT_CTXTID)
		return &etmq->etm->session->machines.host;

	/*
	 * Not perfect, but otherwise assume anything in EL1 is the default
	 * guest, and everything else is the host. Distinguishing between guest
	 * and host userspaces isn't currently supported either. Neither is
	 * multiple guest support. All this does is reduce the likelihood of
	 * decode errors where we look into the host kernel maps when it should
	 * have been the guest maps.
	 */
	switch (el) {
	case ocsd_EL1:
		return machines__find_guest(&etmq->etm->session->machines,
					    DEFAULT_GUEST_KERNEL_ID);
	case ocsd_EL3:
	case ocsd_EL2:
	case ocsd_EL0:
	case ocsd_EL_unknown:
	default:
		return &etmq->etm->session->machines.host;
	}
}

static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address,
			   ocsd_ex_level el)
{
	struct machine *machine = cs_etm__get_machine(etmq, el);

	if (address >= machine__kernel_start(machine)) {
		if (machine__is_host(machine))
			return PERF_RECORD_MISC_KERNEL;
		else
			return PERF_RECORD_MISC_GUEST_KERNEL;
	} else {
		if (machine__is_host(machine))
			return PERF_RECORD_MISC_USER;
		else {
			/*
			 * Can't really happen at the moment because
			 * cs_etm__get_machine() will always return
			 * machines.host for any non EL1 trace.
			 */
			return PERF_RECORD_MISC_GUEST_USER;
		}
	}
}

static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
			      u64 address, size_t size, u8 *buffer,
			      const ocsd_mem_space_acc_t mem_space)
{
	u8 cpumode;
	u64 offset;
	int len;
	struct addr_location al;
	struct dso *dso;
	struct cs_etm_traceid_queue *tidq;
	int ret = 0;

	if (!etmq)
		return 0;

	addr_location__init(&al);
	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
	if (!tidq)
		goto out;

	/*
	 * We've already tracked the EL alongside the PID in
	 * cs_etm__set_thread(), so double check that it matches what OpenCSD
	 * thinks as well. It doesn't distinguish between EL0 and EL1 for this
	 * mem access callback so we had to do the extra tracking. Skip
	 * validation if it's any of the 'any' values.
	 */
	if (!(mem_space == OCSD_MEM_SPACE_ANY ||
	      mem_space == OCSD_MEM_SPACE_N || mem_space == OCSD_MEM_SPACE_S)) {
		if (mem_space & OCSD_MEM_SPACE_EL1N) {
			/* Includes both non secure EL1 and EL0 */
			assert(tidq->el == ocsd_EL1 || tidq->el == ocsd_EL0);
		} else if (mem_space & OCSD_MEM_SPACE_EL2)
			assert(tidq->el == ocsd_EL2);
		else if (mem_space & OCSD_MEM_SPACE_EL3)
			assert(tidq->el == ocsd_EL3);
	}

	cpumode = cs_etm__cpu_mode(etmq, address, tidq->el);

	if (!thread__find_map(tidq->thread, cpumode, address, &al))
		goto out;

	dso = map__dso(al.map);
	if (!dso)
		goto out;

	if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR &&
	    dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE))
		goto out;

	offset = map__map_ip(al.map, address);

	map__load(al.map);

	len = dso__data_read_offset(dso, maps__machine(thread__maps(tidq->thread)),
				    offset, buffer, size);

	if (len <= 0) {
		ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n"
				 "              Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n");
		if (!dso__auxtrace_warned(dso)) {
			pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n",
			       address,
			       dso__long_name(dso) ? dso__long_name(dso) : "Unknown");
			dso__set_auxtrace_warned(dso);
		}
		goto out;
	}
	ret = len;
out:
	addr_location__exit(&al);
	return ret;
}

static struct cs_etm_queue *cs_etm__alloc_queue(void)
{
	struct cs_etm_queue *etmq = zalloc(sizeof(*etmq));

	if (!etmq)
		return NULL;

	etmq->traceid_queues_list = intlist__new(NULL);
	if (!etmq->traceid_queues_list)
		goto out_free;

	/*
	 * Create an RB tree for traceID-metadata tuples. Since the conversion
	 * has to be made for each packet that gets decoded, optimizing access
	 * in anything other than a sequential array is worth doing.
	 */
	etmq->traceid_list = etmq->own_traceid_list = intlist__new(NULL);
	if (!etmq->traceid_list)
		goto out_free;

	return etmq;

out_free:
	intlist__delete(etmq->traceid_queues_list);
	free(etmq);

	return NULL;
}

static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
			       struct auxtrace_queue *queue,
			       unsigned int queue_nr)
{
	struct cs_etm_queue *etmq = queue->priv;

	if (etmq)
		return 0;

	etmq = cs_etm__alloc_queue();

	if (!etmq)
		return -ENOMEM;

	queue->priv = etmq;
	etmq->etm = etm;
	etmq->queue_nr = queue_nr;
	queue->cpu = queue_nr; /* Placeholder, may be reset to -1 in per-thread mode */
	etmq->offset = 0;
	etmq->sink_id = SINK_UNSET;

	return 0;
}

static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm,
					    struct cs_etm_queue *etmq,
					    unsigned int queue_nr)
{
	int ret = 0;
	unsigned int cs_queue_nr;
	u8 trace_chan_id;
	u64 cs_timestamp;

	/*
	 * We are under a CPU-wide trace scenario. As such we need to know
	 * when the code that generated the traces started to execute so that
	 * it can be correlated with execution on other CPUs. So we get a
	 * handle on the beginning of traces and decode until we find a
	 * timestamp. The timestamp is then added to the auxtrace min heap
	 * in order to know which of all the etmqs to decode first.
	 */
	while (1) {
		/*
		 * Fetch an aux_buffer from this etmq. Bail if no more
		 * blocks or an error has been encountered.
		 */
		ret = cs_etm__get_data_block(etmq);
		if (ret <= 0)
			goto out;

		/*
		 * Run the decoder on the trace block. The decoder will stop
		 * when encountering a CS timestamp, a full packet queue or
		 * the end of trace for that block.
		 */
		ret = cs_etm__decode_data_block(etmq);
		if (ret)
			goto out;

		/*
		 * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
		 * the timestamp calculation for us.
		 */
		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);

		/* We found a timestamp, no need to continue. */
		if (cs_timestamp)
			break;

		/*
		 * We didn't find a timestamp so empty all the traceid packet
		 * queues before looking for another timestamp packet, either
		 * in the current data block or a new one. Packets that were
		 * just decoded are useless since no timestamp has been
		 * associated with them. As such simply discard them.
		 */
		cs_etm__clear_all_packet_queues(etmq);
	}

	/*
	 * We have a timestamp. Add it to the min heap to reflect when
	 * instructions conveyed by the range packets of this traceID queue
	 * started to execute. Once the same has been done for all the traceID
	 * queues of each etmq, rendering and decoding can start in
	 * chronological order.
	 *
	 * Note that packets decoded above are still in the traceID's packet
	 * queue and will be processed in cs_etm__process_timestamped_queues().
	 */
	cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
	ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
out:
	return ret;
}

static inline
void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
				 struct cs_etm_traceid_queue *tidq)
{
	struct branch_stack *bs_src = tidq->last_branch_rb;
	struct branch_stack *bs_dst = tidq->last_branch;
	size_t nr = 0;

	/*
	 * Set the number of records before early exit: ->nr is used to
	 * determine how many branches to copy from ->entries.
	 */
	bs_dst->nr = bs_src->nr;

	/*
	 * Early exit when there is nothing to copy.
	 */
	if (!bs_src->nr)
		return;

	/*
	 * As bs_src->entries is a circular buffer, we need to copy from it in
	 * two steps. First, copy the branches from the most recently inserted
	 * branch ->last_branch_pos until the end of bs_src->entries buffer.
	 */
	nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos;
	memcpy(&bs_dst->entries[0],
	       &bs_src->entries[tidq->last_branch_pos],
	       sizeof(struct branch_entry) * nr);

	/*
	 * If we wrapped around at least once, the branches from the beginning
	 * of the bs_src->entries buffer and until the ->last_branch_pos element
	 * are older valid branches: copy them over. The total number of
	 * branches copied over will be equal to the number of branches asked by
	 * the user in last_branch_sz.
	 */
	if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
		memcpy(&bs_dst->entries[nr],
		       &bs_src->entries[0],
		       sizeof(struct branch_entry) * tidq->last_branch_pos);
	}
}
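
/*
 * Worked example of the two-step copy above, with last_branch_sz = 4 and
 * last_branch_pos = 1 after the buffer has wrapped: step one copies
 * entries[1..3] (newest first, since insertion walks downwards) to the
 * front of the destination, and step two appends entries[0], yielding a
 * destination ordered newest to oldest.
 */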

static inline
void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq)
{
	tidq->last_branch_pos = 0;
	tidq->last_branch_rb->nr = 0;
}

static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
					 u8 trace_chan_id, u64 addr)
{
	u8 instrBytes[2];

	cs_etm__mem_access(etmq, trace_chan_id, addr, ARRAY_SIZE(instrBytes),
			   instrBytes, 0);
	/*
	 * T32 instruction size is indicated by bits[15:11] of the first
	 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
	 * denote a 32-bit instruction.
	 */
	return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
}
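
/*
 * Example of the size check above: halfwords are read little-endian, so
 * instrBytes[1] holds bits[15:8]. For the halfword 0xE92D (the first half
 * of a PUSH.W encoding) that byte is 0xE9 and 0xE9 & 0xF8 == 0xE8, so the
 * instruction is 32-bit; for 0x4668 (a 16-bit MOV) it is 0x46 and
 * 0x46 & 0xF8 == 0x40 < 0xE8, so 16-bit.
 */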

static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
{
	/*
	 * Return 0 for packets that have no addresses so that
	 * CS_ETM_INVAL_ADDR doesn't appear in samples.
	 */
	if (packet->sample_type == CS_ETM_DISCONTINUITY ||
	    packet->sample_type == CS_ETM_EXCEPTION)
		return 0;

	return packet->start_addr;
}

static inline
u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
{
	/* Returns 0 for the CS_ETM_DISCONTINUITY packet */
	if (packet->sample_type == CS_ETM_DISCONTINUITY)
		return 0;

	return packet->end_addr - packet->last_instr_size;
}
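
/*
 * Example: a range packet with end_addr 0x400010 whose last instruction is
 * 4 bytes wide reports 0x40000C as the last executed instruction, since
 * end_addr is the first address after the range.
 */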

static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
				     u64 trace_chan_id,
				     const struct cs_etm_packet *packet,
				     u64 offset)
{
	if (packet->isa == CS_ETM_ISA_T32) {
		u64 addr = packet->start_addr;

		while (offset) {
			addr += cs_etm__t32_instr_size(etmq,
						       trace_chan_id, addr);
			offset--;
		}
		return addr;
	}

	/* Assume a 4 byte instruction size (A32/A64) */
	return packet->start_addr + offset * 4;
}
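
/*
 * Example for the A32/A64 fast path above: start_addr 0x400500 with an
 * instruction offset of 3 resolves to 0x40050C. T32 has to walk the
 * instructions one at a time because of the mixed 16/32-bit encoding.
 */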

static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq,
					  struct cs_etm_traceid_queue *tidq)
{
	struct branch_stack *bs = tidq->last_branch_rb;
	struct branch_entry *be;

	/*
	 * The branches are recorded in a circular buffer in reverse
	 * chronological order: we start recording from the last element of the
	 * buffer down. After writing the first element of the stack, move the
	 * insert position back to the end of the buffer.
	 */
	if (!tidq->last_branch_pos)
		tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;

	tidq->last_branch_pos -= 1;

	be = &bs->entries[tidq->last_branch_pos];
	be->from = cs_etm__last_executed_instr(tidq->prev_packet);
	be->to = cs_etm__first_executed_instr(tidq->packet);
	/* No support for mispredict */
	be->flags.mispred = 0;
	be->flags.predicted = 1;

	/*
	 * Increment bs->nr until reaching the number of last branches asked by
	 * the user on the command line.
	 */
	if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
		bs->nr += 1;
}

static int cs_etm__inject_event(union perf_event *event,
				struct perf_sample *sample, u64 type)
{
	event->header.size = perf_event__sample_event_size(sample, type, 0);
	return perf_event__synthesize_sample(event, type, 0, sample);
}

static int
cs_etm__get_trace(struct cs_etm_queue *etmq)
{
	struct auxtrace_buffer *aux_buffer = etmq->buffer;
	struct auxtrace_buffer *old_buffer = aux_buffer;
	struct auxtrace_queue *queue;

	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];

	aux_buffer = auxtrace_buffer__next(queue, aux_buffer);

	/* If no more data, drop the previous auxtrace_buffer and return */
	if (!aux_buffer) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		etmq->buf_len = 0;
		return 0;
	}

	etmq->buffer = aux_buffer;

	/* If the aux_buffer doesn't have data associated, try to load it */
	if (!aux_buffer->data) {
		/* get the file desc associated with the perf data file */
		int fd = perf_data__fd(etmq->etm->session->data);

		aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd);
		if (!aux_buffer->data)
			return -ENOMEM;
	}

	/* If valid, drop the previous buffer */
	if (old_buffer)
		auxtrace_buffer__drop_data(old_buffer);

	etmq->buf_used = 0;
	etmq->buf_len = aux_buffer->size;
	etmq->buf = aux_buffer->data;

	return etmq->buf_len;
}

static void cs_etm__set_thread(struct cs_etm_queue *etmq,
			       struct cs_etm_traceid_queue *tidq, pid_t tid,
			       ocsd_ex_level el)
{
	struct machine *machine = cs_etm__get_machine(etmq, el);

	if (tid != -1) {
		thread__zput(tidq->thread);
		tidq->thread = machine__find_thread(machine, -1, tid);
	}

	/* Couldn't find a known thread */
	if (!tidq->thread)
		tidq->thread = machine__idle_thread(machine);

	tidq->el = el;
}

int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid,
			    u8 trace_chan_id, ocsd_ex_level el)
{
	struct cs_etm_traceid_queue *tidq;

	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
	if (!tidq)
		return -EINVAL;

	cs_etm__set_thread(etmq, tidq, tid, el);
	return 0;
}

bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq)
{
	return !!etmq->etm->timeless_decoding;
}

static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
			      u64 trace_chan_id,
			      const struct cs_etm_packet *packet,
			      struct perf_sample *sample)
{
	/*
	 * It's pointless to read instructions for the CS_ETM_DISCONTINUITY
	 * packet, so directly bail out with 'insn_len' = 0.
	 */
	if (packet->sample_type == CS_ETM_DISCONTINUITY) {
		sample->insn_len = 0;
		return;
	}

	/*
	 * T32 instruction size might be 32-bit or 16-bit, decide by calling
	 * cs_etm__t32_instr_size().
	 */
	if (packet->isa == CS_ETM_ISA_T32)
		sample->insn_len = cs_etm__t32_instr_size(etmq, trace_chan_id,
							  sample->ip);
	/* Otherwise, A64 and A32 instruction size are always 32-bit. */
	else
		sample->insn_len = 4;

	cs_etm__mem_access(etmq, trace_chan_id, sample->ip, sample->insn_len,
			   (void *)sample->insn, 0);
}

u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp)
{
	struct cs_etm_auxtrace *etm = etmq->etm;

	if (etm->has_virtual_ts)
		return tsc_to_perf_time(cs_timestamp, &etm->tc);
	else
		return cs_timestamp;
}

static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq,
					      struct cs_etm_traceid_queue *tidq)
{
	struct cs_etm_auxtrace *etm = etmq->etm;
	struct cs_etm_packet_queue *packet_queue = &tidq->packet_queue;

	if (!etm->timeless_decoding && etm->has_virtual_ts)
		return packet_queue->cs_timestamp;
	else
		return etm->latest_kernel_timestamp;
}

static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
					    struct cs_etm_traceid_queue *tidq,
					    u64 addr, u64 period)
{
	int ret = 0;
	struct cs_etm_auxtrace *etm = etmq->etm;
	union perf_event *event = tidq->event_buf;
	struct perf_sample sample;

	perf_sample__init(&sample, /*all=*/true);
	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->el);
	event->sample.header.size = sizeof(struct perf_event_header);

	/* Set time field based on etm auxtrace config. */
	sample.time = cs_etm__resolve_sample_time(etmq, tidq);

	sample.ip = addr;
	sample.pid = thread__pid(tidq->thread);
	sample.tid = thread__tid(tidq->thread);
	sample.id = etmq->etm->instructions_id;
	sample.stream_id = etmq->etm->instructions_id;
	sample.period = period;
	sample.cpu = tidq->packet->cpu;
	sample.flags = tidq->prev_packet->flags;
	sample.cpumode = event->sample.header.misc;

	cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample);

	if (etm->synth_opts.last_branch)
		sample.branch_stack = tidq->last_branch;

	if (etm->synth_opts.inject) {
		ret = cs_etm__inject_event(event, &sample,
					   etm->instructions_sample_type);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(etm->session, event, &sample);

	if (ret)
		pr_err(
			"CS ETM Trace: failed to deliver instruction event, error %d\n",
			ret);

	perf_sample__exit(&sample);
	return ret;
}

/*
 * The cs etm packet encodes an instruction range between a branch target
 * and the next taken branch. Generate a sample accordingly.
 */
static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
				       struct cs_etm_traceid_queue *tidq)
{
	int ret = 0;
	struct cs_etm_auxtrace *etm = etmq->etm;
	struct perf_sample sample = {.ip = 0,};
	union perf_event *event = tidq->event_buf;
	struct dummy_branch_stack {
		u64 nr;
		u64 hw_idx;
		struct branch_entry entries;
	} dummy_bs;
	u64 ip;

	ip = cs_etm__last_executed_instr(tidq->prev_packet);

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = cs_etm__cpu_mode(etmq, ip,
						     tidq->prev_packet_el);
	event->sample.header.size = sizeof(struct perf_event_header);

	/* Set time field based on etm auxtrace config. */
	sample.time = cs_etm__resolve_sample_time(etmq, tidq);

	sample.ip = ip;
	sample.pid = thread__pid(tidq->prev_packet_thread);
	sample.tid = thread__tid(tidq->prev_packet_thread);
	sample.addr = cs_etm__first_executed_instr(tidq->packet);
	sample.id = etmq->etm->branches_id;
	sample.stream_id = etmq->etm->branches_id;
	sample.period = 1;
	sample.cpu = tidq->packet->cpu;
	sample.flags = tidq->prev_packet->flags;
	sample.cpumode = event->sample.header.misc;

	cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->prev_packet,
			  &sample);

	/*
	 * perf report cannot handle events without a branch stack
	 */
	if (etm->synth_opts.last_branch) {
		dummy_bs = (struct dummy_branch_stack){
			.nr = 1,
			.hw_idx = -1ULL,
			.entries = {
				.from = sample.ip,
				.to = sample.addr,
			},
		};
		sample.branch_stack = (struct branch_stack *)&dummy_bs;
	}

	if (etm->synth_opts.inject) {
		ret = cs_etm__inject_event(event, &sample,
					   etm->branches_sample_type);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(etm->session, event, &sample);

	if (ret)
		pr_err(
			"CS ETM Trace: failed to deliver branch event, error %d\n",
			ret);

	return ret;
}

static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
				struct perf_session *session)
{
	struct evlist *evlist = session->evlist;
	struct evsel *evsel;
	struct perf_event_attr attr;
	bool found = false;
	u64 id;
	int err;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == etm->pmu_type) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_debug("No selected events with CoreSight Trace data\n");
		return 0;
	}

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.size = sizeof(struct perf_event_attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD;
	if (etm->timeless_decoding)
		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	else
		attr.sample_type |= PERF_SAMPLE_TIME;

	attr.exclude_user = evsel->core.attr.exclude_user;
	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
	attr.exclude_hv = evsel->core.attr.exclude_hv;
	attr.exclude_host = evsel->core.attr.exclude_host;
	attr.exclude_guest = evsel->core.attr.exclude_guest;
	attr.sample_id_all = evsel->core.attr.sample_id_all;
	attr.read_format = evsel->core.attr.read_format;

	/* create new id val to be a fixed offset from evsel id */
	id = auxtrace_synth_id_range_start(evsel);

	if (etm->synth_opts.branches) {
		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
		attr.sample_period = 1;
		attr.sample_type |= PERF_SAMPLE_ADDR;
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		etm->branches_sample_type = attr.sample_type;
		etm->branches_id = id;
		id += 1;
		attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
	}

	if (etm->synth_opts.last_branch) {
		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
		/*
		 * We don't use the hardware index, but the sample generation
		 * code uses the new format branch_stack with this field,
		 * so the event attributes must indicate that it's present.
		 */
		attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
	}

	if (etm->synth_opts.instructions) {
		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
		attr.sample_period = etm->synth_opts.period;
		etm->instructions_sample_period = attr.sample_period;
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		etm->instructions_sample_type = attr.sample_type;
		etm->instructions_id = id;
		id += 1;
	}

	return 0;
}

static int cs_etm__sample(struct cs_etm_queue *etmq,
			  struct cs_etm_traceid_queue *tidq)
{
	struct cs_etm_auxtrace *etm = etmq->etm;
	int ret;
	u8 trace_chan_id = tidq->trace_chan_id;
	u64 instrs_prev;

	/* Get instructions remainder from previous packet */
	instrs_prev = tidq->period_instructions;

	tidq->period_instructions += tidq->packet->instr_count;

	/*
	 * Record a branch when the last instruction in
	 * PREV_PACKET is a branch.
	 */
	if (etm->synth_opts.last_branch &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE &&
	    tidq->prev_packet->last_instr_taken_branch)
		cs_etm__update_last_branch_rb(etmq, tidq);

	if (etm->synth_opts.instructions &&
	    tidq->period_instructions >= etm->instructions_sample_period) {
		/*
		 * Emit instruction sample periodically
		 * TODO: allow period to be defined in cycles and clock time
		 */

		/*
		 * The diagram below demonstrates the instruction sample
		 * generation flow:
		 *
		 *    Instrs     Instrs       Instrs       Instrs
		 *   Sample(n)  Sample(n+1)  Sample(n+2)  Sample(n+3)
		 *    |            |            |            |
		 *    V            V            V            V
		 *   --------------------------------------------------
		 *            ^                                  ^
		 *            |                                  |
		 *         Period                             Period
		 *    instructions(Pi)                   instructions(Pi')
		 *
		 *            |                                  |
		 *            \---------------- -----------------/
		 *                             V
		 *                 tidq->packet->instr_count
		 *
		 * Instrs Sample(n...) are the synthesised samples occurring
		 * every etm->instructions_sample_period instructions - as
		 * defined on the perf command line. Sample(n) is the last
		 * sample before the current etm packet; samples n+1 to n+3
		 * are generated from the current etm packet.
		 *
		 * tidq->packet->instr_count represents the number of
		 * instructions in the current etm packet.
		 *
		 * Period instructions (Pi) contains the number of
		 * instructions executed after the sample point(n) from the
		 * previous etm packet. This will always be less than
		 * etm->instructions_sample_period.
		 *
		 * When generating new samples, instructions from two parts
		 * are combined: the tail of the previous packet and the head
		 * of the incoming one form sample(n+1); sample(n+2) and
		 * sample(n+3) each consume a full sample period. After
		 * sample(n+3), the remaining instructions are carried over in
		 * tidq->period_instructions for the next round of calculation.
		 */

		/*
		 * Get the initial offset into the current packet instructions;
		 * entry conditions ensure that instrs_prev is less than
		 * etm->instructions_sample_period.
		 */
		u64 offset = etm->instructions_sample_period - instrs_prev;
		u64 addr;

		/* Prepare last branches for instruction sample */
		if (etm->synth_opts.last_branch)
			cs_etm__copy_last_branch_rb(etmq, tidq);

		while (tidq->period_instructions >=
				etm->instructions_sample_period) {
			/*
			 * Calculate the address of the sampled instruction (-1
			 * as sample is reported as though instruction has just
			 * been executed, but PC has not advanced to next
			 * instruction)
			 */
			addr = cs_etm__instr_addr(etmq, trace_chan_id,
						  tidq->packet, offset - 1);
			ret = cs_etm__synth_instruction_sample(
				etmq, tidq, addr,
				etm->instructions_sample_period);
			if (ret)
				return ret;

			offset += etm->instructions_sample_period;
			tidq->period_instructions -=
				etm->instructions_sample_period;
		}
	}

	if (etm->synth_opts.branches) {
		bool generate_sample = false;

		/* Generate sample for tracing on packet */
		if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY)
			generate_sample = true;

		/* Generate sample for branch taken packet */
		if (tidq->prev_packet->sample_type == CS_ETM_RANGE &&
		    tidq->prev_packet->last_instr_taken_branch)
			generate_sample = true;

		if (generate_sample) {
			ret = cs_etm__synth_branch_sample(etmq, tidq);
			if (ret)
				return ret;
		}
	}

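	/* The packet just processed becomes prev_packet for the next round. */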
	cs_etm__packet_swap(etm, tidq);

	return 0;
}

static int cs_etm__exception(struct cs_etm_traceid_queue *tidq)
{
	/*
	 * When an exception packet is inserted, force
	 * 'prev_packet->last_instr_taken_branch' to true regardless of
	 * whether the last instruction in the previous range packet was a
	 * taken branch. This ensures a branch sample is generated for the
	 * instruction range executed before the exception was trapped to
	 * the kernel or before the exception return.
	 *
	 * The exception packet includes dummy address values, so don't
	 * swap PACKET with PREV_PACKET. This keeps PREV_PACKET usable
	 * for generating instruction and branch samples.
	 */
	if (tidq->prev_packet->sample_type == CS_ETM_RANGE)
		tidq->prev_packet->last_instr_taken_branch = true;

	return 0;
}

static int cs_etm__flush(struct cs_etm_queue *etmq,
			 struct cs_etm_traceid_queue *tidq)
{
	int err = 0;
	struct cs_etm_auxtrace *etm = etmq->etm;

	/* Handle start tracing packet */
	if (tidq->prev_packet->sample_type == CS_ETM_EMPTY)
		goto swap_packet;

	if (etmq->etm->synth_opts.last_branch &&
	    etmq->etm->synth_opts.instructions &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
		u64 addr;

		/* Prepare last branches for instruction sample */
		cs_etm__copy_last_branch_rb(etmq, tidq);

		/*
		 * Generate a last branch event for the branches left in the
		 * circular buffer at the end of the trace.
		 *
		 * Use the address of the end of the last reported execution
		 * range.
		 */
		addr = cs_etm__last_executed_instr(tidq->prev_packet);

		err = cs_etm__synth_instruction_sample(
			etmq, tidq, addr,
			tidq->period_instructions);
		if (err)
			return err;

		tidq->period_instructions = 0;
	}

	if (etm->synth_opts.branches &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
		err = cs_etm__synth_branch_sample(etmq, tidq);
		if (err)
			return err;
	}

swap_packet:
	cs_etm__packet_swap(etm, tidq);

	/* Reset last branches after flushing the trace */
	if (etm->synth_opts.last_branch)
		cs_etm__reset_last_branch_rb(tidq);

	return err;
}

static int cs_etm__end_block(struct cs_etm_queue *etmq,
			     struct cs_etm_traceid_queue *tidq)
{
	int err;

	/*
	 * No new packet is coming and 'etmq->packet' still contains the
	 * stale packet left over from the previous swap, so skip generating
	 * a branch sample for it.
	 *
	 * For this case only flush the branch stack and generate a last
	 * branch event for the branches left in the circular buffer at the
	 * end of the trace.
	 */
	if (etmq->etm->synth_opts.last_branch &&
	    etmq->etm->synth_opts.instructions &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
		u64 addr;

		/* Prepare last branches for instruction sample */
		cs_etm__copy_last_branch_rb(etmq, tidq);

		/*
		 * Use the address of the end of the last reported execution
		 * range.
		 */
		addr = cs_etm__last_executed_instr(tidq->prev_packet);

		err = cs_etm__synth_instruction_sample(
			etmq, tidq, addr,
			tidq->period_instructions);
		if (err)
			return err;

		tidq->period_instructions = 0;
	}

	return 0;
}

/*
 * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue
 * if need be.
 * Returns:	< 0	if error
 *		= 0	if no more auxtrace_buffer to read
 *		> 0	if the current buffer isn't empty yet
 */
static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
{
	int ret;

	if (!etmq->buf_len) {
		ret = cs_etm__get_trace(etmq);
		if (ret <= 0)
			return ret;
		/*
		 * We cannot assume consecutive blocks in the data file
		 * are contiguous, reset the decoder to force re-sync.
		 */
		ret = cs_etm_decoder__reset(etmq->decoder);
		if (ret)
			return ret;
	}

	return etmq->buf_len;
}

static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id,
				 struct cs_etm_packet *packet,
				 u64 end_addr)
{
	/* Initialise to keep compiler happy */
	u16 instr16 = 0;
	u32 instr32 = 0;
	u64 addr;

	switch (packet->isa) {
	case CS_ETM_ISA_T32:
		/*
		 * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247:
		 *
		 *  b'15                 b'8
		 * +-----------------+--------+
		 * | 1 1 0 1 1 1 1 1 |  imm8  |
		 * +-----------------+--------+
		 *
		 * The specification only defines a 16-bit SVC encoding for
		 * T32 and has no 32-bit form, so read only 2 bytes for the
		 * instruction.
		 */
		addr = end_addr - 2;
		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr16),
				   (u8 *)&instr16, 0);
		if ((instr16 & 0xFF00) == 0xDF00)
			return true;

		break;
	case CS_ETM_ISA_A32:
		/*
		 * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247:
		 *
		 *  b'31 b'28 b'27 b'24
		 * +---------+---------+-------------------------+
		 * |  !1111  | 1 1 1 1 |         imm24           |
		 * +---------+---------+-------------------------+
		 */
		addr = end_addr - 4;
		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
				   (u8 *)&instr32, 0);
		if ((instr32 & 0x0F000000) == 0x0F000000 &&
		    (instr32 & 0xF0000000) != 0xF0000000)
			return true;

		break;
	case CS_ETM_ISA_A64:
		/*
		 * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294:
		 *
		 *  b'31               b'21           b'4     b'0
		 * +-----------------------+---------+-----------+
		 * | 1 1 0 1 0 1 0 0 0 0 0 |  imm16  | 0 0 0 0 1 |
		 * +-----------------------+---------+-----------+
		 */
		addr = end_addr - 4;
		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
				   (u8 *)&instr32, 0);
		if ((instr32 & 0xFFE0001F) == 0xd4000001)
			return true;

		break;
	case CS_ETM_ISA_UNKNOWN:
	default:
		break;
	}

	return false;
}

static bool cs_etm__is_syscall(struct cs_etm_queue *etmq,
			       struct cs_etm_traceid_queue *tidq, u64 magic)
{
	u8 trace_chan_id = tidq->trace_chan_id;
	struct cs_etm_packet *packet = tidq->packet;
	struct cs_etm_packet *prev_packet = tidq->prev_packet;

	if (magic == __perf_cs_etmv3_magic)
		if (packet->exception_number == CS_ETMV3_EXC_SVC)
			return true;

	/*
	 * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and
	 * HVC cases; check whether it is an SVC instruction based on
	 * the packet address.
	 */
	if (magic == __perf_cs_etmv4_magic) {
		if (packet->exception_number == CS_ETMV4_EXC_CALL &&
		    cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
					 prev_packet->end_addr))
			return true;
	}

	return false;
}

static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq,
				       u64 magic)
{
	struct cs_etm_packet *packet = tidq->packet;

	if (magic == __perf_cs_etmv3_magic)
		if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT ||
		    packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT ||
		    packet->exception_number == CS_ETMV3_EXC_PE_RESET ||
		    packet->exception_number == CS_ETMV3_EXC_IRQ ||
		    packet->exception_number == CS_ETMV3_EXC_FIQ)
			return true;

	if (magic == __perf_cs_etmv4_magic)
		if (packet->exception_number == CS_ETMV4_EXC_RESET ||
		    packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT ||
		    packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR ||
		    packet->exception_number == CS_ETMV4_EXC_INST_DEBUG ||
		    packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG ||
		    packet->exception_number == CS_ETMV4_EXC_IRQ ||
		    packet->exception_number == CS_ETMV4_EXC_FIQ)
			return true;

	return false;
}

static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq,
				      struct cs_etm_traceid_queue *tidq,
				      u64 magic)
{
	u8 trace_chan_id = tidq->trace_chan_id;
	struct cs_etm_packet *packet = tidq->packet;
	struct cs_etm_packet *prev_packet = tidq->prev_packet;

	if (magic == __perf_cs_etmv3_magic)
		if (packet->exception_number == CS_ETMV3_EXC_SMC ||
		    packet->exception_number == CS_ETMV3_EXC_HYP ||
		    packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE ||
		    packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR ||
		    packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT ||
		    packet->exception_number == CS_ETMV3_EXC_DATA_FAULT ||
		    packet->exception_number == CS_ETMV3_EXC_GENERIC)
			return true;

	if (magic == __perf_cs_etmv4_magic) {
		if (packet->exception_number == CS_ETMV4_EXC_TRAP ||
		    packet->exception_number == CS_ETMV4_EXC_ALIGNMENT ||
		    packet->exception_number == CS_ETMV4_EXC_INST_FAULT ||
		    packet->exception_number == CS_ETMV4_EXC_DATA_FAULT)
			return true;

		/*
		 * For CS_ETMV4_EXC_CALL, instructions other than SVC
		 * (i.e. SMC and HVC) are taken as synchronous exceptions.
		 */
		if (packet->exception_number == CS_ETMV4_EXC_CALL &&
		    !cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
					  prev_packet->end_addr))
			return true;

		/*
		 * ETMv4 has 5 bits for the exception number; numbers in the
		 * range (CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END] are
		 * implementation defined exceptions.
		 *
		 * Simply treat these as synchronous exceptions.
		 */
		if (packet->exception_number > CS_ETMV4_EXC_FIQ &&
		    packet->exception_number <= CS_ETMV4_EXC_END)
			return true;
	}

	return false;
}

static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
				    struct cs_etm_traceid_queue *tidq)
{
	struct cs_etm_packet *packet = tidq->packet;
	struct cs_etm_packet *prev_packet = tidq->prev_packet;
	u8 trace_chan_id = tidq->trace_chan_id;
	u64 magic;
	int ret;

	switch (packet->sample_type) {
	case CS_ETM_RANGE:
		/*
		 * An immediate branch instruction with neither link nor
		 * return flag is a normal branch within the function.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR &&
		    packet->last_instr_subtype == OCSD_S_INSTR_NONE) {
			packet->flags = PERF_IP_FLAG_BRANCH;

			if (packet->last_instr_cond)
				packet->flags |= PERF_IP_FLAG_CONDITIONAL;
		}

		/*
		 * An immediate branch instruction with link (e.g. BL) is a
		 * branch used for a function call.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR &&
		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL;

		/*
		 * An indirect branch instruction with link (e.g. BLR) is a
		 * branch used for a function call.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL;

		/*
		 * An indirect branch instruction with subtype
		 * OCSD_S_INSTR_V7_IMPLIED_RET is an explicit hint of a
		 * function return on A32/T32.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_RETURN;

		/*
		 * An indirect branch instruction without link (e.g. BR) is
		 * usually used for a function return, especially for
		 * functions in dynamically linked libraries.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_NONE)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_RETURN;

		/* Return instruction for function return. */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_V8_RET)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_RETURN;

		/*
		 * The decoder might insert a discontinuity in the middle of
		 * instruction packets; fix up prev_packet with the
		 * PERF_IP_FLAG_TRACE_BEGIN flag to indicate restarting trace.
		 */
		if (prev_packet->sample_type == CS_ETM_DISCONTINUITY)
			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
					      PERF_IP_FLAG_TRACE_BEGIN;

		/*
		 * If the previous packet is an exception return packet
		 * and the return address just follows an SVC instruction,
		 * calibrate the previous packet's sample flags to
		 * PERF_IP_FLAG_SYSCALLRET.
		 */
		if (prev_packet->flags == (PERF_IP_FLAG_BRANCH |
					   PERF_IP_FLAG_RETURN |
					   PERF_IP_FLAG_INTERRUPT) &&
		    cs_etm__is_svc_instr(etmq, trace_chan_id,
					 packet, packet->start_addr))
			prev_packet->flags = PERF_IP_FLAG_BRANCH |
					     PERF_IP_FLAG_RETURN |
					     PERF_IP_FLAG_SYSCALLRET;
		break;
	case CS_ETM_DISCONTINUITY:
		/*
		 * The trace is discontinuous; if the previous packet is an
		 * instruction range packet, set PERF_IP_FLAG_TRACE_END
		 * on it.
		 */
		if (prev_packet->sample_type == CS_ETM_RANGE)
			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
					      PERF_IP_FLAG_TRACE_END;
		break;
	case CS_ETM_EXCEPTION:
		ret = cs_etm__get_magic(etmq, packet->trace_chan_id, &magic);
		if (ret)
			return ret;

		/* The exception is for a system call. */
		if (cs_etm__is_syscall(etmq, tidq, magic))
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL |
					PERF_IP_FLAG_SYSCALLRET;
		/*
		 * The exception is triggered by an external signal from the
		 * bus, interrupt controller, debug module, PE reset or halt.
		 */
		else if (cs_etm__is_async_exception(tidq, magic))
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL |
					PERF_IP_FLAG_ASYNC |
					PERF_IP_FLAG_INTERRUPT;
		/*
		 * Otherwise, the exception is caused by a trap, instruction
		 * or data fault, or alignment errors.
		 */
		else if (cs_etm__is_sync_exception(etmq, tidq, magic))
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL |
					PERF_IP_FLAG_INTERRUPT;

		/*
		 * An exception packet is not used standalone for generating
		 * samples; it is affiliated with the previous instruction
		 * range packet. So copy the flags to the previous range
		 * packet to tell perf it is an exception-taken branch.
		 */
		if (prev_packet->sample_type == CS_ETM_RANGE)
			prev_packet->flags = packet->flags;
		break;
	case CS_ETM_EXCEPTION_RET:
		/*
		 * Like an exception packet, an exception return packet is
		 * not used standalone for generating samples; it is
		 * affiliated with the previous instruction range packet. So
		 * set the previous range packet's flags to tell perf it is
		 * an exception return branch.
		 *
		 * The exception return can be for either a system call or
		 * another exception type; unfortunately the packet doesn't
		 * contain exception type information, so we cannot decide
		 * the exception type purely based on the exception return
		 * packet. Recording the exception number from the exception
		 * packet and reusing it for the exception return packet is
		 * not reliable either, since the trace can be discontinuous
		 * or the interrupt can be nested.
		 *
		 * For an exception return packet we only need to distinguish
		 * a system call from other types. That decision can be
		 * deferred until the next packet, which contains the return
		 * address: based on the return address we can read out the
		 * previous instruction, check whether it is a system call
		 * instruction and then calibrate the sample flags as needed.
		 */
		if (prev_packet->sample_type == CS_ETM_RANGE)
			prev_packet->flags = PERF_IP_FLAG_BRANCH |
					     PERF_IP_FLAG_RETURN |
					     PERF_IP_FLAG_INTERRUPT;
		break;
	case CS_ETM_EMPTY:
	default:
		break;
	}

	return 0;
}

static int cs_etm__decode_data_block(struct cs_etm_queue *etmq)
{
	int ret = 0;
	size_t processed = 0;

	/*
	 * Packets are decoded and added to the decoder's packet queue
	 * until the decoder packet processing callback has requested that
	 * processing stops or there is nothing left in the buffer. Normal
	 * operations that stop processing are a timestamp packet or a full
	 * decoder buffer queue.
	 */
	ret = cs_etm_decoder__process_data_block(etmq->decoder,
						 etmq->offset,
						 &etmq->buf[etmq->buf_used],
						 etmq->buf_len,
						 &processed);
	if (ret)
		goto out;

	etmq->offset += processed;
	etmq->buf_used += processed;
	etmq->buf_len -= processed;

out:
	return ret;
}

static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq,
					 struct cs_etm_traceid_queue *tidq)
{
	int ret;
	struct cs_etm_packet_queue *packet_queue;

	packet_queue = &tidq->packet_queue;

	/* Process each packet in this chunk */
	while (1) {
		ret = cs_etm_decoder__get_packet(packet_queue,
						 tidq->packet);
		if (ret <= 0)
			/*
			 * Stop processing this chunk on
			 * end of data or error
			 */
			break;

		/*
		 * Packet addresses are swapped during packet handling in the
		 * switch() statement below, so sample flags must be set
		 * beforehand, while the address information is still valid.
		 */
		ret = cs_etm__set_sample_flags(etmq, tidq);
		if (ret < 0)
			break;

		switch (tidq->packet->sample_type) {
		case CS_ETM_RANGE:
			/*
			 * If the packet contains an instruction
			 * range, generate instruction sequence
			 * events.
			 */
			cs_etm__sample(etmq, tidq);
			break;
		case CS_ETM_EXCEPTION:
		case CS_ETM_EXCEPTION_RET:
			/*
			 * On an exception packet, make sure the previous
			 * instruction range packet is handled properly.
			 */
			cs_etm__exception(tidq);
			break;
		case CS_ETM_DISCONTINUITY:
			/*
			 * Discontinuity in trace, flush
			 * previous branch stack
			 */
			cs_etm__flush(etmq, tidq);
			break;
		case CS_ETM_EMPTY:
			/*
			 * Should not receive empty packet,
			 * report error.
			 */
			pr_err("CS ETM Trace: empty packet\n");
			return -EINVAL;
		default:
			break;
		}
	}

	return ret;
}

static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq)
{
	int idx;
	struct int_node *inode;
	struct cs_etm_traceid_queue *tidq;
	struct intlist *traceid_queues_list = etmq->traceid_queues_list;

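	/*
	 * Each inode's priv field holds the traceID's index into the
	 * traceid_queues array.
	 */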
	intlist__for_each_entry(inode, traceid_queues_list) {
		idx = (int)(intptr_t)inode->priv;
		tidq = etmq->traceid_queues[idx];

		/* Ignore return value */
		cs_etm__process_traceid_queue(etmq, tidq);
	}
}

static int cs_etm__run_per_thread_timeless_decoder(struct cs_etm_queue *etmq)
{
	int err = 0;
	struct cs_etm_traceid_queue *tidq;

	tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID);
	if (!tidq)
		return -EINVAL;

	/* Go through each buffer in the queue and decode them one by one */
	while (1) {
		err = cs_etm__get_data_block(etmq);
		if (err <= 0)
			return err;

		/* Run trace decoder until buffer consumed or end of trace */
		do {
			err = cs_etm__decode_data_block(etmq);
			if (err)
				return err;

			/*
			 * Process each packet in this chunk, nothing to do if
			 * an error occurs other than hoping the next one will
			 * be better.
			 */
			err = cs_etm__process_traceid_queue(etmq, tidq);

		} while (etmq->buf_len);

		if (err == 0)
			/* Flush any remaining branch stack entries */
			err = cs_etm__end_block(etmq, tidq);
	}

	return err;
}

static int cs_etm__run_per_cpu_timeless_decoder(struct cs_etm_queue *etmq)
{
	int idx, err = 0;
	struct cs_etm_traceid_queue *tidq;
	struct int_node *inode;

	/* Go through each buffer in the queue and decode them one by one */
	while (1) {
		err = cs_etm__get_data_block(etmq);
		if (err <= 0)
			return err;

		/* Run trace decoder until buffer consumed or end of trace */
		do {
			err = cs_etm__decode_data_block(etmq);
			if (err)
				return err;

			/*
			 * cs_etm__run_per_thread_timeless_decoder() runs on a
			 * single traceID queue because each TID has a separate
			 * buffer. But here in per-cpu mode we need to iterate
			 * over each channel instead.
			 */
			intlist__for_each_entry(inode,
						etmq->traceid_queues_list) {
				idx = (int)(intptr_t)inode->priv;
				tidq = etmq->traceid_queues[idx];
				cs_etm__process_traceid_queue(etmq, tidq);
			}
		} while (etmq->buf_len);

		intlist__for_each_entry(inode, etmq->traceid_queues_list) {
			idx = (int)(intptr_t)inode->priv;
			tidq = etmq->traceid_queues[idx];
			/* Flush any remaining branch stack entries */
			err = cs_etm__end_block(etmq, tidq);
			if (err)
				return err;
		}
	}

	return err;
}

static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
					   pid_t tid)
{
	unsigned int i;
	struct auxtrace_queues *queues = &etm->queues;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &etm->queues.queue_array[i];
		struct cs_etm_queue *etmq = queue->priv;
		struct cs_etm_traceid_queue *tidq;

		if (!etmq)
			continue;

		if (etm->per_thread_decoding) {
			tidq = cs_etm__etmq_get_traceid_queue(
				etmq, CS_ETM_PER_THREAD_TRACEID);

			if (!tidq)
				continue;

			if (tid == -1 || thread__tid(tidq->thread) == tid)
				cs_etm__run_per_thread_timeless_decoder(etmq);
		} else
			cs_etm__run_per_cpu_timeless_decoder(etmq);
	}

	return 0;
}

static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm)
{
	int ret = 0;
	unsigned int cs_queue_nr, queue_nr, i;
	u8 trace_chan_id;
	u64 cs_timestamp;
	struct auxtrace_queue *queue;
	struct cs_etm_queue *etmq;
	struct cs_etm_traceid_queue *tidq;

	/*
	 * Pre-populate the heap with one entry from each queue so that we can
	 * start processing in time order across all queues.
	 */
	for (i = 0; i < etm->queues.nr_queues; i++) {
		etmq = etm->queues.queue_array[i].priv;
		if (!etmq)
			continue;

		ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i);
		if (ret)
			return ret;
	}

	while (1) {
		if (!etm->heap.heap_cnt)
			break;

		/* Take the entry at the top of the min heap */
		cs_queue_nr = etm->heap.heap_array[0].queue_nr;
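		/*
		 * cs_queue_nr packs the auxtrace queue number in its upper
		 * bits and the trace channel ID in its lower bits; unpack
		 * both so a single heap can order entries across all
		 * traceID queues.
		 */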
		queue_nr = TO_QUEUE_NR(cs_queue_nr);
		trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr);
		queue = &etm->queues.queue_array[queue_nr];
		etmq = queue->priv;

		/*
		 * Remove the top entry from the heap since we are about
		 * to process it.
		 */
		auxtrace_heap__pop(&etm->heap);

		tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
		if (!tidq) {
			/*
			 * No traceID queue has been allocated for this
			 * traceID, which means something somewhere went very
			 * wrong. No other choice than to simply exit.
			 */
			ret = -EINVAL;
			goto out;
		}

		/*
		 * Packets associated with this timestamp are already in
		 * the etmq's traceID queue, so process them.
		 */
		ret = cs_etm__process_traceid_queue(etmq, tidq);
		if (ret < 0)
			goto out;

		/*
		 * Packets for this timestamp have been processed, time to
		 * move on to the next timestamp, fetching a new auxtrace_buffer
		 * if need be.
		 */
refetch:
		ret = cs_etm__get_data_block(etmq);
		if (ret < 0)
			goto out;

		/*
		 * No more auxtrace_buffers to process in this etmq, simply
		 * move on to another entry in the auxtrace_heap.
		 */
		if (!ret)
			continue;

		ret = cs_etm__decode_data_block(etmq);
		if (ret)
			goto out;

		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);

		if (!cs_timestamp) {
			/*
			 * Function cs_etm__decode_data_block() returns when
			 * there is no more trace to decode in the current
			 * auxtrace_buffer OR when a timestamp has been
			 * encountered on any of the traceID queues. Since we
			 * did not get a timestamp, there is no more trace to
			 * process in this auxtrace_buffer. As such, empty and
			 * flush all traceID queues.
			 */
			cs_etm__clear_all_traceid_queues(etmq);

			/* Fetch another auxtrace_buffer for this etmq */
			goto refetch;
		}

		/*
		 * Add to the min heap the timestamp for packets that have
		 * just been decoded. They will be processed and synthesized
		 * during the next call to cs_etm__process_traceid_queue() for
		 * this queue/traceID.
		 */
		cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
		ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
	}

	for (i = 0; i < etm->queues.nr_queues; i++) {
		struct int_node *inode;

		etmq = etm->queues.queue_array[i].priv;
		if (!etmq)
			continue;

		intlist__for_each_entry(inode, etmq->traceid_queues_list) {
			int idx = (int)(intptr_t)inode->priv;

			/* Flush any remaining branch stack entries */
			tidq = etmq->traceid_queues[idx];
			ret = cs_etm__end_block(etmq, tidq);
			if (ret)
				return ret;
		}
	}
out:
	return ret;
}

static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm,
					union perf_event *event)
{
	struct thread *th;

	if (etm->timeless_decoding)
		return 0;

	/*
	 * Add the tid/pid to the log so that we can get a match when we get a
	 * contextID from the decoder. Only track for the host: only kernel
	 * trace is supported for guests which wouldn't need pids so this should
	 * be fine.
	 */
	th = machine__findnew_thread(&etm->session->machines.host,
				     event->itrace_start.pid,
				     event->itrace_start.tid);
	if (!th)
		return -ENOMEM;

	thread__put(th);

	return 0;
}

static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm,
					   union perf_event *event)
{
	struct thread *th;
	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;

	/*
	 * Context switches in per-thread mode are irrelevant since perf
	 * will start/stop tracing as the process is scheduled.
	 */
	if (etm->timeless_decoding)
		return 0;

	/*
	 * SWITCH_IN events carry the next process to be switched out while
	 * SWITCH_OUT events carry the process to be switched in. As such
	 * we don't care about IN events.
	 */
	if (!out)
		return 0;

	/*
	 * Add the tid/pid to the log so that we can get a match when we get a
	 * contextID from the decoder. Only track for the host: only kernel
	 * trace is supported for guests which wouldn't need pids so this should
	 * be fine.
	 */
	th = machine__findnew_thread(&etm->session->machines.host,
				     event->context_switch.next_prev_pid,
				     event->context_switch.next_prev_tid);
	if (!th)
		return -ENOMEM;

	thread__put(th);

	return 0;
}

static int cs_etm__process_event(struct perf_session *session,
				 union perf_event *event,
				 struct perf_sample *sample,
				 const struct perf_tool *tool)
{
	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("CoreSight ETM Trace requires ordered events\n");
		return -EINVAL;
	}

	switch (event->header.type) {
	case PERF_RECORD_EXIT:
		/*
		 * Don't need to wait for cs_etm__flush_events() in per-thread mode to
		 * start the decode because we know there will be no more trace from
		 * this thread. All this does is emit samples earlier than waiting for
		 * the flush in other modes, but with timestamps it makes sense to wait
		 * for flush so that events from different threads are interleaved
		 * properly.
		 */
		if (etm->per_thread_decoding && etm->timeless_decoding)
			return cs_etm__process_timeless_queues(etm,
							       event->fork.tid);
		break;

	case PERF_RECORD_ITRACE_START:
		return cs_etm__process_itrace_start(etm, event);

	case PERF_RECORD_SWITCH_CPU_WIDE:
		return cs_etm__process_switch_cpu_wide(etm, event);

	case PERF_RECORD_AUX:
		/*
		 * Record the latest kernel timestamp available in the header
		 * for samples so that synthesised samples occur from this point
		 * onwards.
		 */
		if (sample->time && (sample->time != (u64)-1))
			etm->latest_kernel_timestamp = sample->time;
		break;

	default:
		break;
	}

	return 0;
}

static void dump_queued_data(struct cs_etm_auxtrace *etm,
			     struct perf_record_auxtrace *event)
{
	struct auxtrace_buffer *buf;
	unsigned int i;
	/*
	 * Find all buffers with same reference in the queues and dump them.
	 * This is because the queues can contain multiple entries of the same
	 * buffer that were split on aux records.
	 */
	for (i = 0; i < etm->queues.nr_queues; ++i)
		list_for_each_entry(buf, &etm->queues.queue_array[i].head, list)
			if (buf->reference == event->reference)
				cs_etm__dump_event(etm->queues.queue_array[i].priv, buf);
}

static int cs_etm__process_auxtrace_event(struct perf_session *session,
					  union perf_event *event,
					  const struct perf_tool *tool __maybe_unused)
{
	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	if (!etm->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t data_offset;
		int fd = perf_data__fd(session->data);
		bool is_pipe = perf_data__is_pipe(session->data);
		int err;
		int idx = event->auxtrace.idx;

		if (is_pipe)
			data_offset = 0;
		else {
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

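		/*
		 * Queue the buffer, recording where its payload sits in the
		 * file (0 for pipes) so the data can be fetched on demand
		 * when the queue is processed.
		 */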
		err = auxtrace_queues__add_event(&etm->queues, session,
						 event, data_offset, &buffer);
		if (err)
			return err;

		if (dump_trace)
			if (auxtrace_buffer__get_data(buffer, fd)) {
				cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer);
				auxtrace_buffer__put_data(buffer);
			}
	} else if (dump_trace)
		dump_queued_data(etm, &event->auxtrace);

	return 0;
}

static int cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace *etm)
{
	struct evsel *evsel;
	struct evlist *evlist = etm->session->evlist;

	/* Override timeless mode with user input from --itrace=Z */
	if (etm->synth_opts.timeless_decoding) {
		etm->timeless_decoding = true;
		return 0;
	}

	/*
	 * Find the cs_etm evsel and look at what its timestamp setting was
	 */
	evlist__for_each_entry(evlist, evsel)
		if (cs_etm__evsel_is_auxtrace(etm->session, evsel)) {
			etm->timeless_decoding =
				!(evsel->core.attr.config & BIT(ETM_OPT_TS));
			return 0;
		}

	pr_err("CS ETM: Couldn't find ETM evsel\n");
	return -EINVAL;
}

/*
 * Read a single cpu parameter block from the auxtrace_info priv block.
 *
 * For version 1 there is a per cpu nr_params entry. If we are handling
 * a version 1 file, then there may be fewer, the same, or more params
 * indicated by this value than the compile time number we understand.
 *
 * For a version 0 info block, there are a fixed number, and we need to
 * fill out the nr_param value in the metadata we create.
 */
static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset,
				    int out_blk_size, int nr_params_v0)
{
	u64 *metadata = NULL;
	int hdr_version;
	int nr_in_params, nr_out_params, nr_cmn_params;
	int i, k;

	metadata = zalloc(sizeof(*metadata) * out_blk_size);
	if (!metadata)
		return NULL;

	/* read block current index & version */
	i = *buff_in_offset;
	hdr_version = buff_in[CS_HEADER_VERSION];

	if (!hdr_version) {
		/* read version 0 info block into a version 1 metadata block */
		nr_in_params = nr_params_v0;
		metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC];
		metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU];
		metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params;
		/* remaining block params at offset +1 from source */
		for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++)
			metadata[k + 1] = buff_in[i + k];
		/* version 0 has 2 common params */
		nr_cmn_params = 2;
	} else {
		/* read version 1 info block - input and output nr_params may differ */
		/* version 1 has 3 common params */
		nr_cmn_params = 3;
		nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS];

		/* if input has more params than output - skip excess */
		nr_out_params = nr_in_params + nr_cmn_params;
		if (nr_out_params > out_blk_size)
			nr_out_params = out_blk_size;

		for (k = CS_ETM_MAGIC; k < nr_out_params; k++)
			metadata[k] = buff_in[i + k];

		/* record the actual nr params we copied */
		metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params;
	}

	/* adjust in offset by number of in params used */
	i += nr_in_params + nr_cmn_params;
	*buff_in_offset = i;
	return metadata;
}

/**
 * Puts a fragment of an auxtrace buffer into the auxtrace queues based
 * on the bounds of aux_event, if it matches with the buffer that's at
 * file_offset.
 *
 * Normally, whole auxtrace buffers would be added to the queue. But we
 * want to reset the decoder for every PERF_RECORD_AUX event, and the decoder
 * is reset across each buffer, so splitting the buffers up in advance has
 * the same effect.
 */
static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz,
				      struct perf_record_aux *aux_event, struct perf_sample *sample)
{
	int err;
	char buf[PERF_SAMPLE_MAX_SIZE];
	union perf_event *auxtrace_event_union;
	struct perf_record_auxtrace *auxtrace_event;
	union perf_event auxtrace_fragment;
	__u64 aux_offset, aux_size;
	enum cs_etm_format format;

	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);

	/*
	 * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got
	 * from looping through the auxtrace index.
	 */
	err = perf_session__peek_event(session, file_offset, buf,
				       PERF_SAMPLE_MAX_SIZE, &auxtrace_event_union, NULL);
	if (err)
		return err;
	auxtrace_event = &auxtrace_event_union->auxtrace;
	if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE)
		return -EINVAL;

	if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) ||
	    auxtrace_event->header.size != sz) {
		return -EINVAL;
	}

	/*
	 * In per-thread mode, auxtrace CPU is set to -1, but TID will be set instead. See
	 * auxtrace_mmap_params__set_idx(). However, the sample AUX event will contain a
	 * CPU as we set this always for the AUX_OUTPUT_HW_ID event.
	 * So now compare only TIDs if auxtrace CPU is -1, and CPUs if auxtrace CPU is not -1.
	 * Return 'not found' if mismatch.
	 */
	if (auxtrace_event->cpu == (__u32) -1) {
		etm->per_thread_decoding = true;
		if (auxtrace_event->tid != sample->tid)
			return 1;
	} else if (auxtrace_event->cpu != sample->cpu) {
		if (etm->per_thread_decoding) {
			/*
			 * Found a per-cpu buffer after a per-thread one was
			 * already found
			 */
			pr_err("CS ETM: Inconsistent per-thread/per-cpu mode.\n");
			return -EINVAL;
		}
		return 1;
	}

	if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) {
		/*
		 * Clamp size in snapshot mode. The buffer size is clamped in
		 * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect
		 * the buffer size.
		 */
		aux_size = min(aux_event->aux_size, auxtrace_event->size);

		/*
		 * In this mode, the head also points to the end of the buffer so aux_offset
		 * needs to have the size subtracted so it points to the beginning as in normal mode
		 */
		aux_offset = aux_event->aux_offset - aux_size;
	} else {
		aux_size = aux_event->aux_size;
		aux_offset = aux_event->aux_offset;
	}

	if (aux_offset >= auxtrace_event->offset &&
	    aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) {
		struct cs_etm_queue *etmq = etm->queues.queue_array[auxtrace_event->idx].priv;

		/*
		 * If this AUX event was inside this buffer somewhere, create a new auxtrace event
		 * based on the sizes of the aux event, and queue that fragment.
		 */
		auxtrace_fragment.auxtrace = *auxtrace_event;
		auxtrace_fragment.auxtrace.size = aux_size;
		auxtrace_fragment.auxtrace.offset = aux_offset;
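		/*
		 * Point file_offset at this fragment's payload: skip the
		 * record header and the part of the buffer that precedes
		 * the AUX event.
		 */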
		file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size;

		pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64
			  " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu);
		err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment,
						 file_offset, NULL);
		if (err)
			return err;

		format = (aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW) ?
			UNFORMATTED : FORMATTED;
		if (etmq->format != UNSET && format != etmq->format) {
			pr_err("CS_ETM: mixed formatted and unformatted trace not supported\n");
			return -EINVAL;
		}
		etmq->format = format;
		return 0;
	}

	/* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
	return 1;
}

static int cs_etm__process_aux_hw_id_cb(struct perf_session *session, union perf_event *event,
					u64 offset __maybe_unused, void *data __maybe_unused)
{
	/* look to handle PERF_RECORD_AUX_OUTPUT_HW_ID early to ensure decoders can be set up */
	if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) {
		(*(int *)data)++; /* increment found count */
		return cs_etm__process_aux_output_hw_id(session, event);
	}
	return 0;
}

static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event,
					u64 offset __maybe_unused, void *data __maybe_unused)
{
	struct perf_sample sample;
	int ret;
	struct auxtrace_index_entry *ent;
	struct auxtrace_index *auxtrace_index;
	struct evsel *evsel;
	size_t i;

	/* Don't care about any other events, we're only queuing buffers for AUX events */
	if (event->header.type != PERF_RECORD_AUX)
		return 0;

	if (event->header.size < sizeof(struct perf_record_aux))
		return -EINVAL;

	/* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */
	if (!event->aux.aux_size)
		return 0;

	/*
	 * Parse the sample, we need the sample_id_all data that comes after the event so that the
	 * CPU or PID can be matched to an AUXTRACE buffer's CPU or PID.
	 */
	evsel = evlist__event2evsel(session->evlist, event);
	if (!evsel)
		return -EINVAL;
	perf_sample__init(&sample, /*all=*/false);
	ret = evsel__parse_sample(evsel, event, &sample);
	if (ret)
		goto out;

	/*
	 * Loop through the auxtrace index to find the buffer that matches up with this aux event.
	 */
	list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
		for (i = 0; i < auxtrace_index->nr; i++) {
			ent = &auxtrace_index->entries[i];
			ret = cs_etm__queue_aux_fragment(session, ent->file_offset,
							 ent->sz, &event->aux, &sample);
			/*
			 * Stop search on error or successful values. Continue search on
			 * 1 ('not found')
			 */
			if (ret != 1)
				goto out;
		}
	}

	/*
	 * Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but
	 * don't exit with an error because it will still be possible to decode other aux records.
	 */
	pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64
	       " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu);
	ret = 0;
out:
	perf_sample__exit(&sample);
	return ret;
}

static int cs_etm__queue_aux_records(struct perf_session *session)
{
	struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index,
								struct auxtrace_index, list);
	if (index && index->nr > 0)
		return perf_session__peek_events(session, session->header.data_offset,
						 session->header.data_size,
						 cs_etm__queue_aux_records_cb, NULL);

	/*
	 * We would get here if there are no entries in the index (either no auxtrace
	 * buffers or no index at all). Fail silently as there is the possibility of
	 * queueing them in cs_etm__process_auxtrace_event() if etm->data_queued is still
	 * false.
	 *
	 * In that scenario, buffers will not be split by AUX records.
	 */
	return 0;
}

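/*
 * Note: despite its name, HAS_PARAM evaluates to true when the metadata
 * block records too few trace parameters to contain the named one, i.e.
 * when the parameter is absent.
 */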
#define HAS_PARAM(j, type, param) (metadata[(j)][CS_ETM_NR_TRC_PARAMS] <= \
				   (CS_##type##_##param - CS_ETM_COMMON_BLK_MAX_V1))

/*
 * Loop through the ETMs and return false if at least one does not report
 * ts_source == 1 (virtual timestamps).
 */
static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
{
	int j;

	for (j = 0; j < num_cpu; j++) {
		switch (metadata[j][CS_ETM_MAGIC]) {
		case __perf_cs_etmv4_magic:
			if (HAS_PARAM(j, ETMV4, TS_SOURCE) || metadata[j][CS_ETMV4_TS_SOURCE] != 1)
				return false;
			break;
		case __perf_cs_ete_magic:
			if (HAS_PARAM(j, ETE, TS_SOURCE) || metadata[j][CS_ETE_TS_SOURCE] != 1)
				return false;
			break;
		default:
			/* Unknown / unsupported magic number. */
			return false;
		}
	}
	return true;
}

/* map trace ids to correct metadata block, from information in metadata */
static int cs_etm__map_trace_ids_metadata(struct cs_etm_auxtrace *etm, int num_cpu,
					  u64 **metadata)
{
	u64 cs_etm_magic;
	u8 trace_chan_id;
	int i, err;

	for (i = 0; i < num_cpu; i++) {
		cs_etm_magic = metadata[i][CS_ETM_MAGIC];
		switch (cs_etm_magic) {
		case __perf_cs_etmv3_magic:
			metadata[i][CS_ETM_ETMTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
			trace_chan_id = (u8)(metadata[i][CS_ETM_ETMTRACEIDR]);
			break;
		case __perf_cs_etmv4_magic:
		case __perf_cs_ete_magic:
			metadata[i][CS_ETMV4_TRCTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
			trace_chan_id = (u8)(metadata[i][CS_ETMV4_TRCTRACEIDR]);
			break;
		default:
			/* unknown magic number */
			return -EINVAL;
		}
		err = cs_etm__map_trace_id_v0(etm, trace_chan_id, metadata[i]);
		if (err)
			return err;
	}
	return 0;
}
3273
3274 /*
3275 * Use the data gathered by the peeks for HW_ID (trace ID mappings) and AUX
3276 * (formatted or not) packets to create the decoders.
3277 */
cs_etm__create_queue_decoders(struct cs_etm_queue * etmq)3278 static int cs_etm__create_queue_decoders(struct cs_etm_queue *etmq)
3279 {
3280 struct cs_etm_decoder_params d_params;
3281 struct cs_etm_trace_params *t_params;
3282 int decoders = intlist__nr_entries(etmq->traceid_list);
3283
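	/* One decoder instance is created per trace ID seen on this queue. */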
	if (decoders == 0)
		return 0;

	/*
	 * Each queue can only contain data from one CPU when unformatted, so only one decoder is
	 * needed.
	 */
	if (etmq->format == UNFORMATTED)
		assert(decoders == 1);

	/* Use metadata to fill in trace parameters for trace decoder */
	t_params = zalloc(sizeof(*t_params) * decoders);

	if (!t_params)
		goto out_free;

	if (cs_etm__init_trace_params(t_params, etmq))
		goto out_free;

	/* Set decoder parameters to decode trace packets */
	if (cs_etm__init_decoder_params(&d_params, etmq,
					dump_trace ? CS_ETM_OPERATION_PRINT :
						     CS_ETM_OPERATION_DECODE))
		goto out_free;

	etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
					    t_params);

	if (!etmq->decoder)
		goto out_free;

	/*
	 * Register a function to handle all memory accesses required by
	 * the trace decoder library.
	 */
	if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
					      0x0L, ((u64) -1L),
					      cs_etm__mem_access))
		goto out_free_decoder;

	zfree(&t_params);
	return 0;

out_free_decoder:
	cs_etm_decoder__free(etmq->decoder);
out_free:
	zfree(&t_params);
	return -EINVAL;
}

static int cs_etm__create_decoders(struct cs_etm_auxtrace *etm)
{
	struct auxtrace_queues *queues = &etm->queues;

	for (unsigned int i = 0; i < queues->nr_queues; i++) {
		bool empty = list_empty(&queues->queue_array[i].head);
		struct cs_etm_queue *etmq = queues->queue_array[i].priv;
		int ret;

		/*
		 * Don't create decoders for empty queues, mainly because
		 * etmq->format is unknown for empty queues.
		 */
		assert(empty || etmq->format != UNSET);
		if (empty)
			continue;

		ret = cs_etm__create_queue_decoders(etmq);
		if (ret)
			return ret;
	}
	return 0;
}

int cs_etm__process_auxtrace_info_full(union perf_event *event,
				       struct perf_session *session)
{
	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
	struct cs_etm_auxtrace *etm = NULL;
	struct perf_record_time_conv *tc = &session->time_conv;
	int event_header_size = sizeof(struct perf_event_header);
	int total_size = auxtrace_info->header.size;
	int priv_size = 0;
	int num_cpu, max_cpu = 0;
	int err = 0;
	int aux_hw_id_found;
	int i;
	u64 *ptr = NULL;
	u64 **metadata = NULL;

	/* First the global part */
	ptr = (u64 *) auxtrace_info->priv;
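	/*
	 * CS_PMU_TYPE_CPUS packs the PMU type in its upper 32 bits and the
	 * number of traced CPUs in its lower 32 bits; the PMU type is
	 * extracted further down when filling in etm->pmu_type.
	 */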
3376 num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
3377 metadata = zalloc(sizeof(*metadata) * num_cpu);
3378 if (!metadata)
3379 return -ENOMEM;
3380
3381 /* Start parsing after the common part of the header */
3382 i = CS_HEADER_VERSION_MAX;
3383
3384 /*
3385 * The metadata is stored in the auxtrace_info section and encodes
3386 * the configuration of the ARM embedded trace macrocell which is
3387 * required by the trace decoder to properly decode the trace due
3388 * to its highly compressed nature.
3389 */
3390 for (int j = 0; j < num_cpu; j++) {
3391 if (ptr[i] == __perf_cs_etmv3_magic) {
3392 metadata[j] =
3393 cs_etm__create_meta_blk(ptr, &i,
3394 CS_ETM_PRIV_MAX,
3395 CS_ETM_NR_TRC_PARAMS_V0);
3396 } else if (ptr[i] == __perf_cs_etmv4_magic) {
3397 metadata[j] =
3398 cs_etm__create_meta_blk(ptr, &i,
3399 CS_ETMV4_PRIV_MAX,
3400 CS_ETMV4_NR_TRC_PARAMS_V0);
3401 } else if (ptr[i] == __perf_cs_ete_magic) {
3402 metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1);
3403 } else {
3404 ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". File could be from a newer version of perf.\n",
3405 ptr[i]);
3406 err = -EINVAL;
3407 goto err_free_metadata;
3408 }
3409
3410 if (!metadata[j]) {
3411 err = -ENOMEM;
3412 goto err_free_metadata;
3413 }
3414
3415 if ((int) metadata[j][CS_ETM_CPU] > max_cpu)
3416 max_cpu = metadata[j][CS_ETM_CPU];
3417 }

	/*
	 * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and
	 * CS_ETMV4_PRIV_MAX marks how many double words are in the global
	 * metadata and in each CPU's metadata respectively. Since each
	 * double word is 8 bytes, the following tests that the correct
	 * number of them was present in the auxtrace info section.
	 */
	priv_size = total_size - event_header_size - INFO_HEADER_SIZE;
	if (i * 8 != priv_size) {
		err = -EINVAL;
		goto err_free_metadata;
	}

	etm = zalloc(sizeof(*etm));

	if (!etm) {
		err = -ENOMEM;
		goto err_free_metadata;
	}

	/*
	 * As all the ETMs run at the same exception level, the system should
	 * have the same PID format across CPUs. So cache the PID format and
	 * reuse it for subsequent decoding.
	 */
	etm->pid_fmt = cs_etm__init_pid_fmt(metadata[0]);

	err = auxtrace_queues__init_nr(&etm->queues, max_cpu + 1);
	if (err)
		goto err_free_etm;

	for (unsigned int j = 0; j < etm->queues.nr_queues; ++j) {
		err = cs_etm__setup_queue(etm, &etm->queues.queue_array[j], j);
		if (err)
			goto err_free_queues;
	}

	if (session->itrace_synth_opts->set) {
		etm->synth_opts = *session->itrace_synth_opts;
	} else {
		itrace_synth_opts__set_default(&etm->synth_opts,
				session->itrace_synth_opts->default_no_sample);
		etm->synth_opts.callchain = false;
	}

	etm->session = session;

	etm->num_cpu = num_cpu;
	etm->pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> 32) & 0xffffffff);
	etm->snapshot_mode = (ptr[CS_ETM_SNAPSHOT] != 0);
	etm->metadata = metadata;
	etm->auxtrace_type = auxtrace_info->type;

	if (etm->synth_opts.use_timestamp)
		/*
		 * Prior to Armv8.4, Arm CPUs don't support FEAT_TRF, so the
		 * decoder cannot know whether the traced timestamp matches
		 * kernel time.
		 *
		 * A user who knows the platform can specify the itrace
		 * option 'T' to force the decoder to use the traced
		 * timestamp as kernel time.
		 */
		etm->has_virtual_ts = true;
	else
		/* Use virtual timestamps if all ETMs report ts_source = 1 */
		etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu);

	if (!etm->has_virtual_ts)
		ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n"
			    "The time field of the samples will not be set accurately.\n"
			    "For Arm CPUs prior to Armv8.4 or without FEAT_TRF support,\n"
			    "you can specify the itrace option 'T' for timestamp decoding\n"
			    "if the CoreSight timestamp on the platform matches the kernel time.\n\n");

	etm->auxtrace.process_event = cs_etm__process_event;
	etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
	etm->auxtrace.flush_events = cs_etm__flush_events;
	etm->auxtrace.free_events = cs_etm__free_events;
	etm->auxtrace.free = cs_etm__free;
	etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace;
	session->auxtrace = &etm->auxtrace;

	err = cs_etm__setup_timeless_decoding(etm);
	if (err)
		return err;

	etm->tc.time_shift = tc->time_shift;
	etm->tc.time_mult = tc->time_mult;
	etm->tc.time_zero = tc->time_zero;
	if (event_contains(*tc, time_cycles)) {
		etm->tc.time_cycles = tc->time_cycles;
		etm->tc.time_mask = tc->time_mask;
		etm->tc.cap_user_time_zero = tc->cap_user_time_zero;
		etm->tc.cap_user_time_short = tc->cap_user_time_short;
	}
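	/*
	 * These fields drive the cycles-to-time conversion used when setting
	 * sample timestamps (see tsc_to_perf_time()), roughly:
	 *   quot = cyc >> time_shift;
	 *   rem  = cyc & (((u64)1 << time_shift) - 1);
	 *   time = time_zero + quot * time_mult +
	 *	    ((rem * time_mult) >> time_shift);
	 */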
	err = cs_etm__synth_events(etm, session);
	if (err)
		goto err_free_queues;

	err = cs_etm__queue_aux_records(session);
	if (err)
		goto err_free_queues;

	/*
	 * Map Trace ID values to CPU metadata.
	 *
	 * Trace metadata will always contain Trace ID values from the legacy
	 * algorithm in case it's read by a version of perf that doesn't know
	 * about HW_ID packets or the kernel doesn't emit them.
	 *
	 * Updated kernel drivers that use AUX_HW_ID packets to send Trace IDs
	 * will attempt to use the same IDs as the old algorithm as far as
	 * possible, unless there are clashes, in which case a different value
	 * is used. This means an older perf may still be able to record and
	 * read files generated on a newer system.
	 *
	 * For a perf able to interpret AUX_HW_ID packets we first check for
	 * the presence of those packets. If they are there then the values
	 * will be mapped and plugged into the metadata, and decoders are only
	 * created for each mapping received.
	 *
	 * If no AUX_HW_ID packets are present - which means the file was
	 * recorded on an older kernel - then we map Trace ID values to CPU
	 * directly from the metadata and create decoders for all mappings.
	 */
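	/*
	 * Hypothetical illustration: on a two-CPU system the legacy scheme
	 * would give CPU0 Trace ID 0x10 and CPU1 Trace ID 0x12; a HW_ID-aware
	 * kernel advertises the same values in its AUX_OUTPUT_HW_ID packets
	 * unless a clash forces it to pick different ones.
	 */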

	/* Scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
	aux_hw_id_found = 0;
	err = perf_session__peek_events(session, session->header.data_offset,
					session->header.data_size,
					cs_etm__process_aux_hw_id_cb, &aux_hw_id_found);
	if (err)
		goto err_free_queues;

	/* If no HW ID was found, the file carries metadata values only; map from those */
	if (!aux_hw_id_found) {
		err = cs_etm__map_trace_ids_metadata(etm, num_cpu, metadata);
		if (err)
			goto err_free_queues;
	}

	err = cs_etm__create_decoders(etm);
	if (err)
		goto err_free_queues;

	etm->data_queued = etm->queues.populated;
	return 0;

err_free_queues:
	auxtrace_queues__free(&etm->queues);
	session->auxtrace = NULL;
err_free_etm:
	zfree(&etm);
err_free_metadata:
	/* No need to check @metadata[j], free(NULL) is supported */
	for (int j = 0; j < num_cpu; j++)
		zfree(&metadata[j]);
	zfree(&metadata);
	return err;
}