1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright(C) 2015-2018 Linaro Limited. 4 * 5 * Author: Tor Jeremiassen <tor@ti.com> 6 * Author: Mathieu Poirier <mathieu.poirier@linaro.org> 7 */ 8 9 #include <limits.h> 10 #include <linux/bitfield.h> 11 #include <linux/bitops.h> 12 #include <linux/coresight-pmu.h> 13 #include <linux/err.h> 14 #include <linux/log2.h> 15 #include <linux/types.h> 16 #include <linux/zalloc.h> 17 18 #include <stdlib.h> 19 20 #include "auxtrace.h" 21 #include "color.h" 22 #include "cs-etm.h" 23 #include "cs-etm-decoder/cs-etm-decoder.h" 24 #include "debug.h" 25 #include "dso.h" 26 #include "evlist.h" 27 #include "intlist.h" 28 #include "machine.h" 29 #include "map.h" 30 #include "perf.h" 31 #include "session.h" 32 #include "map_symbol.h" 33 #include "branch.h" 34 #include "symbol.h" 35 #include "tool.h" 36 #include "thread.h" 37 #include "thread-stack.h" 38 #include "tsc.h" 39 #include <tools/libc_compat.h> 40 #include "util/synthetic-events.h" 41 #include "util/util.h" 42 43 struct cs_etm_auxtrace { 44 struct auxtrace auxtrace; 45 struct auxtrace_queues queues; 46 struct auxtrace_heap heap; 47 struct itrace_synth_opts synth_opts; 48 struct perf_session *session; 49 struct perf_tsc_conversion tc; 50 51 /* 52 * Timeless has no timestamps in the trace so overlapping mmap lookups 53 * are less accurate but produces smaller trace data. We use context IDs 54 * in the trace instead of matching timestamps with fork records so 55 * they're not really needed in the general case. Overlapping mmaps 56 * happen in cases like between a fork and an exec. 57 */ 58 bool timeless_decoding; 59 60 /* 61 * Per-thread ignores the trace channel ID and instead assumes that 62 * everything in a buffer comes from the same process regardless of 63 * which CPU it ran on. It also implies no context IDs so the TID is 64 * taken from the auxtrace buffer. 65 */ 66 bool per_thread_decoding; 67 bool snapshot_mode; 68 bool data_queued; 69 bool has_virtual_ts; /* Virtual/Kernel timestamps in the trace. */ 70 71 int num_cpu; 72 u64 latest_kernel_timestamp; 73 u32 auxtrace_type; 74 u64 branches_sample_type; 75 u64 branches_id; 76 u64 instructions_sample_type; 77 u64 instructions_sample_period; 78 u64 instructions_id; 79 u64 **metadata; 80 unsigned int pmu_type; 81 enum cs_etm_pid_fmt pid_fmt; 82 }; 83 84 struct cs_etm_traceid_queue { 85 u8 trace_chan_id; 86 u64 period_instructions; 87 size_t last_branch_pos; 88 union perf_event *event_buf; 89 struct branch_stack *last_branch; 90 struct branch_stack *last_branch_rb; 91 struct cs_etm_packet *prev_packet; 92 struct cs_etm_packet *packet; 93 struct cs_etm_packet_queue packet_queue; 94 95 struct thread *decode_thread; 96 ocsd_ex_level decode_el; 97 98 /* 99 * The frontend accesses the EL from '[prev_]packet' because it needs 100 * previous EL for branch and current EL for instruction samples. It's 101 * not possible to change thread in a single branch sample so no need to 102 * store or access the thread through the packet. 103 */ 104 struct thread *frontend_thread; 105 }; 106 107 enum cs_etm_format { 108 UNSET, 109 FORMATTED, 110 UNFORMATTED 111 }; 112 113 struct cs_etm_queue { 114 struct cs_etm_auxtrace *etm; 115 struct cs_etm_decoder *decoder; 116 struct auxtrace_buffer *buffer; 117 unsigned int queue_nr; 118 u8 pending_timestamp_chan_id; 119 enum cs_etm_format format; 120 u64 offset; 121 const unsigned char *buf; 122 size_t buf_len, buf_used; 123 /* Conversion between traceID and index in traceid_queues array */ 124 struct intlist *traceid_queues_list; 125 struct cs_etm_traceid_queue **traceid_queues; 126 /* Conversion between traceID and metadata pointers */ 127 struct intlist *traceid_list; 128 /* 129 * Same as traceid_list, but traceid_list may be a reference to another 130 * queue's which has a matching sink ID. 131 */ 132 struct intlist *own_traceid_list; 133 u32 sink_id; 134 }; 135 136 static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm); 137 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, 138 pid_t tid); 139 static int cs_etm__get_data_block(struct cs_etm_queue *etmq); 140 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq); 141 static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata); 142 static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu); 143 static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata); 144 145 /* PTMs ETMIDR [11:8] set to b0011 */ 146 #define ETMIDR_PTM_VERSION 0x00000300 147 148 /* 149 * A struct auxtrace_heap_item only has a queue_nr and a timestamp to 150 * work with. One option is to modify to auxtrace_heap_XYZ() API or simply 151 * encode the etm queue number as the upper 16 bit and the channel as 152 * the lower 16 bit. 153 */ 154 #define TO_CS_QUEUE_NR(queue_nr, trace_chan_id) \ 155 (queue_nr << 16 | trace_chan_id) 156 #define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16) 157 #define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff) 158 #define SINK_UNSET ((u32) -1) 159 160 static u32 cs_etm__get_v7_protocol_version(u32 etmidr) 161 { 162 etmidr &= ETMIDR_PTM_VERSION; 163 164 if (etmidr == ETMIDR_PTM_VERSION) 165 return CS_ETM_PROTO_PTM; 166 167 return CS_ETM_PROTO_ETMV3; 168 } 169 170 static int cs_etm__get_magic(struct cs_etm_queue *etmq, u8 trace_chan_id, u64 *magic) 171 { 172 struct int_node *inode; 173 u64 *metadata; 174 175 inode = intlist__find(etmq->traceid_list, trace_chan_id); 176 if (!inode) 177 return -EINVAL; 178 179 metadata = inode->priv; 180 *magic = metadata[CS_ETM_MAGIC]; 181 return 0; 182 } 183 184 int cs_etm__get_cpu(struct cs_etm_queue *etmq, u8 trace_chan_id, int *cpu) 185 { 186 struct int_node *inode; 187 u64 *metadata; 188 189 inode = intlist__find(etmq->traceid_list, trace_chan_id); 190 if (!inode) 191 return -EINVAL; 192 193 metadata = inode->priv; 194 *cpu = (int)metadata[CS_ETM_CPU]; 195 return 0; 196 } 197 198 /* 199 * The returned PID format is presented as an enum: 200 * 201 * CS_ETM_PIDFMT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced. 202 * CS_ETM_PIDFMT_CTXTID2: CONTEXTIDR_EL2 is traced. 203 * CS_ETM_PIDFMT_NONE: No context IDs 204 * 205 * It's possible that the two format attributes 'contextid1' and 'contextid2' 206 * are enabled at the same time when the session runs on an EL2 kernel. 207 * This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be 208 * recorded in the trace data, the tool will selectively use 209 * CONTEXTIDR_EL2 as PID. 210 * 211 * The result is cached in etm->pid_fmt so this function only needs to be called 212 * when processing the aux info. 213 */ 214 static enum cs_etm_pid_fmt cs_etm__init_pid_fmt(u64 *metadata) 215 { 216 u64 val; 217 218 if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) { 219 val = metadata[CS_ETM_ETMCR]; 220 /* CONTEXTIDR is traced */ 221 if (val & ETMCR_CTXTID) 222 return CS_ETM_PIDFMT_CTXTID; 223 } else { 224 val = metadata[CS_ETMV4_TRCCONFIGR]; 225 /* CONTEXTIDR_EL2 is traced */ 226 if (val & (TRCCONFIGR_VMID | TRCCONFIGR_VMIDOPT)) 227 return CS_ETM_PIDFMT_CTXTID2; 228 /* CONTEXTIDR_EL1 is traced */ 229 else if (val & TRCCONFIGR_CID) 230 return CS_ETM_PIDFMT_CTXTID; 231 } 232 233 return CS_ETM_PIDFMT_NONE; 234 } 235 236 enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq) 237 { 238 return etmq->etm->pid_fmt; 239 } 240 241 static int cs_etm__insert_trace_id_node(struct cs_etm_queue *etmq, 242 u8 trace_chan_id, u64 *cpu_metadata) 243 { 244 /* Get an RB node for this CPU */ 245 struct int_node *inode = intlist__findnew(etmq->traceid_list, trace_chan_id); 246 247 /* Something went wrong, no need to continue */ 248 if (!inode) 249 return -ENOMEM; 250 251 /* Disallow re-mapping a different traceID to metadata pair. */ 252 if (inode->priv) { 253 u64 *curr_cpu_data = inode->priv; 254 u8 curr_chan_id; 255 int err; 256 257 if (curr_cpu_data[CS_ETM_CPU] != cpu_metadata[CS_ETM_CPU]) { 258 /* 259 * With > CORESIGHT_TRACE_IDS_MAX ETMs, overlapping IDs 260 * are expected (but not supported) in per-thread mode, 261 * rather than signifying an error. 262 */ 263 if (etmq->etm->per_thread_decoding) 264 pr_err("CS_ETM: overlapping Trace IDs aren't currently supported in per-thread mode\n"); 265 else 266 pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n"); 267 268 return -EINVAL; 269 } 270 271 /* check that the mapped ID matches */ 272 err = cs_etm__metadata_get_trace_id(&curr_chan_id, curr_cpu_data); 273 if (err) 274 return err; 275 276 if (curr_chan_id != trace_chan_id) { 277 pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n"); 278 return -EINVAL; 279 } 280 281 /* Skip re-adding the same mappings if everything matched */ 282 return 0; 283 } 284 285 /* Not one we've seen before, associate the traceID with the metadata pointer */ 286 inode->priv = cpu_metadata; 287 288 return 0; 289 } 290 291 static struct cs_etm_queue *cs_etm__get_queue(struct cs_etm_auxtrace *etm, int cpu) 292 { 293 if (etm->per_thread_decoding) 294 return etm->queues.queue_array[0].priv; 295 296 if (cpu < 0 || cpu >= (int)etm->queues.nr_queues) 297 return NULL; 298 299 return etm->queues.queue_array[cpu].priv; 300 } 301 302 static int cs_etm__map_trace_id_v0(struct cs_etm_auxtrace *etm, u8 trace_chan_id, 303 u64 *cpu_metadata) 304 { 305 struct cs_etm_queue *etmq; 306 307 /* 308 * If the queue is unformatted then only save one mapping in the 309 * queue associated with that CPU so only one decoder is made. 310 */ 311 etmq = cs_etm__get_queue(etm, cpu_metadata[CS_ETM_CPU]); 312 if (!etmq) 313 return -EINVAL; 314 315 if (etmq->format == UNFORMATTED) 316 return cs_etm__insert_trace_id_node(etmq, trace_chan_id, 317 cpu_metadata); 318 319 /* 320 * Otherwise, version 0 trace IDs are global so save them into every 321 * queue. 322 */ 323 for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) { 324 int ret; 325 326 etmq = etm->queues.queue_array[i].priv; 327 if (!etmq) 328 continue; 329 330 ret = cs_etm__insert_trace_id_node(etmq, trace_chan_id, 331 cpu_metadata); 332 if (ret) 333 return ret; 334 } 335 336 return 0; 337 } 338 339 static int cs_etm__process_trace_id_v0(struct cs_etm_auxtrace *etm, int cpu, 340 u64 hw_id) 341 { 342 int err; 343 u64 *cpu_data; 344 u8 trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id); 345 346 cpu_data = get_cpu_data(etm, cpu); 347 if (cpu_data == NULL) 348 return -EINVAL; 349 350 err = cs_etm__map_trace_id_v0(etm, trace_chan_id, cpu_data); 351 if (err) 352 return err; 353 354 /* 355 * if we are picking up the association from the packet, need to plug 356 * the correct trace ID into the metadata for setting up decoders later. 357 */ 358 return cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data); 359 } 360 361 static int cs_etm__process_trace_id_v0_1(struct cs_etm_auxtrace *etm, int cpu, 362 u64 hw_id) 363 { 364 struct cs_etm_queue *etmq = cs_etm__get_queue(etm, cpu); 365 int ret; 366 u64 *cpu_data; 367 u32 sink_id = FIELD_GET(CS_AUX_HW_ID_SINK_ID_MASK, hw_id); 368 u8 trace_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id); 369 370 if (!etmq) 371 return -EINVAL; 372 373 /* 374 * Check sink id hasn't changed in per-cpu mode. In per-thread mode, 375 * let it pass for now until an actual overlapping trace ID is hit. In 376 * most cases IDs won't overlap even if the sink changes. 377 */ 378 if (!etmq->etm->per_thread_decoding && etmq->sink_id != SINK_UNSET && 379 etmq->sink_id != sink_id) { 380 pr_err("CS_ETM: mismatch between sink IDs\n"); 381 return -EINVAL; 382 } 383 384 etmq->sink_id = sink_id; 385 386 /* Find which other queues use this sink and link their ID maps */ 387 for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) { 388 struct cs_etm_queue *other_etmq = etm->queues.queue_array[i].priv; 389 390 if (!other_etmq) 391 continue; 392 393 /* Different sinks, skip */ 394 if (other_etmq->sink_id != etmq->sink_id) 395 continue; 396 397 /* Already linked, skip */ 398 if (other_etmq->traceid_list == etmq->traceid_list) 399 continue; 400 401 /* At the point of first linking, this one should be empty */ 402 if (!intlist__empty(etmq->traceid_list)) { 403 pr_err("CS_ETM: Can't link populated trace ID lists\n"); 404 return -EINVAL; 405 } 406 407 etmq->own_traceid_list = NULL; 408 intlist__delete(etmq->traceid_list); 409 etmq->traceid_list = other_etmq->traceid_list; 410 break; 411 } 412 413 cpu_data = get_cpu_data(etm, cpu); 414 if (!cpu_data) 415 return -EINVAL; 416 417 ret = cs_etm__insert_trace_id_node(etmq, trace_id, cpu_data); 418 if (ret) 419 return ret; 420 421 ret = cs_etm__metadata_set_trace_id(trace_id, cpu_data); 422 if (ret) 423 return ret; 424 425 return 0; 426 } 427 428 static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata) 429 { 430 u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC]; 431 432 switch (cs_etm_magic) { 433 case __perf_cs_etmv3_magic: 434 *trace_chan_id = (u8)(cpu_metadata[CS_ETM_ETMTRACEIDR] & 435 CORESIGHT_TRACE_ID_VAL_MASK); 436 break; 437 case __perf_cs_etmv4_magic: 438 case __perf_cs_ete_magic: 439 *trace_chan_id = (u8)(cpu_metadata[CS_ETMV4_TRCTRACEIDR] & 440 CORESIGHT_TRACE_ID_VAL_MASK); 441 break; 442 default: 443 return -EINVAL; 444 } 445 return 0; 446 } 447 448 /* 449 * update metadata trace ID from the value found in the AUX_HW_INFO packet. 450 */ 451 static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata) 452 { 453 u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC]; 454 455 switch (cs_etm_magic) { 456 case __perf_cs_etmv3_magic: 457 cpu_metadata[CS_ETM_ETMTRACEIDR] = trace_chan_id; 458 break; 459 case __perf_cs_etmv4_magic: 460 case __perf_cs_ete_magic: 461 cpu_metadata[CS_ETMV4_TRCTRACEIDR] = trace_chan_id; 462 break; 463 464 default: 465 return -EINVAL; 466 } 467 return 0; 468 } 469 470 /* 471 * Get a metadata index for a specific cpu from an array. 472 * 473 */ 474 static int get_cpu_data_idx(struct cs_etm_auxtrace *etm, int cpu) 475 { 476 int i; 477 478 for (i = 0; i < etm->num_cpu; i++) { 479 if (etm->metadata[i][CS_ETM_CPU] == (u64)cpu) { 480 return i; 481 } 482 } 483 484 return -1; 485 } 486 487 /* 488 * Get a metadata for a specific cpu from an array. 489 * 490 */ 491 static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu) 492 { 493 int idx = get_cpu_data_idx(etm, cpu); 494 495 return (idx != -1) ? etm->metadata[idx] : NULL; 496 } 497 498 /* 499 * Handle the PERF_RECORD_AUX_OUTPUT_HW_ID event. 500 * 501 * The payload associates the Trace ID and the CPU. 502 * The routine is tolerant of seeing multiple packets with the same association, 503 * but a CPU / Trace ID association changing during a session is an error. 504 */ 505 static int cs_etm__process_aux_output_hw_id(struct perf_session *session, 506 union perf_event *event) 507 { 508 struct cs_etm_auxtrace *etm; 509 struct perf_sample sample; 510 struct evsel *evsel; 511 u64 hw_id; 512 int cpu, version, err; 513 514 /* extract and parse the HW ID */ 515 hw_id = event->aux_output_hw_id.hw_id; 516 version = FIELD_GET(CS_AUX_HW_ID_MAJOR_VERSION_MASK, hw_id); 517 518 /* check that we can handle this version */ 519 if (version > CS_AUX_HW_ID_MAJOR_VERSION) { 520 pr_err("CS ETM Trace: PERF_RECORD_AUX_OUTPUT_HW_ID version %d not supported. Please update Perf.\n", 521 version); 522 return -EINVAL; 523 } 524 525 /* get access to the etm metadata */ 526 etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace); 527 if (!etm || !etm->metadata) 528 return -EINVAL; 529 530 /* parse the sample to get the CPU */ 531 evsel = evlist__event2evsel(session->evlist, event); 532 if (!evsel) 533 return -EINVAL; 534 perf_sample__init(&sample, /*all=*/false); 535 err = evsel__parse_sample(evsel, event, &sample); 536 if (err) 537 goto out; 538 cpu = sample.cpu; 539 if (cpu == -1) { 540 /* no CPU in the sample - possibly recorded with an old version of perf */ 541 pr_err("CS_ETM: no CPU AUX_OUTPUT_HW_ID sample. Use compatible perf to record."); 542 err = -EINVAL; 543 goto out; 544 } 545 546 if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0) { 547 err = cs_etm__process_trace_id_v0(etm, cpu, hw_id); 548 goto out; 549 } 550 551 err = cs_etm__process_trace_id_v0_1(etm, cpu, hw_id); 552 out: 553 perf_sample__exit(&sample); 554 return err; 555 } 556 557 void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq, 558 u8 trace_chan_id) 559 { 560 /* 561 * When a timestamp packet is encountered the backend code 562 * is stopped so that the front end has time to process packets 563 * that were accumulated in the traceID queue. Since there can 564 * be more than one channel per cs_etm_queue, we need to specify 565 * what traceID queue needs servicing. 566 */ 567 etmq->pending_timestamp_chan_id = trace_chan_id; 568 } 569 570 static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq, 571 u8 *trace_chan_id) 572 { 573 struct cs_etm_packet_queue *packet_queue; 574 575 if (!etmq->pending_timestamp_chan_id) 576 return 0; 577 578 if (trace_chan_id) 579 *trace_chan_id = etmq->pending_timestamp_chan_id; 580 581 packet_queue = cs_etm__etmq_get_packet_queue(etmq, 582 etmq->pending_timestamp_chan_id); 583 if (!packet_queue) 584 return 0; 585 586 /* Acknowledge pending status */ 587 etmq->pending_timestamp_chan_id = 0; 588 589 /* See function cs_etm_decoder__do_{hard|soft}_timestamp() */ 590 return packet_queue->cs_timestamp; 591 } 592 593 static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue) 594 { 595 int i; 596 597 queue->head = 0; 598 queue->tail = 0; 599 queue->packet_count = 0; 600 for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) { 601 queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN; 602 queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR; 603 queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR; 604 queue->packet_buffer[i].instr_count = 0; 605 queue->packet_buffer[i].last_instr_taken_branch = false; 606 queue->packet_buffer[i].last_instr_size = 0; 607 queue->packet_buffer[i].last_instr_type = 0; 608 queue->packet_buffer[i].last_instr_subtype = 0; 609 queue->packet_buffer[i].last_instr_cond = 0; 610 queue->packet_buffer[i].flags = 0; 611 queue->packet_buffer[i].exception_number = UINT32_MAX; 612 queue->packet_buffer[i].trace_chan_id = UINT8_MAX; 613 queue->packet_buffer[i].cpu = INT_MIN; 614 } 615 } 616 617 static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq) 618 { 619 int idx; 620 struct int_node *inode; 621 struct cs_etm_traceid_queue *tidq; 622 struct intlist *traceid_queues_list = etmq->traceid_queues_list; 623 624 intlist__for_each_entry(inode, traceid_queues_list) { 625 idx = (int)(intptr_t)inode->priv; 626 tidq = etmq->traceid_queues[idx]; 627 cs_etm__clear_packet_queue(&tidq->packet_queue); 628 } 629 } 630 631 static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq, 632 struct cs_etm_traceid_queue *tidq, 633 u8 trace_chan_id) 634 { 635 int rc = -ENOMEM; 636 struct auxtrace_queue *queue; 637 struct cs_etm_auxtrace *etm = etmq->etm; 638 639 cs_etm__clear_packet_queue(&tidq->packet_queue); 640 641 queue = &etmq->etm->queues.queue_array[etmq->queue_nr]; 642 tidq->trace_chan_id = trace_chan_id; 643 tidq->decode_el = ocsd_EL_unknown; 644 tidq->frontend_thread = machine__findnew_thread(&etm->session->machines.host, -1, 645 queue->tid); 646 tidq->decode_thread = machine__findnew_thread(&etm->session->machines.host, -1, 647 queue->tid); 648 649 tidq->packet = zalloc(sizeof(struct cs_etm_packet)); 650 if (!tidq->packet) 651 goto out; 652 653 tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet)); 654 if (!tidq->prev_packet) 655 goto out_free; 656 657 if (etm->synth_opts.last_branch) { 658 size_t sz = sizeof(struct branch_stack); 659 660 sz += etm->synth_opts.last_branch_sz * 661 sizeof(struct branch_entry); 662 tidq->last_branch = zalloc(sz); 663 if (!tidq->last_branch) 664 goto out_free; 665 tidq->last_branch_rb = zalloc(sz); 666 if (!tidq->last_branch_rb) 667 goto out_free; 668 } 669 670 tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); 671 if (!tidq->event_buf) 672 goto out_free; 673 674 return 0; 675 676 out_free: 677 zfree(&tidq->last_branch_rb); 678 zfree(&tidq->last_branch); 679 zfree(&tidq->prev_packet); 680 zfree(&tidq->packet); 681 out: 682 return rc; 683 } 684 685 static struct cs_etm_traceid_queue 686 *cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id) 687 { 688 int idx; 689 struct int_node *inode; 690 struct intlist *traceid_queues_list; 691 struct cs_etm_traceid_queue *tidq, **traceid_queues; 692 struct cs_etm_auxtrace *etm = etmq->etm; 693 694 if (etm->per_thread_decoding) 695 trace_chan_id = CS_ETM_PER_THREAD_TRACEID; 696 697 traceid_queues_list = etmq->traceid_queues_list; 698 699 /* 700 * Check if the traceid_queue exist for this traceID by looking 701 * in the queue list. 702 */ 703 inode = intlist__find(traceid_queues_list, trace_chan_id); 704 if (inode) { 705 idx = (int)(intptr_t)inode->priv; 706 return etmq->traceid_queues[idx]; 707 } 708 709 /* We couldn't find a traceid_queue for this traceID, allocate one */ 710 tidq = malloc(sizeof(*tidq)); 711 if (!tidq) 712 return NULL; 713 714 memset(tidq, 0, sizeof(*tidq)); 715 716 /* Get a valid index for the new traceid_queue */ 717 idx = intlist__nr_entries(traceid_queues_list); 718 /* Memory for the inode is free'ed in cs_etm_free_traceid_queues () */ 719 inode = intlist__findnew(traceid_queues_list, trace_chan_id); 720 if (!inode) 721 goto out_free; 722 723 /* Associate this traceID with this index */ 724 inode->priv = (void *)(intptr_t)idx; 725 726 if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id)) 727 goto out_free; 728 729 /* Grow the traceid_queues array by one unit */ 730 traceid_queues = etmq->traceid_queues; 731 traceid_queues = reallocarray(traceid_queues, 732 idx + 1, 733 sizeof(*traceid_queues)); 734 735 /* 736 * On failure reallocarray() returns NULL and the original block of 737 * memory is left untouched. 738 */ 739 if (!traceid_queues) 740 goto out_free; 741 742 traceid_queues[idx] = tidq; 743 etmq->traceid_queues = traceid_queues; 744 745 return etmq->traceid_queues[idx]; 746 747 out_free: 748 /* 749 * Function intlist__remove() removes the inode from the list 750 * and delete the memory associated to it. 751 */ 752 intlist__remove(traceid_queues_list, inode); 753 free(tidq); 754 755 return NULL; 756 } 757 758 struct cs_etm_packet_queue 759 *cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id) 760 { 761 struct cs_etm_traceid_queue *tidq; 762 763 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); 764 if (tidq) 765 return &tidq->packet_queue; 766 767 return NULL; 768 } 769 770 static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm, 771 struct cs_etm_traceid_queue *tidq) 772 { 773 struct cs_etm_packet *tmp; 774 775 if (etm->synth_opts.branches || etm->synth_opts.last_branch || 776 etm->synth_opts.instructions) { 777 /* 778 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for 779 * the next incoming packet. 780 */ 781 tmp = tidq->packet; 782 tidq->packet = tidq->prev_packet; 783 tidq->prev_packet = tmp; 784 } 785 } 786 787 static void cs_etm__packet_dump(const char *pkt_string, void *data) 788 { 789 const char *color = PERF_COLOR_BLUE; 790 int len = strlen(pkt_string); 791 struct cs_etm_queue *etmq = data; 792 char queue_nr[64]; 793 794 if (verbose) 795 snprintf(queue_nr, sizeof(queue_nr), "Qnr:%u; ", etmq->queue_nr); 796 else 797 queue_nr[0] = '\0'; 798 799 if (len && (pkt_string[len-1] == '\n')) 800 color_fprintf(stdout, color, " %s%s", queue_nr, pkt_string); 801 else 802 color_fprintf(stdout, color, " %s%s\n", queue_nr, pkt_string); 803 804 fflush(stdout); 805 } 806 807 static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params, 808 u64 *metadata, u32 etmidr) 809 { 810 t_params->protocol = cs_etm__get_v7_protocol_version(etmidr); 811 t_params->etmv3.reg_ctrl = metadata[CS_ETM_ETMCR]; 812 t_params->etmv3.reg_trc_id = metadata[CS_ETM_ETMTRACEIDR]; 813 } 814 815 static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params, 816 u64 *metadata) 817 { 818 t_params->protocol = CS_ETM_PROTO_ETMV4i; 819 t_params->etmv4.reg_idr0 = metadata[CS_ETMV4_TRCIDR0]; 820 t_params->etmv4.reg_idr1 = metadata[CS_ETMV4_TRCIDR1]; 821 t_params->etmv4.reg_idr2 = metadata[CS_ETMV4_TRCIDR2]; 822 t_params->etmv4.reg_idr8 = metadata[CS_ETMV4_TRCIDR8]; 823 t_params->etmv4.reg_configr = metadata[CS_ETMV4_TRCCONFIGR]; 824 t_params->etmv4.reg_traceidr = metadata[CS_ETMV4_TRCTRACEIDR]; 825 } 826 827 static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params, 828 u64 *metadata) 829 { 830 t_params->protocol = CS_ETM_PROTO_ETE; 831 t_params->ete.reg_idr0 = metadata[CS_ETE_TRCIDR0]; 832 t_params->ete.reg_idr1 = metadata[CS_ETE_TRCIDR1]; 833 t_params->ete.reg_idr2 = metadata[CS_ETE_TRCIDR2]; 834 t_params->ete.reg_idr8 = metadata[CS_ETE_TRCIDR8]; 835 t_params->ete.reg_configr = metadata[CS_ETE_TRCCONFIGR]; 836 t_params->ete.reg_traceidr = metadata[CS_ETE_TRCTRACEIDR]; 837 t_params->ete.reg_devarch = metadata[CS_ETE_TRCDEVARCH]; 838 } 839 840 static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params, 841 struct cs_etm_queue *etmq) 842 { 843 struct int_node *inode; 844 845 intlist__for_each_entry(inode, etmq->traceid_list) { 846 u64 *metadata = inode->priv; 847 u64 architecture = metadata[CS_ETM_MAGIC]; 848 u32 etmidr; 849 850 switch (architecture) { 851 case __perf_cs_etmv3_magic: 852 etmidr = metadata[CS_ETM_ETMIDR]; 853 cs_etm__set_trace_param_etmv3(t_params++, metadata, etmidr); 854 break; 855 case __perf_cs_etmv4_magic: 856 cs_etm__set_trace_param_etmv4(t_params++, metadata); 857 break; 858 case __perf_cs_ete_magic: 859 cs_etm__set_trace_param_ete(t_params++, metadata); 860 break; 861 default: 862 return -EINVAL; 863 } 864 } 865 866 return 0; 867 } 868 869 static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, 870 struct cs_etm_queue *etmq, 871 enum cs_etm_decoder_operation mode) 872 { 873 int ret = -EINVAL; 874 875 if (!(mode < CS_ETM_OPERATION_MAX)) 876 goto out; 877 878 d_params->packet_printer = cs_etm__packet_dump; 879 d_params->operation = mode; 880 d_params->data = etmq; 881 d_params->formatted = etmq->format == FORMATTED; 882 d_params->fsyncs = false; 883 d_params->hsyncs = false; 884 d_params->frame_aligned = true; 885 886 ret = 0; 887 out: 888 return ret; 889 } 890 891 static void cs_etm__dump_event(struct cs_etm_queue *etmq, 892 struct auxtrace_buffer *buffer) 893 { 894 int ret; 895 const char *color = PERF_COLOR_BLUE; 896 size_t buffer_used = 0; 897 898 fprintf(stdout, "\n"); 899 color_fprintf(stdout, color, 900 ". ... CoreSight %s Trace data: size %#zx bytes\n", 901 cs_etm_decoder__get_name(etmq->decoder), buffer->size); 902 903 do { 904 size_t consumed; 905 906 ret = cs_etm_decoder__process_data_block( 907 etmq->decoder, buffer->offset, 908 &((u8 *)buffer->data)[buffer_used], 909 buffer->size - buffer_used, &consumed); 910 if (ret) 911 break; 912 913 buffer_used += consumed; 914 } while (buffer_used < buffer->size); 915 916 cs_etm_decoder__reset(etmq->decoder); 917 } 918 919 static int cs_etm__flush_events(struct perf_session *session, 920 const struct perf_tool *tool) 921 { 922 struct cs_etm_auxtrace *etm = container_of(session->auxtrace, 923 struct cs_etm_auxtrace, 924 auxtrace); 925 if (dump_trace) 926 return 0; 927 928 if (!tool->ordered_events) 929 return -EINVAL; 930 931 if (etm->timeless_decoding) { 932 /* 933 * Pass tid = -1 to process all queues. But likely they will have 934 * already been processed on PERF_RECORD_EXIT anyway. 935 */ 936 return cs_etm__process_timeless_queues(etm, -1); 937 } 938 939 return cs_etm__process_timestamped_queues(etm); 940 } 941 942 static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq) 943 { 944 int idx; 945 uintptr_t priv; 946 struct int_node *inode, *tmp; 947 struct cs_etm_traceid_queue *tidq; 948 struct intlist *traceid_queues_list = etmq->traceid_queues_list; 949 950 intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) { 951 priv = (uintptr_t)inode->priv; 952 idx = priv; 953 954 /* Free this traceid_queue from the array */ 955 tidq = etmq->traceid_queues[idx]; 956 thread__zput(tidq->frontend_thread); 957 thread__zput(tidq->decode_thread); 958 zfree(&tidq->event_buf); 959 zfree(&tidq->last_branch); 960 zfree(&tidq->last_branch_rb); 961 zfree(&tidq->prev_packet); 962 zfree(&tidq->packet); 963 zfree(&tidq); 964 965 /* 966 * Function intlist__remove() removes the inode from the list 967 * and delete the memory associated to it. 968 */ 969 intlist__remove(traceid_queues_list, inode); 970 } 971 972 /* Then the RB tree itself */ 973 intlist__delete(traceid_queues_list); 974 etmq->traceid_queues_list = NULL; 975 976 /* finally free the traceid_queues array */ 977 zfree(&etmq->traceid_queues); 978 } 979 980 static void cs_etm__free_queue(void *priv) 981 { 982 struct int_node *inode, *tmp; 983 struct cs_etm_queue *etmq = priv; 984 985 if (!etmq) 986 return; 987 988 cs_etm_decoder__free(etmq->decoder); 989 cs_etm__free_traceid_queues(etmq); 990 991 if (etmq->own_traceid_list) { 992 /* First remove all traceID/metadata nodes for the RB tree */ 993 intlist__for_each_entry_safe(inode, tmp, etmq->own_traceid_list) 994 intlist__remove(etmq->own_traceid_list, inode); 995 996 /* Then the RB tree itself */ 997 intlist__delete(etmq->own_traceid_list); 998 } 999 1000 free(etmq); 1001 } 1002 1003 static void cs_etm__free_events(struct perf_session *session) 1004 { 1005 unsigned int i; 1006 struct cs_etm_auxtrace *aux = container_of(session->auxtrace, 1007 struct cs_etm_auxtrace, 1008 auxtrace); 1009 struct auxtrace_queues *queues = &aux->queues; 1010 1011 for (i = 0; i < queues->nr_queues; i++) { 1012 cs_etm__free_queue(queues->queue_array[i].priv); 1013 queues->queue_array[i].priv = NULL; 1014 } 1015 1016 auxtrace_queues__free(queues); 1017 } 1018 1019 static void cs_etm__free(struct perf_session *session) 1020 { 1021 int i; 1022 struct cs_etm_auxtrace *aux = container_of(session->auxtrace, 1023 struct cs_etm_auxtrace, 1024 auxtrace); 1025 cs_etm__free_events(session); 1026 session->auxtrace = NULL; 1027 1028 for (i = 0; i < aux->num_cpu; i++) 1029 zfree(&aux->metadata[i]); 1030 1031 zfree(&aux->metadata); 1032 zfree(&aux); 1033 } 1034 1035 static bool cs_etm__evsel_is_auxtrace(struct perf_session *session, 1036 struct evsel *evsel) 1037 { 1038 struct cs_etm_auxtrace *aux = container_of(session->auxtrace, 1039 struct cs_etm_auxtrace, 1040 auxtrace); 1041 1042 return evsel->core.attr.type == aux->pmu_type; 1043 } 1044 1045 static struct machine *cs_etm__get_machine(struct cs_etm_queue *etmq, 1046 ocsd_ex_level el) 1047 { 1048 enum cs_etm_pid_fmt pid_fmt = cs_etm__get_pid_fmt(etmq); 1049 1050 /* 1051 * For any virtualisation based on nVHE (e.g. pKVM), or host kernels 1052 * running at EL1 assume everything is the host. 1053 */ 1054 if (pid_fmt == CS_ETM_PIDFMT_CTXTID) 1055 return &etmq->etm->session->machines.host; 1056 1057 /* 1058 * Not perfect, but otherwise assume anything in EL1 is the default 1059 * guest, and everything else is the host. Distinguishing between guest 1060 * and host userspaces isn't currently supported either. Neither is 1061 * multiple guest support. All this does is reduce the likeliness of 1062 * decode errors where we look into the host kernel maps when it should 1063 * have been the guest maps. 1064 */ 1065 switch (el) { 1066 case ocsd_EL1: 1067 return machines__find_guest(&etmq->etm->session->machines, 1068 DEFAULT_GUEST_KERNEL_ID); 1069 case ocsd_EL3: 1070 case ocsd_EL2: 1071 case ocsd_EL0: 1072 case ocsd_EL_unknown: 1073 default: 1074 return &etmq->etm->session->machines.host; 1075 } 1076 } 1077 1078 static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address, 1079 ocsd_ex_level el) 1080 { 1081 struct machine *machine = cs_etm__get_machine(etmq, el); 1082 1083 if (address >= machine__kernel_start(machine)) { 1084 if (machine__is_host(machine)) 1085 return PERF_RECORD_MISC_KERNEL; 1086 else 1087 return PERF_RECORD_MISC_GUEST_KERNEL; 1088 } else { 1089 if (machine__is_host(machine)) 1090 return PERF_RECORD_MISC_USER; 1091 else { 1092 /* 1093 * Can't really happen at the moment because 1094 * cs_etm__get_machine() will always return 1095 * machines.host for any non EL1 trace. 1096 */ 1097 return PERF_RECORD_MISC_GUEST_USER; 1098 } 1099 } 1100 } 1101 1102 static u32 __cs_etm__mem_access(struct cs_etm_queue *etmq, 1103 u64 address, size_t size, u8 *buffer, 1104 const ocsd_mem_space_acc_t mem_space, 1105 ocsd_ex_level el, struct thread *thread) 1106 { 1107 u8 cpumode; 1108 u64 offset; 1109 int len; 1110 struct addr_location al; 1111 struct dso *dso; 1112 int ret = 0; 1113 1114 if (!etmq) 1115 return 0; 1116 1117 addr_location__init(&al); 1118 1119 /* 1120 * We track EL for the frontend and the backend when receiving context 1121 * and range packets. OpenCSD doesn't distinguish between EL0 and EL1 1122 * for this mem access callback so we had to do the extra tracking. Skip 1123 * validation if it's any of the 'any' values. 1124 */ 1125 if (!(mem_space == OCSD_MEM_SPACE_ANY || 1126 mem_space == OCSD_MEM_SPACE_N || mem_space == OCSD_MEM_SPACE_S)) { 1127 if (mem_space & OCSD_MEM_SPACE_EL1N) { 1128 /* Includes both non secure EL1 and EL0 */ 1129 assert(el == ocsd_EL1 || el == ocsd_EL0); 1130 } else if (mem_space & OCSD_MEM_SPACE_EL2) 1131 assert(el == ocsd_EL2); 1132 else if (mem_space & OCSD_MEM_SPACE_EL3) 1133 assert(el == ocsd_EL3); 1134 } 1135 1136 cpumode = cs_etm__cpu_mode(etmq, address, el); 1137 1138 if (!thread__find_map(thread, cpumode, address, &al)) 1139 goto out; 1140 1141 dso = map__dso(al.map); 1142 if (!dso) 1143 goto out; 1144 1145 if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR && 1146 dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE)) 1147 goto out; 1148 1149 offset = map__map_ip(al.map, address); 1150 1151 map__load(al.map); 1152 1153 len = dso__data_read_offset(dso, maps__machine(thread__maps(thread)), 1154 offset, buffer, size); 1155 1156 if (len <= 0) { 1157 ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n" 1158 " Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n"); 1159 if (!dso__auxtrace_warned(dso)) { 1160 pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n", 1161 address, 1162 dso__long_name(dso) ? dso__long_name(dso) : "Unknown"); 1163 dso__set_auxtrace_warned(dso); 1164 } 1165 goto out; 1166 } 1167 ret = len; 1168 out: 1169 addr_location__exit(&al); 1170 return ret; 1171 } 1172 1173 static u32 cs_etm__frontend_mem_access(struct cs_etm_queue *etmq, 1174 struct cs_etm_traceid_queue *tidq, 1175 struct cs_etm_packet *packet, 1176 u64 address, size_t size, u8 *buffer) 1177 { 1178 return __cs_etm__mem_access(etmq, address, size, buffer, 0, packet->el, 1179 tidq->frontend_thread); 1180 } 1181 1182 static u32 cs_etm__decoder_mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id, 1183 u64 address, size_t size, u8 *buffer, 1184 const ocsd_mem_space_acc_t mem_space) 1185 { 1186 struct cs_etm_traceid_queue *tidq; 1187 1188 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); 1189 if (!tidq) 1190 return 0; 1191 1192 return __cs_etm__mem_access(etmq, address, size, buffer, 1193 mem_space, tidq->decode_el, 1194 tidq->decode_thread); 1195 } 1196 1197 static struct cs_etm_queue *cs_etm__alloc_queue(void) 1198 { 1199 struct cs_etm_queue *etmq = zalloc(sizeof(*etmq)); 1200 if (!etmq) 1201 return NULL; 1202 1203 etmq->traceid_queues_list = intlist__new(NULL); 1204 if (!etmq->traceid_queues_list) 1205 goto out_free; 1206 1207 /* 1208 * Create an RB tree for traceID-metadata tuple. Since the conversion 1209 * has to be made for each packet that gets decoded, optimizing access 1210 * in anything other than a sequential array is worth doing. 1211 */ 1212 etmq->traceid_list = etmq->own_traceid_list = intlist__new(NULL); 1213 if (!etmq->traceid_list) 1214 goto out_free; 1215 1216 return etmq; 1217 1218 out_free: 1219 intlist__delete(etmq->traceid_queues_list); 1220 free(etmq); 1221 1222 return NULL; 1223 } 1224 1225 static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, 1226 struct auxtrace_queue *queue, 1227 unsigned int queue_nr) 1228 { 1229 struct cs_etm_queue *etmq = queue->priv; 1230 1231 if (etmq) 1232 return 0; 1233 1234 etmq = cs_etm__alloc_queue(); 1235 1236 if (!etmq) 1237 return -ENOMEM; 1238 1239 queue->priv = etmq; 1240 etmq->etm = etm; 1241 etmq->queue_nr = queue_nr; 1242 queue->cpu = queue_nr; /* Placeholder, may be reset to -1 in per-thread mode */ 1243 etmq->offset = 0; 1244 etmq->sink_id = SINK_UNSET; 1245 1246 return 0; 1247 } 1248 1249 static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm, 1250 struct cs_etm_queue *etmq, 1251 unsigned int queue_nr) 1252 { 1253 int ret = 0; 1254 unsigned int cs_queue_nr; 1255 u8 trace_chan_id; 1256 u64 cs_timestamp; 1257 1258 /* 1259 * We are under a CPU-wide trace scenario. As such we need to know 1260 * when the code that generated the traces started to execute so that 1261 * it can be correlated with execution on other CPUs. So we get a 1262 * handle on the beginning of traces and decode until we find a 1263 * timestamp. The timestamp is then added to the auxtrace min heap 1264 * in order to know what nibble (of all the etmqs) to decode first. 1265 */ 1266 while (1) { 1267 /* 1268 * Fetch an aux_buffer from this etmq. Bail if no more 1269 * blocks or an error has been encountered. 1270 */ 1271 ret = cs_etm__get_data_block(etmq); 1272 if (ret <= 0) 1273 goto out; 1274 1275 /* 1276 * Run decoder on the trace block. The decoder will stop when 1277 * encountering a CS timestamp, a full packet queue or the end of 1278 * trace for that block. 1279 */ 1280 ret = cs_etm__decode_data_block(etmq); 1281 if (ret) 1282 goto out; 1283 1284 /* 1285 * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all 1286 * the timestamp calculation for us. 1287 */ 1288 cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id); 1289 1290 /* We found a timestamp, no need to continue. */ 1291 if (cs_timestamp) 1292 break; 1293 1294 /* 1295 * We didn't find a timestamp so empty all the traceid packet 1296 * queues before looking for another timestamp packet, either 1297 * in the current data block or a new one. Packets that were 1298 * just decoded are useless since no timestamp has been 1299 * associated with them. As such simply discard them. 1300 */ 1301 cs_etm__clear_all_packet_queues(etmq); 1302 } 1303 1304 /* 1305 * We have a timestamp. Add it to the min heap to reflect when 1306 * instructions conveyed by the range packets of this traceID queue 1307 * started to execute. Once the same has been done for all the traceID 1308 * queues of each etmq, redenring and decoding can start in 1309 * chronological order. 1310 * 1311 * Note that packets decoded above are still in the traceID's packet 1312 * queue and will be processed in cs_etm__process_timestamped_queues(). 1313 */ 1314 cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id); 1315 ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp); 1316 out: 1317 return ret; 1318 } 1319 1320 static inline 1321 void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq, 1322 struct cs_etm_traceid_queue *tidq) 1323 { 1324 struct branch_stack *bs_src = tidq->last_branch_rb; 1325 struct branch_stack *bs_dst = tidq->last_branch; 1326 size_t nr = 0; 1327 1328 /* 1329 * Set the number of records before early exit: ->nr is used to 1330 * determine how many branches to copy from ->entries. 1331 */ 1332 bs_dst->nr = bs_src->nr; 1333 1334 /* 1335 * Early exit when there is nothing to copy. 1336 */ 1337 if (!bs_src->nr) 1338 return; 1339 1340 /* 1341 * As bs_src->entries is a circular buffer, we need to copy from it in 1342 * two steps. First, copy the branches from the most recently inserted 1343 * branch ->last_branch_pos until the end of bs_src->entries buffer. 1344 */ 1345 nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos; 1346 memcpy(&bs_dst->entries[0], 1347 &bs_src->entries[tidq->last_branch_pos], 1348 sizeof(struct branch_entry) * nr); 1349 1350 /* 1351 * If we wrapped around at least once, the branches from the beginning 1352 * of the bs_src->entries buffer and until the ->last_branch_pos element 1353 * are older valid branches: copy them over. The total number of 1354 * branches copied over will be equal to the number of branches asked by 1355 * the user in last_branch_sz. 1356 */ 1357 if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) { 1358 memcpy(&bs_dst->entries[nr], 1359 &bs_src->entries[0], 1360 sizeof(struct branch_entry) * tidq->last_branch_pos); 1361 } 1362 } 1363 1364 static inline 1365 void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq) 1366 { 1367 tidq->last_branch_pos = 0; 1368 tidq->last_branch_rb->nr = 0; 1369 } 1370 1371 static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq, 1372 struct cs_etm_traceid_queue *tidq, 1373 struct cs_etm_packet *packet, u64 addr) 1374 { 1375 u8 instrBytes[2]; 1376 1377 cs_etm__frontend_mem_access(etmq, tidq, packet, addr, 1378 ARRAY_SIZE(instrBytes), instrBytes); 1379 /* 1380 * T32 instruction size is indicated by bits[15:11] of the first 1381 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111 1382 * denote a 32-bit instruction. 1383 */ 1384 return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2; 1385 } 1386 1387 static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet) 1388 { 1389 /* 1390 * Return 0 for packets that have no addresses so that CS_ETM_INVAL_ADDR doesn't 1391 * appear in samples. 1392 */ 1393 if (packet->sample_type == CS_ETM_DISCONTINUITY || 1394 packet->sample_type == CS_ETM_EXCEPTION) 1395 return 0; 1396 1397 return packet->start_addr; 1398 } 1399 1400 static inline 1401 u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet) 1402 { 1403 /* Returns 0 for the CS_ETM_DISCONTINUITY packet */ 1404 if (packet->sample_type == CS_ETM_DISCONTINUITY) 1405 return 0; 1406 1407 return packet->end_addr - packet->last_instr_size; 1408 } 1409 1410 static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq, 1411 struct cs_etm_traceid_queue *tidq, 1412 struct cs_etm_packet *packet, 1413 u64 offset) 1414 { 1415 if (packet->isa == CS_ETM_ISA_T32) { 1416 u64 addr = packet->start_addr; 1417 1418 while (offset) { 1419 addr += cs_etm__t32_instr_size(etmq, tidq, packet, 1420 addr); 1421 offset--; 1422 } 1423 return addr; 1424 } 1425 1426 /* Assume a 4 byte instruction size (A32/A64) */ 1427 return packet->start_addr + offset * 4; 1428 } 1429 1430 static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq, 1431 struct cs_etm_traceid_queue *tidq) 1432 { 1433 struct branch_stack *bs = tidq->last_branch_rb; 1434 struct branch_entry *be; 1435 1436 /* 1437 * The branches are recorded in a circular buffer in reverse 1438 * chronological order: we start recording from the last element of the 1439 * buffer down. After writing the first element of the stack, move the 1440 * insert position back to the end of the buffer. 1441 */ 1442 if (!tidq->last_branch_pos) 1443 tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz; 1444 1445 tidq->last_branch_pos -= 1; 1446 1447 be = &bs->entries[tidq->last_branch_pos]; 1448 be->from = cs_etm__last_executed_instr(tidq->prev_packet); 1449 be->to = cs_etm__first_executed_instr(tidq->packet); 1450 /* No support for mispredict */ 1451 be->flags.mispred = 0; 1452 be->flags.predicted = 1; 1453 1454 /* 1455 * Increment bs->nr until reaching the number of last branches asked by 1456 * the user on the command line. 1457 */ 1458 if (bs->nr < etmq->etm->synth_opts.last_branch_sz) 1459 bs->nr += 1; 1460 } 1461 1462 static int cs_etm__inject_event(struct cs_etm_auxtrace *etm, union perf_event *event, 1463 struct perf_sample *sample, u64 type) 1464 { 1465 struct evsel *evsel = sample->evsel; 1466 u64 branch_sample_type = 0; 1467 size_t sz; 1468 1469 if (!evsel && etm->session && etm->session->evlist) 1470 evsel = evlist__id2evsel(etm->session->evlist, sample->id); 1471 1472 if (evsel) 1473 branch_sample_type = evsel->core.attr.branch_sample_type; 1474 1475 sz = perf_event__sample_event_size(sample, type, /*read_format=*/0, 1476 branch_sample_type); 1477 if (sz >= PERF_SAMPLE_MAX_SIZE) { 1478 pr_err("Sample size %zu exceeds max size %d\n", sz, PERF_SAMPLE_MAX_SIZE); 1479 return -EFAULT; 1480 } 1481 event->header.size = sz; 1482 1483 return perf_event__synthesize_sample(event, type, /*read_format=*/0, 1484 branch_sample_type, sample); 1485 } 1486 1487 1488 static int 1489 cs_etm__get_trace(struct cs_etm_queue *etmq) 1490 { 1491 struct auxtrace_buffer *aux_buffer = etmq->buffer; 1492 struct auxtrace_buffer *old_buffer = aux_buffer; 1493 struct auxtrace_queue *queue; 1494 1495 queue = &etmq->etm->queues.queue_array[etmq->queue_nr]; 1496 1497 aux_buffer = auxtrace_buffer__next(queue, aux_buffer); 1498 1499 /* If no more data, drop the previous auxtrace_buffer and return */ 1500 if (!aux_buffer) { 1501 if (old_buffer) 1502 auxtrace_buffer__drop_data(old_buffer); 1503 etmq->buf_len = 0; 1504 return 0; 1505 } 1506 1507 etmq->buffer = aux_buffer; 1508 1509 /* If the aux_buffer doesn't have data associated, try to load it */ 1510 if (!aux_buffer->data) { 1511 /* get the file desc associated with the perf data file */ 1512 int fd = perf_data__fd(etmq->etm->session->data); 1513 1514 aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd); 1515 if (!aux_buffer->data) 1516 return -ENOMEM; 1517 } 1518 1519 /* If valid, drop the previous buffer */ 1520 if (old_buffer) 1521 auxtrace_buffer__drop_data(old_buffer); 1522 1523 etmq->buf_used = 0; 1524 etmq->buf_len = aux_buffer->size; 1525 etmq->buf = aux_buffer->data; 1526 1527 return etmq->buf_len; 1528 } 1529 1530 /* 1531 * Convert a raw thread number to a thread struct and assign it to **thread. 1532 */ 1533 static int cs_etm__etmq_update_thread(struct cs_etm_queue *etmq, 1534 ocsd_ex_level el, pid_t tid, 1535 struct thread **thread) 1536 { 1537 struct machine *machine = cs_etm__get_machine(etmq, el); 1538 1539 if (!machine || !*thread) 1540 return -EINVAL; 1541 1542 if (tid != -1) { 1543 thread__zput(*thread); 1544 *thread = machine__find_thread(machine, -1, tid); 1545 } 1546 1547 /* Couldn't find a known thread */ 1548 if (!*thread) 1549 *thread = machine__idle_thread(machine); 1550 1551 return 0; 1552 } 1553 1554 /* 1555 * Set the thread and EL of the decode context which is ahead in time of the 1556 * frontend context. 1557 */ 1558 int cs_etm__etmq_update_decode_context(struct cs_etm_queue *etmq, 1559 u8 trace_chan_id, 1560 ocsd_ex_level el, pid_t tid) 1561 { 1562 struct cs_etm_traceid_queue *tidq; 1563 int ret; 1564 1565 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); 1566 if (!tidq) 1567 return -EINVAL; 1568 1569 ret = cs_etm__etmq_update_thread(etmq, el, tid, 1570 &tidq->decode_thread); 1571 if (ret) 1572 return ret; 1573 1574 tidq->decode_el = el; 1575 return 0; 1576 } 1577 1578 bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq) 1579 { 1580 return !!etmq->etm->timeless_decoding; 1581 } 1582 1583 static void cs_etm__copy_insn(struct cs_etm_queue *etmq, 1584 struct cs_etm_traceid_queue *tidq, 1585 struct cs_etm_packet *packet, 1586 struct perf_sample *sample) 1587 { 1588 /* 1589 * It's pointless to read instructions for the CS_ETM_DISCONTINUITY 1590 * packet, so directly bail out with 'insn_len' = 0. 1591 */ 1592 if (packet->sample_type == CS_ETM_DISCONTINUITY) { 1593 sample->insn_len = 0; 1594 return; 1595 } 1596 1597 /* 1598 * T32 instruction size might be 32-bit or 16-bit, decide by calling 1599 * cs_etm__t32_instr_size(). 1600 */ 1601 if (packet->isa == CS_ETM_ISA_T32) 1602 sample->insn_len = cs_etm__t32_instr_size(etmq, tidq, packet, 1603 sample->ip); 1604 /* Otherwise, A64 and A32 instruction size are always 32-bit. */ 1605 else 1606 sample->insn_len = 4; 1607 1608 cs_etm__frontend_mem_access(etmq, tidq, packet, sample->ip, 1609 sample->insn_len, (void *)sample->insn); 1610 } 1611 1612 u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp) 1613 { 1614 struct cs_etm_auxtrace *etm = etmq->etm; 1615 1616 if (etm->has_virtual_ts) 1617 return tsc_to_perf_time(cs_timestamp, &etm->tc); 1618 else 1619 return cs_timestamp; 1620 } 1621 1622 static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq, 1623 struct cs_etm_traceid_queue *tidq) 1624 { 1625 struct cs_etm_auxtrace *etm = etmq->etm; 1626 struct cs_etm_packet_queue *packet_queue = &tidq->packet_queue; 1627 1628 if (!etm->timeless_decoding && etm->has_virtual_ts) 1629 return packet_queue->cs_timestamp; 1630 else 1631 return etm->latest_kernel_timestamp; 1632 } 1633 1634 static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, 1635 struct cs_etm_traceid_queue *tidq, 1636 struct cs_etm_packet *packet, 1637 u64 addr, u64 period) 1638 { 1639 int ret = 0; 1640 struct cs_etm_auxtrace *etm = etmq->etm; 1641 union perf_event *event = tidq->event_buf; 1642 struct perf_sample sample; 1643 1644 perf_sample__init(&sample, /*all=*/true); 1645 event->sample.header.type = PERF_RECORD_SAMPLE; 1646 event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, packet->el); 1647 event->sample.header.size = sizeof(struct perf_event_header); 1648 1649 /* Set time field based on etm auxtrace config. */ 1650 sample.time = cs_etm__resolve_sample_time(etmq, tidq); 1651 1652 sample.ip = addr; 1653 sample.pid = thread__pid(tidq->frontend_thread); 1654 sample.tid = thread__tid(tidq->frontend_thread); 1655 sample.id = etmq->etm->instructions_id; 1656 sample.stream_id = etmq->etm->instructions_id; 1657 sample.period = period; 1658 sample.cpu = packet->cpu; 1659 sample.flags = tidq->prev_packet->flags; 1660 sample.cpumode = event->sample.header.misc; 1661 1662 cs_etm__copy_insn(etmq, tidq, packet, &sample); 1663 1664 if (etm->synth_opts.last_branch) 1665 sample.branch_stack = tidq->last_branch; 1666 1667 if (etm->synth_opts.inject) { 1668 ret = cs_etm__inject_event(etm, event, &sample, 1669 etm->instructions_sample_type); 1670 if (ret) 1671 return ret; 1672 } 1673 1674 ret = perf_session__deliver_synth_event(etm->session, event, &sample); 1675 1676 if (ret) 1677 pr_err( 1678 "CS ETM Trace: failed to deliver instruction event, error %d\n", 1679 ret); 1680 1681 perf_sample__exit(&sample); 1682 return ret; 1683 } 1684 1685 /* 1686 * The cs etm packet encodes an instruction range between a branch target 1687 * and the next taken branch. Generate sample accordingly. 1688 */ 1689 static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, 1690 struct cs_etm_traceid_queue *tidq) 1691 { 1692 int ret = 0; 1693 struct cs_etm_auxtrace *etm = etmq->etm; 1694 struct perf_sample sample = {.ip = 0,}; 1695 union perf_event *event = tidq->event_buf; 1696 struct dummy_branch_stack { 1697 u64 nr; 1698 u64 hw_idx; 1699 struct branch_entry entries; 1700 } dummy_bs; 1701 u64 ip; 1702 1703 ip = cs_etm__last_executed_instr(tidq->prev_packet); 1704 1705 event->sample.header.type = PERF_RECORD_SAMPLE; 1706 event->sample.header.misc = cs_etm__cpu_mode(etmq, ip, 1707 tidq->prev_packet->el); 1708 event->sample.header.size = sizeof(struct perf_event_header); 1709 1710 /* Set time field based on etm auxtrace config. */ 1711 sample.time = cs_etm__resolve_sample_time(etmq, tidq); 1712 1713 sample.ip = ip; 1714 sample.pid = thread__pid(tidq->frontend_thread); 1715 sample.tid = thread__tid(tidq->frontend_thread); 1716 sample.addr = cs_etm__first_executed_instr(tidq->packet); 1717 sample.id = etmq->etm->branches_id; 1718 sample.stream_id = etmq->etm->branches_id; 1719 sample.period = 1; 1720 sample.cpu = tidq->packet->cpu; 1721 sample.flags = tidq->prev_packet->flags; 1722 sample.cpumode = event->sample.header.misc; 1723 1724 cs_etm__copy_insn(etmq, tidq, tidq->prev_packet, &sample); 1725 1726 /* 1727 * perf report cannot handle events without a branch stack 1728 */ 1729 if (etm->synth_opts.last_branch) { 1730 dummy_bs = (struct dummy_branch_stack){ 1731 .nr = 1, 1732 .hw_idx = -1ULL, 1733 .entries = { 1734 .from = sample.ip, 1735 .to = sample.addr, 1736 }, 1737 }; 1738 sample.branch_stack = (struct branch_stack *)&dummy_bs; 1739 } 1740 1741 if (etm->synth_opts.inject) { 1742 ret = cs_etm__inject_event(etm, event, &sample, 1743 etm->branches_sample_type); 1744 if (ret) 1745 return ret; 1746 } 1747 1748 ret = perf_session__deliver_synth_event(etm->session, event, &sample); 1749 1750 if (ret) 1751 pr_err( 1752 "CS ETM Trace: failed to deliver instruction event, error %d\n", 1753 ret); 1754 1755 return ret; 1756 } 1757 1758 static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, 1759 struct perf_session *session) 1760 { 1761 struct evlist *evlist = session->evlist; 1762 struct evsel *evsel; 1763 struct perf_event_attr attr; 1764 bool found = false; 1765 u64 id; 1766 int err; 1767 1768 evlist__for_each_entry(evlist, evsel) { 1769 if (evsel->core.attr.type == etm->pmu_type) { 1770 found = true; 1771 break; 1772 } 1773 } 1774 1775 if (!found) { 1776 pr_debug("No selected events with CoreSight Trace data\n"); 1777 return 0; 1778 } 1779 1780 memset(&attr, 0, sizeof(struct perf_event_attr)); 1781 attr.size = sizeof(struct perf_event_attr); 1782 attr.type = PERF_TYPE_HARDWARE; 1783 attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK; 1784 attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | 1785 PERF_SAMPLE_PERIOD; 1786 if (etm->timeless_decoding) 1787 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; 1788 else 1789 attr.sample_type |= PERF_SAMPLE_TIME; 1790 1791 attr.exclude_user = evsel->core.attr.exclude_user; 1792 attr.exclude_kernel = evsel->core.attr.exclude_kernel; 1793 attr.exclude_hv = evsel->core.attr.exclude_hv; 1794 attr.exclude_host = evsel->core.attr.exclude_host; 1795 attr.exclude_guest = evsel->core.attr.exclude_guest; 1796 attr.sample_id_all = evsel->core.attr.sample_id_all; 1797 attr.read_format = evsel->core.attr.read_format; 1798 1799 /* create new id val to be a fixed offset from evsel id */ 1800 id = auxtrace_synth_id_range_start(evsel); 1801 1802 if (etm->synth_opts.branches) { 1803 attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; 1804 attr.sample_period = 1; 1805 attr.sample_type |= PERF_SAMPLE_ADDR; 1806 err = perf_session__deliver_synth_attr_event(session, &attr, id); 1807 if (err) 1808 return err; 1809 etm->branches_sample_type = attr.sample_type; 1810 etm->branches_id = id; 1811 id += 1; 1812 attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR; 1813 } 1814 1815 if (etm->synth_opts.last_branch) { 1816 attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; 1817 /* 1818 * We don't use the hardware index, but the sample generation 1819 * code uses the new format branch_stack with this field, 1820 * so the event attributes must indicate that it's present. 1821 */ 1822 attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX; 1823 } 1824 1825 if (etm->synth_opts.instructions) { 1826 attr.config = PERF_COUNT_HW_INSTRUCTIONS; 1827 attr.sample_period = etm->synth_opts.period; 1828 etm->instructions_sample_period = attr.sample_period; 1829 err = perf_session__deliver_synth_attr_event(session, &attr, id); 1830 if (err) 1831 return err; 1832 etm->instructions_sample_type = attr.sample_type; 1833 etm->instructions_id = id; 1834 id += 1; 1835 } 1836 1837 return 0; 1838 } 1839 1840 static int cs_etm__sample(struct cs_etm_queue *etmq, 1841 struct cs_etm_traceid_queue *tidq) 1842 { 1843 struct cs_etm_auxtrace *etm = etmq->etm; 1844 int ret; 1845 u64 instrs_prev; 1846 1847 /* Get instructions remainder from previous packet */ 1848 instrs_prev = tidq->period_instructions; 1849 1850 tidq->period_instructions += tidq->packet->instr_count; 1851 1852 /* 1853 * Record a branch when the last instruction in 1854 * PREV_PACKET is a branch. 1855 */ 1856 if (etm->synth_opts.last_branch && 1857 tidq->prev_packet->sample_type == CS_ETM_RANGE && 1858 tidq->prev_packet->last_instr_taken_branch) 1859 cs_etm__update_last_branch_rb(etmq, tidq); 1860 1861 if (etm->synth_opts.instructions && 1862 tidq->period_instructions >= etm->instructions_sample_period) { 1863 /* 1864 * Emit instruction sample periodically 1865 * TODO: allow period to be defined in cycles and clock time 1866 */ 1867 1868 /* 1869 * Below diagram demonstrates the instruction samples 1870 * generation flows: 1871 * 1872 * Instrs Instrs Instrs Instrs 1873 * Sample(n) Sample(n+1) Sample(n+2) Sample(n+3) 1874 * | | | | 1875 * V V V V 1876 * -------------------------------------------------- 1877 * ^ ^ 1878 * | | 1879 * Period Period 1880 * instructions(Pi) instructions(Pi') 1881 * 1882 * | | 1883 * \---------------- -----------------/ 1884 * V 1885 * tidq->packet->instr_count 1886 * 1887 * Instrs Sample(n...) are the synthesised samples occurring 1888 * every etm->instructions_sample_period instructions - as 1889 * defined on the perf command line. Sample(n) is being the 1890 * last sample before the current etm packet, n+1 to n+3 1891 * samples are generated from the current etm packet. 1892 * 1893 * tidq->packet->instr_count represents the number of 1894 * instructions in the current etm packet. 1895 * 1896 * Period instructions (Pi) contains the number of 1897 * instructions executed after the sample point(n) from the 1898 * previous etm packet. This will always be less than 1899 * etm->instructions_sample_period. 1900 * 1901 * When generate new samples, it combines with two parts 1902 * instructions, one is the tail of the old packet and another 1903 * is the head of the new coming packet, to generate 1904 * sample(n+1); sample(n+2) and sample(n+3) consume the 1905 * instructions with sample period. After sample(n+3), the rest 1906 * instructions will be used by later packet and it is assigned 1907 * to tidq->period_instructions for next round calculation. 1908 */ 1909 1910 /* 1911 * Get the initial offset into the current packet instructions; 1912 * entry conditions ensure that instrs_prev is less than 1913 * etm->instructions_sample_period. 1914 */ 1915 u64 offset = etm->instructions_sample_period - instrs_prev; 1916 u64 addr; 1917 1918 /* Prepare last branches for instruction sample */ 1919 if (etm->synth_opts.last_branch) 1920 cs_etm__copy_last_branch_rb(etmq, tidq); 1921 1922 while (tidq->period_instructions >= 1923 etm->instructions_sample_period) { 1924 /* 1925 * Calculate the address of the sampled instruction (-1 1926 * as sample is reported as though instruction has just 1927 * been executed, but PC has not advanced to next 1928 * instruction) 1929 */ 1930 addr = cs_etm__instr_addr(etmq, tidq, tidq->packet, 1931 offset - 1); 1932 ret = cs_etm__synth_instruction_sample( 1933 etmq, tidq, tidq->packet, addr, 1934 etm->instructions_sample_period); 1935 if (ret) 1936 return ret; 1937 1938 offset += etm->instructions_sample_period; 1939 tidq->period_instructions -= 1940 etm->instructions_sample_period; 1941 } 1942 } 1943 1944 if (etm->synth_opts.branches) { 1945 bool generate_sample = false; 1946 1947 /* Generate sample for tracing on packet */ 1948 if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY) 1949 generate_sample = true; 1950 1951 /* Generate sample for branch taken packet */ 1952 if (tidq->prev_packet->sample_type == CS_ETM_RANGE && 1953 tidq->prev_packet->last_instr_taken_branch) 1954 generate_sample = true; 1955 1956 if (generate_sample) { 1957 ret = cs_etm__synth_branch_sample(etmq, tidq); 1958 if (ret) 1959 return ret; 1960 } 1961 } 1962 1963 cs_etm__packet_swap(etm, tidq); 1964 1965 return 0; 1966 } 1967 1968 static int cs_etm__exception(struct cs_etm_traceid_queue *tidq) 1969 { 1970 /* 1971 * When the exception packet is inserted, whether the last instruction 1972 * in previous range packet is taken branch or not, we need to force 1973 * to set 'prev_packet->last_instr_taken_branch' to true. This ensures 1974 * to generate branch sample for the instruction range before the 1975 * exception is trapped to kernel or before the exception returning. 1976 * 1977 * The exception packet includes the dummy address values, so don't 1978 * swap PACKET with PREV_PACKET. This keeps PREV_PACKET to be useful 1979 * for generating instruction and branch samples. 1980 */ 1981 if (tidq->prev_packet->sample_type == CS_ETM_RANGE) 1982 tidq->prev_packet->last_instr_taken_branch = true; 1983 1984 return 0; 1985 } 1986 1987 static int cs_etm__flush(struct cs_etm_queue *etmq, 1988 struct cs_etm_traceid_queue *tidq) 1989 { 1990 int err = 0; 1991 struct cs_etm_auxtrace *etm = etmq->etm; 1992 1993 /* Handle start tracing packet */ 1994 if (tidq->prev_packet->sample_type == CS_ETM_EMPTY) 1995 goto swap_packet; 1996 1997 if (etmq->etm->synth_opts.last_branch && 1998 etmq->etm->synth_opts.instructions && 1999 tidq->prev_packet->sample_type == CS_ETM_RANGE) { 2000 u64 addr; 2001 2002 /* Prepare last branches for instruction sample */ 2003 cs_etm__copy_last_branch_rb(etmq, tidq); 2004 2005 /* 2006 * Generate a last branch event for the branches left in the 2007 * circular buffer at the end of the trace. 2008 * 2009 * Use the address of the end of the last reported execution 2010 * range 2011 */ 2012 addr = cs_etm__last_executed_instr(tidq->prev_packet); 2013 2014 err = cs_etm__synth_instruction_sample( 2015 etmq, tidq, tidq->prev_packet, addr, 2016 tidq->period_instructions); 2017 if (err) 2018 return err; 2019 2020 tidq->period_instructions = 0; 2021 2022 } 2023 2024 if (etm->synth_opts.branches && 2025 tidq->prev_packet->sample_type == CS_ETM_RANGE) { 2026 err = cs_etm__synth_branch_sample(etmq, tidq); 2027 if (err) 2028 return err; 2029 } 2030 2031 swap_packet: 2032 cs_etm__packet_swap(etm, tidq); 2033 2034 /* Reset last branches after flush the trace */ 2035 if (etm->synth_opts.last_branch) 2036 cs_etm__reset_last_branch_rb(tidq); 2037 2038 return err; 2039 } 2040 2041 static int cs_etm__end_block(struct cs_etm_queue *etmq, 2042 struct cs_etm_traceid_queue *tidq) 2043 { 2044 int err; 2045 2046 /* 2047 * It has no new packet coming and 'etmq->packet' contains the stale 2048 * packet which was set at the previous time with packets swapping; 2049 * so skip to generate branch sample to avoid stale packet. 2050 * 2051 * For this case only flush branch stack and generate a last branch 2052 * event for the branches left in the circular buffer at the end of 2053 * the trace. 2054 */ 2055 if (etmq->etm->synth_opts.last_branch && 2056 etmq->etm->synth_opts.instructions && 2057 tidq->prev_packet->sample_type == CS_ETM_RANGE) { 2058 u64 addr; 2059 2060 /* Prepare last branches for instruction sample */ 2061 cs_etm__copy_last_branch_rb(etmq, tidq); 2062 2063 /* 2064 * Use the address of the end of the last reported execution 2065 * range. 2066 */ 2067 addr = cs_etm__last_executed_instr(tidq->prev_packet); 2068 2069 err = cs_etm__synth_instruction_sample( 2070 etmq, tidq, tidq->prev_packet, addr, 2071 tidq->period_instructions); 2072 if (err) 2073 return err; 2074 2075 tidq->period_instructions = 0; 2076 } 2077 2078 return 0; 2079 } 2080 /* 2081 * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue 2082 * if need be. 2083 * Returns: < 0 if error 2084 * = 0 if no more auxtrace_buffer to read 2085 * > 0 if the current buffer isn't empty yet 2086 */ 2087 static int cs_etm__get_data_block(struct cs_etm_queue *etmq) 2088 { 2089 int ret; 2090 2091 if (!etmq->buf_len) { 2092 ret = cs_etm__get_trace(etmq); 2093 if (ret <= 0) 2094 return ret; 2095 /* 2096 * We cannot assume consecutive blocks in the data file 2097 * are contiguous, reset the decoder to force re-sync. 2098 */ 2099 ret = cs_etm_decoder__reset(etmq->decoder); 2100 if (ret) 2101 return ret; 2102 } 2103 2104 return etmq->buf_len; 2105 } 2106 2107 static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, 2108 struct cs_etm_traceid_queue *tidq, 2109 struct cs_etm_packet *packet, u64 end_addr) 2110 { 2111 /* Initialise to keep compiler happy */ 2112 u16 instr16 = 0; 2113 u32 instr32 = 0; 2114 u64 addr; 2115 2116 switch (packet->isa) { 2117 case CS_ETM_ISA_T32: 2118 /* 2119 * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247: 2120 * 2121 * b'15 b'8 2122 * +-----------------+--------+ 2123 * | 1 1 0 1 1 1 1 1 | imm8 | 2124 * +-----------------+--------+ 2125 * 2126 * According to the specification, it only defines SVC for T32 2127 * with 16 bits instruction and has no definition for 32bits; 2128 * so below only read 2 bytes as instruction size for T32. 2129 */ 2130 addr = end_addr - 2; 2131 cs_etm__frontend_mem_access(etmq, tidq, packet, addr, 2132 sizeof(instr16), (u8 *)&instr16); 2133 if ((instr16 & 0xFF00) == 0xDF00) 2134 return true; 2135 2136 break; 2137 case CS_ETM_ISA_A32: 2138 /* 2139 * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247: 2140 * 2141 * b'31 b'28 b'27 b'24 2142 * +---------+---------+-------------------------+ 2143 * | !1111 | 1 1 1 1 | imm24 | 2144 * +---------+---------+-------------------------+ 2145 */ 2146 addr = end_addr - 4; 2147 cs_etm__frontend_mem_access(etmq, tidq, packet, addr, 2148 sizeof(instr32), (u8 *)&instr32); 2149 if ((instr32 & 0x0F000000) == 0x0F000000 && 2150 (instr32 & 0xF0000000) != 0xF0000000) 2151 return true; 2152 2153 break; 2154 case CS_ETM_ISA_A64: 2155 /* 2156 * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294: 2157 * 2158 * b'31 b'21 b'4 b'0 2159 * +-----------------------+---------+-----------+ 2160 * | 1 1 0 1 0 1 0 0 0 0 0 | imm16 | 0 0 0 0 1 | 2161 * +-----------------------+---------+-----------+ 2162 */ 2163 addr = end_addr - 4; 2164 cs_etm__frontend_mem_access(etmq, tidq, packet, addr, 2165 sizeof(instr32), (u8 *)&instr32); 2166 if ((instr32 & 0xFFE0001F) == 0xd4000001) 2167 return true; 2168 2169 break; 2170 case CS_ETM_ISA_UNKNOWN: 2171 default: 2172 break; 2173 } 2174 2175 return false; 2176 } 2177 2178 static bool cs_etm__is_syscall(struct cs_etm_queue *etmq, 2179 struct cs_etm_traceid_queue *tidq, u64 magic) 2180 { 2181 struct cs_etm_packet *packet = tidq->packet; 2182 struct cs_etm_packet *prev_packet = tidq->prev_packet; 2183 2184 if (magic == __perf_cs_etmv3_magic) 2185 if (packet->exception_number == CS_ETMV3_EXC_SVC) 2186 return true; 2187 2188 /* 2189 * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and 2190 * HVC cases; need to check if it's SVC instruction based on 2191 * packet address. 2192 */ 2193 if (magic == __perf_cs_etmv4_magic) { 2194 if (packet->exception_number == CS_ETMV4_EXC_CALL && 2195 cs_etm__is_svc_instr(etmq, tidq, prev_packet, 2196 prev_packet->end_addr)) 2197 return true; 2198 } 2199 2200 return false; 2201 } 2202 2203 static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq, 2204 u64 magic) 2205 { 2206 struct cs_etm_packet *packet = tidq->packet; 2207 2208 if (magic == __perf_cs_etmv3_magic) 2209 if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT || 2210 packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT || 2211 packet->exception_number == CS_ETMV3_EXC_PE_RESET || 2212 packet->exception_number == CS_ETMV3_EXC_IRQ || 2213 packet->exception_number == CS_ETMV3_EXC_FIQ) 2214 return true; 2215 2216 if (magic == __perf_cs_etmv4_magic) 2217 if (packet->exception_number == CS_ETMV4_EXC_RESET || 2218 packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT || 2219 packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR || 2220 packet->exception_number == CS_ETMV4_EXC_INST_DEBUG || 2221 packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG || 2222 packet->exception_number == CS_ETMV4_EXC_IRQ || 2223 packet->exception_number == CS_ETMV4_EXC_FIQ) 2224 return true; 2225 2226 return false; 2227 } 2228 2229 static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, 2230 struct cs_etm_traceid_queue *tidq, 2231 u64 magic) 2232 { 2233 struct cs_etm_packet *packet = tidq->packet; 2234 struct cs_etm_packet *prev_packet = tidq->prev_packet; 2235 2236 if (magic == __perf_cs_etmv3_magic) 2237 if (packet->exception_number == CS_ETMV3_EXC_SMC || 2238 packet->exception_number == CS_ETMV3_EXC_HYP || 2239 packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE || 2240 packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR || 2241 packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT || 2242 packet->exception_number == CS_ETMV3_EXC_DATA_FAULT || 2243 packet->exception_number == CS_ETMV3_EXC_GENERIC) 2244 return true; 2245 2246 if (magic == __perf_cs_etmv4_magic) { 2247 if (packet->exception_number == CS_ETMV4_EXC_TRAP || 2248 packet->exception_number == CS_ETMV4_EXC_ALIGNMENT || 2249 packet->exception_number == CS_ETMV4_EXC_INST_FAULT || 2250 packet->exception_number == CS_ETMV4_EXC_DATA_FAULT) 2251 return true; 2252 2253 /* 2254 * For CS_ETMV4_EXC_CALL, except SVC other instructions 2255 * (SMC, HVC) are taken as sync exceptions. 2256 */ 2257 if (packet->exception_number == CS_ETMV4_EXC_CALL && 2258 !cs_etm__is_svc_instr(etmq, tidq, prev_packet, 2259 prev_packet->end_addr)) 2260 return true; 2261 2262 /* 2263 * ETMv4 has 5 bits for exception number; if the numbers 2264 * are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ] 2265 * they are implementation defined exceptions. 2266 * 2267 * For this case, simply take it as sync exception. 2268 */ 2269 if (packet->exception_number > CS_ETMV4_EXC_FIQ && 2270 packet->exception_number <= CS_ETMV4_EXC_END) 2271 return true; 2272 } 2273 2274 return false; 2275 } 2276 2277 static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq, 2278 struct cs_etm_traceid_queue *tidq) 2279 { 2280 struct cs_etm_packet *packet = tidq->packet; 2281 struct cs_etm_packet *prev_packet = tidq->prev_packet; 2282 u64 magic; 2283 int ret; 2284 2285 switch (packet->sample_type) { 2286 case CS_ETM_RANGE: 2287 /* 2288 * Immediate branch instruction without neither link nor 2289 * return flag, it's normal branch instruction within 2290 * the function. 2291 */ 2292 if (packet->last_instr_type == OCSD_INSTR_BR && 2293 packet->last_instr_subtype == OCSD_S_INSTR_NONE) { 2294 packet->flags = PERF_IP_FLAG_BRANCH; 2295 2296 if (packet->last_instr_cond) 2297 packet->flags |= PERF_IP_FLAG_CONDITIONAL; 2298 } 2299 2300 /* 2301 * Immediate branch instruction with link (e.g. BL), this is 2302 * branch instruction for function call. 2303 */ 2304 if (packet->last_instr_type == OCSD_INSTR_BR && 2305 packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK) 2306 packet->flags = PERF_IP_FLAG_BRANCH | 2307 PERF_IP_FLAG_CALL; 2308 2309 /* 2310 * Indirect branch instruction with link (e.g. BLR), this is 2311 * branch instruction for function call. 2312 */ 2313 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && 2314 packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK) 2315 packet->flags = PERF_IP_FLAG_BRANCH | 2316 PERF_IP_FLAG_CALL; 2317 2318 /* 2319 * Indirect branch instruction with subtype of 2320 * OCSD_S_INSTR_V7_IMPLIED_RET, this is explicit hint for 2321 * function return for A32/T32. 2322 */ 2323 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && 2324 packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET) 2325 packet->flags = PERF_IP_FLAG_BRANCH | 2326 PERF_IP_FLAG_RETURN; 2327 2328 /* 2329 * Indirect branch instruction without link (e.g. BR), usually 2330 * this is used for function return, especially for functions 2331 * within dynamic link lib. 2332 */ 2333 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && 2334 packet->last_instr_subtype == OCSD_S_INSTR_NONE) 2335 packet->flags = PERF_IP_FLAG_BRANCH | 2336 PERF_IP_FLAG_RETURN; 2337 2338 /* Return instruction for function return. */ 2339 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && 2340 packet->last_instr_subtype == OCSD_S_INSTR_V8_RET) 2341 packet->flags = PERF_IP_FLAG_BRANCH | 2342 PERF_IP_FLAG_RETURN; 2343 2344 /* 2345 * Decoder might insert a discontinuity in the middle of 2346 * instruction packets, fixup prev_packet with flag 2347 * PERF_IP_FLAG_TRACE_BEGIN to indicate restarting trace. 2348 */ 2349 if (prev_packet->sample_type == CS_ETM_DISCONTINUITY) 2350 prev_packet->flags |= PERF_IP_FLAG_BRANCH | 2351 PERF_IP_FLAG_TRACE_BEGIN; 2352 2353 /* 2354 * If the previous packet is an exception return packet 2355 * and the return address just follows SVC instruction, 2356 * it needs to calibrate the previous packet sample flags 2357 * as PERF_IP_FLAG_SYSCALLRET. 2358 */ 2359 if (prev_packet->flags == (PERF_IP_FLAG_BRANCH | 2360 PERF_IP_FLAG_RETURN | 2361 PERF_IP_FLAG_INTERRUPT) && 2362 cs_etm__is_svc_instr(etmq, tidq, packet, packet->start_addr)) { 2363 prev_packet->flags = PERF_IP_FLAG_BRANCH | 2364 PERF_IP_FLAG_RETURN | 2365 PERF_IP_FLAG_SYSCALLRET; 2366 } 2367 break; 2368 case CS_ETM_DISCONTINUITY: 2369 /* 2370 * The trace is discontinuous, if the previous packet is 2371 * instruction packet, set flag PERF_IP_FLAG_TRACE_END 2372 * for previous packet. 2373 */ 2374 if (prev_packet->sample_type == CS_ETM_RANGE) 2375 prev_packet->flags |= PERF_IP_FLAG_BRANCH | 2376 PERF_IP_FLAG_TRACE_END; 2377 break; 2378 case CS_ETM_EXCEPTION: 2379 ret = cs_etm__get_magic(etmq, packet->trace_chan_id, &magic); 2380 if (ret) 2381 return ret; 2382 2383 /* The exception is for system call. */ 2384 if (cs_etm__is_syscall(etmq, tidq, magic)) 2385 packet->flags = PERF_IP_FLAG_BRANCH | 2386 PERF_IP_FLAG_CALL | 2387 PERF_IP_FLAG_SYSCALLRET; 2388 /* 2389 * The exceptions are triggered by external signals from bus, 2390 * interrupt controller, debug module, PE reset or halt. 2391 */ 2392 else if (cs_etm__is_async_exception(tidq, magic)) 2393 packet->flags = PERF_IP_FLAG_BRANCH | 2394 PERF_IP_FLAG_CALL | 2395 PERF_IP_FLAG_ASYNC | 2396 PERF_IP_FLAG_INTERRUPT; 2397 /* 2398 * Otherwise, exception is caused by trap, instruction & 2399 * data fault, or alignment errors. 2400 */ 2401 else if (cs_etm__is_sync_exception(etmq, tidq, magic)) 2402 packet->flags = PERF_IP_FLAG_BRANCH | 2403 PERF_IP_FLAG_CALL | 2404 PERF_IP_FLAG_INTERRUPT; 2405 2406 /* 2407 * When the exception packet is inserted, since exception 2408 * packet is not used standalone for generating samples 2409 * and it's affiliation to the previous instruction range 2410 * packet; so set previous range packet flags to tell perf 2411 * it is an exception taken branch. 2412 */ 2413 if (prev_packet->sample_type == CS_ETM_RANGE) 2414 prev_packet->flags = packet->flags; 2415 break; 2416 case CS_ETM_EXCEPTION_RET: 2417 /* 2418 * When the exception return packet is inserted, since 2419 * exception return packet is not used standalone for 2420 * generating samples and it's affiliation to the previous 2421 * instruction range packet; so set previous range packet 2422 * flags to tell perf it is an exception return branch. 2423 * 2424 * The exception return can be for either system call or 2425 * other exception types; unfortunately the packet doesn't 2426 * contain exception type related info so we cannot decide 2427 * the exception type purely based on exception return packet. 2428 * If we record the exception number from exception packet and 2429 * reuse it for exception return packet, this is not reliable 2430 * due the trace can be discontinuity or the interrupt can 2431 * be nested, thus the recorded exception number cannot be 2432 * used for exception return packet for these two cases. 2433 * 2434 * For exception return packet, we only need to distinguish the 2435 * packet is for system call or for other types. Thus the 2436 * decision can be deferred when receive the next packet which 2437 * contains the return address, based on the return address we 2438 * can read out the previous instruction and check if it's a 2439 * system call instruction and then calibrate the sample flag 2440 * as needed. 2441 */ 2442 if (prev_packet->sample_type == CS_ETM_RANGE) 2443 prev_packet->flags = PERF_IP_FLAG_BRANCH | 2444 PERF_IP_FLAG_RETURN | 2445 PERF_IP_FLAG_INTERRUPT; 2446 break; 2447 case CS_ETM_CONTEXT: 2448 case CS_ETM_EMPTY: 2449 default: 2450 break; 2451 } 2452 2453 return 0; 2454 } 2455 2456 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq) 2457 { 2458 int ret = 0; 2459 size_t processed = 0; 2460 2461 /* 2462 * Packets are decoded and added to the decoder's packet queue 2463 * until the decoder packet processing callback has requested that 2464 * processing stops or there is nothing left in the buffer. Normal 2465 * operations that stop processing are a timestamp packet or a full 2466 * decoder buffer queue. 2467 */ 2468 ret = cs_etm_decoder__process_data_block(etmq->decoder, 2469 etmq->offset, 2470 &etmq->buf[etmq->buf_used], 2471 etmq->buf_len, 2472 &processed); 2473 if (ret) 2474 goto out; 2475 2476 etmq->offset += processed; 2477 etmq->buf_used += processed; 2478 etmq->buf_len -= processed; 2479 2480 out: 2481 return ret; 2482 } 2483 2484 static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq, 2485 struct cs_etm_traceid_queue *tidq) 2486 { 2487 int ret; 2488 struct cs_etm_packet_queue *packet_queue; 2489 2490 packet_queue = &tidq->packet_queue; 2491 2492 /* Process each packet in this chunk */ 2493 while (1) { 2494 ret = cs_etm_decoder__get_packet(packet_queue, 2495 tidq->packet); 2496 if (ret <= 0) 2497 /* 2498 * Stop processing this chunk on 2499 * end of data or error 2500 */ 2501 break; 2502 2503 /* 2504 * Since packet addresses are swapped in packet 2505 * handling within below switch() statements, 2506 * thus setting sample flags must be called 2507 * prior to switch() statement to use address 2508 * information before packets swapping. 2509 */ 2510 ret = cs_etm__set_sample_flags(etmq, tidq); 2511 if (ret < 0) 2512 break; 2513 2514 switch (tidq->packet->sample_type) { 2515 case CS_ETM_RANGE: 2516 /* 2517 * If the packet contains an instruction 2518 * range, generate instruction sequence 2519 * events. 2520 */ 2521 cs_etm__sample(etmq, tidq); 2522 break; 2523 case CS_ETM_CONTEXT: 2524 /* 2525 * Update context but don't swap packet. Keep the 2526 * previous one for branch source address info, if 2527 * tracing the kernel the context packet will be emitted 2528 * between two ranges. 2529 */ 2530 ret = cs_etm__etmq_update_thread(etmq, tidq->packet->el, 2531 tidq->packet->tid, 2532 &tidq->frontend_thread); 2533 if (ret) 2534 goto out; 2535 break; 2536 case CS_ETM_EXCEPTION: 2537 case CS_ETM_EXCEPTION_RET: 2538 /* 2539 * If the exception packet is coming, 2540 * make sure the previous instruction 2541 * range packet to be handled properly. 2542 */ 2543 cs_etm__exception(tidq); 2544 break; 2545 case CS_ETM_DISCONTINUITY: 2546 /* 2547 * Discontinuity in trace, flush 2548 * previous branch stack 2549 */ 2550 cs_etm__flush(etmq, tidq); 2551 break; 2552 case CS_ETM_EMPTY: 2553 /* 2554 * Should not receive empty packet, 2555 * report error. 2556 */ 2557 pr_err("CS ETM Trace: empty packet\n"); 2558 return -EINVAL; 2559 default: 2560 break; 2561 } 2562 } 2563 2564 out: 2565 return ret; 2566 } 2567 2568 static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq) 2569 { 2570 int idx; 2571 struct int_node *inode; 2572 struct cs_etm_traceid_queue *tidq; 2573 struct intlist *traceid_queues_list = etmq->traceid_queues_list; 2574 2575 intlist__for_each_entry(inode, traceid_queues_list) { 2576 idx = (int)(intptr_t)inode->priv; 2577 tidq = etmq->traceid_queues[idx]; 2578 2579 /* Ignore return value */ 2580 cs_etm__process_traceid_queue(etmq, tidq); 2581 } 2582 } 2583 2584 static int cs_etm__run_per_thread_timeless_decoder(struct cs_etm_queue *etmq) 2585 { 2586 int err = 0; 2587 struct cs_etm_traceid_queue *tidq; 2588 2589 tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID); 2590 if (!tidq) 2591 return -EINVAL; 2592 2593 /* Go through each buffer in the queue and decode them one by one */ 2594 while (1) { 2595 err = cs_etm__get_data_block(etmq); 2596 if (err <= 0) 2597 return err; 2598 2599 /* Run trace decoder until buffer consumed or end of trace */ 2600 do { 2601 err = cs_etm__decode_data_block(etmq); 2602 if (err) 2603 return err; 2604 2605 /* 2606 * Process each packet in this chunk, nothing to do if 2607 * an error occurs other than hoping the next one will 2608 * be better. 2609 */ 2610 err = cs_etm__process_traceid_queue(etmq, tidq); 2611 2612 } while (etmq->buf_len); 2613 2614 if (err == 0) 2615 /* Flush any remaining branch stack entries */ 2616 err = cs_etm__end_block(etmq, tidq); 2617 } 2618 2619 return err; 2620 } 2621 2622 static int cs_etm__run_per_cpu_timeless_decoder(struct cs_etm_queue *etmq) 2623 { 2624 int idx, err = 0; 2625 struct cs_etm_traceid_queue *tidq; 2626 struct int_node *inode; 2627 2628 /* Go through each buffer in the queue and decode them one by one */ 2629 while (1) { 2630 err = cs_etm__get_data_block(etmq); 2631 if (err <= 0) 2632 return err; 2633 2634 /* Run trace decoder until buffer consumed or end of trace */ 2635 do { 2636 err = cs_etm__decode_data_block(etmq); 2637 if (err) 2638 return err; 2639 2640 /* 2641 * cs_etm__run_per_thread_timeless_decoder() runs on a 2642 * single traceID queue because each TID has a separate 2643 * buffer. But here in per-cpu mode we need to iterate 2644 * over each channel instead. 2645 */ 2646 intlist__for_each_entry(inode, 2647 etmq->traceid_queues_list) { 2648 idx = (int)(intptr_t)inode->priv; 2649 tidq = etmq->traceid_queues[idx]; 2650 cs_etm__process_traceid_queue(etmq, tidq); 2651 } 2652 } while (etmq->buf_len); 2653 2654 intlist__for_each_entry(inode, etmq->traceid_queues_list) { 2655 idx = (int)(intptr_t)inode->priv; 2656 tidq = etmq->traceid_queues[idx]; 2657 /* Flush any remaining branch stack entries */ 2658 err = cs_etm__end_block(etmq, tidq); 2659 if (err) 2660 return err; 2661 } 2662 } 2663 2664 return err; 2665 } 2666 2667 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, 2668 pid_t tid) 2669 { 2670 unsigned int i; 2671 struct auxtrace_queues *queues = &etm->queues; 2672 2673 for (i = 0; i < queues->nr_queues; i++) { 2674 struct auxtrace_queue *queue = &etm->queues.queue_array[i]; 2675 struct cs_etm_queue *etmq = queue->priv; 2676 struct cs_etm_traceid_queue *tidq; 2677 2678 if (!etmq) 2679 continue; 2680 2681 if (etm->per_thread_decoding) { 2682 tidq = cs_etm__etmq_get_traceid_queue( 2683 etmq, CS_ETM_PER_THREAD_TRACEID); 2684 2685 if (!tidq) 2686 continue; 2687 2688 if (tid == -1 || thread__tid(tidq->frontend_thread) == tid) 2689 cs_etm__run_per_thread_timeless_decoder(etmq); 2690 } else 2691 cs_etm__run_per_cpu_timeless_decoder(etmq); 2692 } 2693 2694 return 0; 2695 } 2696 2697 static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm) 2698 { 2699 int ret = 0; 2700 unsigned int cs_queue_nr, queue_nr, i; 2701 u8 trace_chan_id; 2702 u64 cs_timestamp; 2703 struct auxtrace_queue *queue; 2704 struct cs_etm_queue *etmq; 2705 struct cs_etm_traceid_queue *tidq; 2706 2707 /* 2708 * Pre-populate the heap with one entry from each queue so that we can 2709 * start processing in time order across all queues. 2710 */ 2711 for (i = 0; i < etm->queues.nr_queues; i++) { 2712 etmq = etm->queues.queue_array[i].priv; 2713 if (!etmq) 2714 continue; 2715 2716 ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i); 2717 if (ret) 2718 return ret; 2719 } 2720 2721 while (1) { 2722 if (!etm->heap.heap_cnt) 2723 break; 2724 2725 /* Take the entry at the top of the min heap */ 2726 cs_queue_nr = etm->heap.heap_array[0].queue_nr; 2727 queue_nr = TO_QUEUE_NR(cs_queue_nr); 2728 trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr); 2729 queue = &etm->queues.queue_array[queue_nr]; 2730 etmq = queue->priv; 2731 2732 /* 2733 * Remove the top entry from the heap since we are about 2734 * to process it. 2735 */ 2736 auxtrace_heap__pop(&etm->heap); 2737 2738 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); 2739 if (!tidq) { 2740 /* 2741 * No traceID queue has been allocated for this traceID, 2742 * which means something somewhere went very wrong. No 2743 * other choice than simply exit. 2744 */ 2745 ret = -EINVAL; 2746 goto out; 2747 } 2748 2749 /* 2750 * Packets associated with this timestamp are already in 2751 * the etmq's traceID queue, so process them. 2752 */ 2753 ret = cs_etm__process_traceid_queue(etmq, tidq); 2754 if (ret < 0) 2755 goto out; 2756 2757 /* 2758 * Packets for this timestamp have been processed, time to 2759 * move on to the next timestamp, fetching a new auxtrace_buffer 2760 * if need be. 2761 */ 2762 refetch: 2763 ret = cs_etm__get_data_block(etmq); 2764 if (ret < 0) 2765 goto out; 2766 2767 /* 2768 * No more auxtrace_buffers to process in this etmq, simply 2769 * move on to another entry in the auxtrace_heap. 2770 */ 2771 if (!ret) 2772 continue; 2773 2774 ret = cs_etm__decode_data_block(etmq); 2775 if (ret) 2776 goto out; 2777 2778 cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id); 2779 2780 if (!cs_timestamp) { 2781 /* 2782 * Function cs_etm__decode_data_block() returns when 2783 * there is no more traces to decode in the current 2784 * auxtrace_buffer OR when a timestamp has been 2785 * encountered on any of the traceID queues. Since we 2786 * did not get a timestamp, there is no more traces to 2787 * process in this auxtrace_buffer. As such empty and 2788 * flush all traceID queues. 2789 */ 2790 cs_etm__clear_all_traceid_queues(etmq); 2791 2792 /* Fetch another auxtrace_buffer for this etmq */ 2793 goto refetch; 2794 } 2795 2796 /* 2797 * Add to the min heap the timestamp for packets that have 2798 * just been decoded. They will be processed and synthesized 2799 * during the next call to cs_etm__process_traceid_queue() for 2800 * this queue/traceID. 2801 */ 2802 cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id); 2803 ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp); 2804 } 2805 2806 for (i = 0; i < etm->queues.nr_queues; i++) { 2807 struct int_node *inode; 2808 2809 etmq = etm->queues.queue_array[i].priv; 2810 if (!etmq) 2811 continue; 2812 2813 intlist__for_each_entry(inode, etmq->traceid_queues_list) { 2814 int idx = (int)(intptr_t)inode->priv; 2815 2816 /* Flush any remaining branch stack entries */ 2817 tidq = etmq->traceid_queues[idx]; 2818 ret = cs_etm__end_block(etmq, tidq); 2819 if (ret) 2820 return ret; 2821 } 2822 } 2823 out: 2824 return ret; 2825 } 2826 2827 static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm, 2828 union perf_event *event) 2829 { 2830 struct thread *th; 2831 2832 if (etm->timeless_decoding) 2833 return 0; 2834 2835 /* 2836 * Add the tid/pid to the log so that we can get a match when we get a 2837 * contextID from the decoder. Only track for the host: only kernel 2838 * trace is supported for guests which wouldn't need pids so this should 2839 * be fine. 2840 */ 2841 th = machine__findnew_thread(&etm->session->machines.host, 2842 event->itrace_start.pid, 2843 event->itrace_start.tid); 2844 if (!th) 2845 return -ENOMEM; 2846 2847 thread__put(th); 2848 2849 return 0; 2850 } 2851 2852 static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm, 2853 union perf_event *event) 2854 { 2855 struct thread *th; 2856 bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; 2857 2858 /* 2859 * Context switch in per-thread mode are irrelevant since perf 2860 * will start/stop tracing as the process is scheduled. 2861 */ 2862 if (etm->timeless_decoding) 2863 return 0; 2864 2865 /* 2866 * SWITCH_IN events carry the next process to be switched out while 2867 * SWITCH_OUT events carry the process to be switched in. As such 2868 * we don't care about IN events. 2869 */ 2870 if (!out) 2871 return 0; 2872 2873 /* 2874 * Add the tid/pid to the log so that we can get a match when we get a 2875 * contextID from the decoder. Only track for the host: only kernel 2876 * trace is supported for guests which wouldn't need pids so this should 2877 * be fine. 2878 */ 2879 th = machine__findnew_thread(&etm->session->machines.host, 2880 event->context_switch.next_prev_pid, 2881 event->context_switch.next_prev_tid); 2882 if (!th) 2883 return -ENOMEM; 2884 2885 thread__put(th); 2886 2887 return 0; 2888 } 2889 2890 static int cs_etm__process_event(struct perf_session *session, 2891 union perf_event *event, 2892 struct perf_sample *sample, 2893 const struct perf_tool *tool) 2894 { 2895 struct cs_etm_auxtrace *etm = container_of(session->auxtrace, 2896 struct cs_etm_auxtrace, 2897 auxtrace); 2898 2899 if (dump_trace) 2900 return 0; 2901 2902 if (!tool->ordered_events) { 2903 pr_err("CoreSight ETM Trace requires ordered events\n"); 2904 return -EINVAL; 2905 } 2906 2907 switch (event->header.type) { 2908 case PERF_RECORD_EXIT: 2909 /* 2910 * Don't need to wait for cs_etm__flush_events() in per-thread mode to 2911 * start the decode because we know there will be no more trace from 2912 * this thread. All this does is emit samples earlier than waiting for 2913 * the flush in other modes, but with timestamps it makes sense to wait 2914 * for flush so that events from different threads are interleaved 2915 * properly. 2916 */ 2917 if (etm->per_thread_decoding && etm->timeless_decoding) 2918 return cs_etm__process_timeless_queues(etm, 2919 event->fork.tid); 2920 break; 2921 2922 case PERF_RECORD_ITRACE_START: 2923 return cs_etm__process_itrace_start(etm, event); 2924 2925 case PERF_RECORD_SWITCH_CPU_WIDE: 2926 return cs_etm__process_switch_cpu_wide(etm, event); 2927 2928 case PERF_RECORD_AUX: 2929 /* 2930 * Record the latest kernel timestamp available in the header 2931 * for samples so that synthesised samples occur from this point 2932 * onwards. 2933 */ 2934 if (sample->time && (sample->time != (u64)-1)) 2935 etm->latest_kernel_timestamp = sample->time; 2936 break; 2937 2938 default: 2939 break; 2940 } 2941 2942 return 0; 2943 } 2944 2945 static void dump_queued_data(struct cs_etm_auxtrace *etm, 2946 struct perf_record_auxtrace *event) 2947 { 2948 struct auxtrace_buffer *buf; 2949 unsigned int i; 2950 /* 2951 * Find all buffers with same reference in the queues and dump them. 2952 * This is because the queues can contain multiple entries of the same 2953 * buffer that were split on aux records. 2954 */ 2955 for (i = 0; i < etm->queues.nr_queues; ++i) 2956 list_for_each_entry(buf, &etm->queues.queue_array[i].head, list) 2957 if (buf->reference == event->reference) 2958 cs_etm__dump_event(etm->queues.queue_array[i].priv, buf); 2959 } 2960 2961 static int cs_etm__process_auxtrace_event(struct perf_session *session, 2962 union perf_event *event, 2963 const struct perf_tool *tool __maybe_unused) 2964 { 2965 struct cs_etm_auxtrace *etm = container_of(session->auxtrace, 2966 struct cs_etm_auxtrace, 2967 auxtrace); 2968 if (!etm->data_queued) { 2969 struct auxtrace_buffer *buffer; 2970 off_t data_offset; 2971 int fd = perf_data__fd(session->data); 2972 bool is_pipe = perf_data__is_pipe(session->data); 2973 int err; 2974 int idx = event->auxtrace.idx; 2975 2976 if (is_pipe) 2977 data_offset = 0; 2978 else { 2979 data_offset = lseek(fd, 0, SEEK_CUR); 2980 if (data_offset == -1) 2981 return -errno; 2982 } 2983 2984 err = auxtrace_queues__add_event(&etm->queues, session, 2985 event, data_offset, &buffer); 2986 if (err) 2987 return err; 2988 2989 if (dump_trace) 2990 if (auxtrace_buffer__get_data(buffer, fd)) { 2991 cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer); 2992 auxtrace_buffer__put_data(buffer); 2993 } 2994 } else if (dump_trace) 2995 dump_queued_data(etm, &event->auxtrace); 2996 2997 return 0; 2998 } 2999 3000 static void cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace *etm) 3001 { 3002 /* Take first ETM as all options will be the same for all ETMs */ 3003 u64 *metadata = etm->metadata[0]; 3004 3005 /* Override timeless mode with user input from --itrace=Z */ 3006 if (etm->synth_opts.timeless_decoding) { 3007 etm->timeless_decoding = true; 3008 return; 3009 } 3010 3011 if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) 3012 etm->timeless_decoding = !(metadata[CS_ETM_ETMCR] & ETMCR_TIMESTAMP_EN); 3013 else 3014 etm->timeless_decoding = !(metadata[CS_ETMV4_TRCCONFIGR] & TRCCONFIGR_TS); 3015 } 3016 3017 /* 3018 * Read a single cpu parameter block from the auxtrace_info priv block. 3019 * 3020 * For version 1 there is a per cpu nr_params entry. If we are handling 3021 * version 1 file, then there may be less, the same, or more params 3022 * indicated by this value than the compile time number we understand. 3023 * 3024 * For a version 0 info block, there are a fixed number, and we need to 3025 * fill out the nr_param value in the metadata we create. 3026 */ 3027 static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset, 3028 int out_blk_size, int nr_params_v0) 3029 { 3030 u64 *metadata = NULL; 3031 int hdr_version; 3032 int nr_in_params, nr_out_params, nr_cmn_params; 3033 int i, k; 3034 3035 metadata = zalloc(sizeof(*metadata) * out_blk_size); 3036 if (!metadata) 3037 return NULL; 3038 3039 /* read block current index & version */ 3040 i = *buff_in_offset; 3041 hdr_version = buff_in[CS_HEADER_VERSION]; 3042 3043 if (!hdr_version) { 3044 /* read version 0 info block into a version 1 metadata block */ 3045 nr_in_params = nr_params_v0; 3046 metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC]; 3047 metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU]; 3048 metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params; 3049 /* remaining block params at offset +1 from source */ 3050 for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++) 3051 metadata[k + 1] = buff_in[i + k]; 3052 /* version 0 has 2 common params */ 3053 nr_cmn_params = 2; 3054 } else { 3055 /* read version 1 info block - input and output nr_params may differ */ 3056 /* version 1 has 3 common params */ 3057 nr_cmn_params = 3; 3058 nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS]; 3059 3060 /* if input has more params than output - skip excess */ 3061 nr_out_params = nr_in_params + nr_cmn_params; 3062 if (nr_out_params > out_blk_size) 3063 nr_out_params = out_blk_size; 3064 3065 for (k = CS_ETM_MAGIC; k < nr_out_params; k++) 3066 metadata[k] = buff_in[i + k]; 3067 3068 /* record the actual nr params we copied */ 3069 metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params; 3070 } 3071 3072 /* adjust in offset by number of in params used */ 3073 i += nr_in_params + nr_cmn_params; 3074 *buff_in_offset = i; 3075 return metadata; 3076 } 3077 3078 /** 3079 * Puts a fragment of an auxtrace buffer into the auxtrace queues based 3080 * on the bounds of aux_event, if it matches with the buffer that's at 3081 * file_offset. 3082 * 3083 * Normally, whole auxtrace buffers would be added to the queue. But we 3084 * want to reset the decoder for every PERF_RECORD_AUX event, and the decoder 3085 * is reset across each buffer, so splitting the buffers up in advance has 3086 * the same effect. 3087 */ 3088 static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz, 3089 struct perf_record_aux *aux_event, struct perf_sample *sample) 3090 { 3091 int err; 3092 char buf[PERF_SAMPLE_MAX_SIZE]; 3093 union perf_event *auxtrace_event_union; 3094 struct perf_record_auxtrace *auxtrace_event; 3095 union perf_event auxtrace_fragment; 3096 __u64 aux_offset, aux_size; 3097 enum cs_etm_format format; 3098 3099 struct cs_etm_auxtrace *etm = container_of(session->auxtrace, 3100 struct cs_etm_auxtrace, 3101 auxtrace); 3102 3103 /* 3104 * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got 3105 * from looping through the auxtrace index. 3106 */ 3107 err = perf_session__peek_event(session, file_offset, buf, 3108 PERF_SAMPLE_MAX_SIZE, &auxtrace_event_union, NULL); 3109 if (err) 3110 return err; 3111 auxtrace_event = &auxtrace_event_union->auxtrace; 3112 if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE) 3113 return -EINVAL; 3114 3115 if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) || 3116 auxtrace_event->header.size != sz) { 3117 return -EINVAL; 3118 } 3119 3120 /* 3121 * In per-thread mode, auxtrace CPU is set to -1, but TID will be set instead. See 3122 * auxtrace_mmap_params__set_idx(). However, the sample AUX event will contain a 3123 * CPU as we set this always for the AUX_OUTPUT_HW_ID event. 3124 * So now compare only TIDs if auxtrace CPU is -1, and CPUs if auxtrace CPU is not -1. 3125 * Return 'not found' if mismatch. 3126 */ 3127 if (auxtrace_event->cpu == (__u32) -1) { 3128 etm->per_thread_decoding = true; 3129 if (auxtrace_event->tid != sample->tid) 3130 return 1; 3131 } else if (auxtrace_event->cpu != sample->cpu) { 3132 if (etm->per_thread_decoding) { 3133 /* 3134 * Found a per-cpu buffer after a per-thread one was 3135 * already found 3136 */ 3137 pr_err("CS ETM: Inconsistent per-thread/per-cpu mode.\n"); 3138 return -EINVAL; 3139 } 3140 return 1; 3141 } 3142 3143 if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) { 3144 /* 3145 * Clamp size in snapshot mode. The buffer size is clamped in 3146 * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect 3147 * the buffer size. 3148 */ 3149 aux_size = min(aux_event->aux_size, auxtrace_event->size); 3150 3151 /* 3152 * In this mode, the head also points to the end of the buffer so aux_offset 3153 * needs to have the size subtracted so it points to the beginning as in normal mode 3154 */ 3155 aux_offset = aux_event->aux_offset - aux_size; 3156 } else { 3157 aux_size = aux_event->aux_size; 3158 aux_offset = aux_event->aux_offset; 3159 } 3160 3161 if (aux_offset >= auxtrace_event->offset && 3162 aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) { 3163 struct cs_etm_queue *etmq = cs_etm__get_queue(etm, auxtrace_event->cpu); 3164 3165 if (!etmq) 3166 return -EINVAL; 3167 3168 /* 3169 * If this AUX event was inside this buffer somewhere, create a new auxtrace event 3170 * based on the sizes of the aux event, and queue that fragment. 3171 */ 3172 auxtrace_fragment.auxtrace = *auxtrace_event; 3173 auxtrace_fragment.auxtrace.size = aux_size; 3174 auxtrace_fragment.auxtrace.offset = aux_offset; 3175 auxtrace_fragment.auxtrace.idx = etmq->queue_nr; 3176 file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size; 3177 3178 pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64 3179 " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu); 3180 err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment, 3181 file_offset, NULL); 3182 if (err) 3183 return err; 3184 3185 format = (aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW) ? 3186 UNFORMATTED : FORMATTED; 3187 if (etmq->format != UNSET && format != etmq->format) { 3188 pr_err("CS_ETM: mixed formatted and unformatted trace not supported\n"); 3189 return -EINVAL; 3190 } 3191 etmq->format = format; 3192 return 0; 3193 } 3194 3195 /* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */ 3196 return 1; 3197 } 3198 3199 static int cs_etm__process_aux_hw_id_cb(struct perf_session *session, union perf_event *event, 3200 u64 offset __maybe_unused, void *data __maybe_unused) 3201 { 3202 /* look to handle PERF_RECORD_AUX_OUTPUT_HW_ID early to ensure decoders can be set up */ 3203 if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) { 3204 (*(int *)data)++; /* increment found count */ 3205 return cs_etm__process_aux_output_hw_id(session, event); 3206 } 3207 return 0; 3208 } 3209 3210 static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event, 3211 u64 offset __maybe_unused, void *data __maybe_unused) 3212 { 3213 struct perf_sample sample; 3214 int ret; 3215 struct auxtrace_index_entry *ent; 3216 struct auxtrace_index *auxtrace_index; 3217 struct evsel *evsel; 3218 size_t i; 3219 3220 /* Don't care about any other events, we're only queuing buffers for AUX events */ 3221 if (event->header.type != PERF_RECORD_AUX) 3222 return 0; 3223 3224 if (event->header.size < sizeof(struct perf_record_aux)) 3225 return -EINVAL; 3226 3227 /* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */ 3228 if (!event->aux.aux_size) 3229 return 0; 3230 3231 /* 3232 * Parse the sample, we need the sample_id_all data that comes after the event so that the 3233 * CPU or PID can be matched to an AUXTRACE buffer's CPU or PID. 3234 */ 3235 evsel = evlist__event2evsel(session->evlist, event); 3236 if (!evsel) 3237 return -EINVAL; 3238 perf_sample__init(&sample, /*all=*/false); 3239 ret = evsel__parse_sample(evsel, event, &sample); 3240 if (ret) 3241 goto out; 3242 3243 /* 3244 * Loop through the auxtrace index to find the buffer that matches up with this aux event. 3245 */ 3246 list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) { 3247 for (i = 0; i < auxtrace_index->nr; i++) { 3248 ent = &auxtrace_index->entries[i]; 3249 ret = cs_etm__queue_aux_fragment(session, ent->file_offset, 3250 ent->sz, &event->aux, &sample); 3251 /* 3252 * Stop search on error or successful values. Continue search on 3253 * 1 ('not found') 3254 */ 3255 if (ret != 1) 3256 goto out; 3257 } 3258 } 3259 3260 /* 3261 * Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but 3262 * don't exit with an error because it will still be possible to decode other aux records. 3263 */ 3264 pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64 3265 " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu); 3266 ret = 0; 3267 out: 3268 perf_sample__exit(&sample); 3269 return ret; 3270 } 3271 3272 static int cs_etm__queue_aux_records(struct perf_session *session) 3273 { 3274 struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index, 3275 struct auxtrace_index, list); 3276 if (index && index->nr > 0) 3277 return perf_session__peek_events(session, session->header.data_offset, 3278 session->header.data_size, 3279 cs_etm__queue_aux_records_cb, NULL); 3280 3281 /* 3282 * We would get here if there are no entries in the index (either no auxtrace 3283 * buffers or no index at all). Fail silently as there is the possibility of 3284 * queueing them in cs_etm__process_auxtrace_event() if etm->data_queued is still 3285 * false. 3286 * 3287 * In that scenario, buffers will not be split by AUX records. 3288 */ 3289 return 0; 3290 } 3291 3292 #define HAS_PARAM(j, type, param) (metadata[(j)][CS_ETM_NR_TRC_PARAMS] <= \ 3293 (CS_##type##_##param - CS_ETM_COMMON_BLK_MAX_V1)) 3294 3295 /* 3296 * Loop through the ETMs and complain if we find at least one where ts_source != 1 (virtual 3297 * timestamps). 3298 */ 3299 static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu) 3300 { 3301 int j; 3302 3303 for (j = 0; j < num_cpu; j++) { 3304 switch (metadata[j][CS_ETM_MAGIC]) { 3305 case __perf_cs_etmv4_magic: 3306 if (HAS_PARAM(j, ETMV4, TS_SOURCE) || metadata[j][CS_ETMV4_TS_SOURCE] != 1) 3307 return false; 3308 break; 3309 case __perf_cs_ete_magic: 3310 if (HAS_PARAM(j, ETE, TS_SOURCE) || metadata[j][CS_ETE_TS_SOURCE] != 1) 3311 return false; 3312 break; 3313 default: 3314 /* Unknown / unsupported magic number. */ 3315 return false; 3316 } 3317 } 3318 return true; 3319 } 3320 3321 /* map trace ids to correct metadata block, from information in metadata */ 3322 static int cs_etm__map_trace_ids_metadata(struct cs_etm_auxtrace *etm, int num_cpu, 3323 u64 **metadata) 3324 { 3325 u64 cs_etm_magic; 3326 u8 trace_chan_id; 3327 int i, err; 3328 3329 for (i = 0; i < num_cpu; i++) { 3330 cs_etm_magic = metadata[i][CS_ETM_MAGIC]; 3331 switch (cs_etm_magic) { 3332 case __perf_cs_etmv3_magic: 3333 metadata[i][CS_ETM_ETMTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK; 3334 trace_chan_id = (u8)(metadata[i][CS_ETM_ETMTRACEIDR]); 3335 break; 3336 case __perf_cs_etmv4_magic: 3337 case __perf_cs_ete_magic: 3338 metadata[i][CS_ETMV4_TRCTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK; 3339 trace_chan_id = (u8)(metadata[i][CS_ETMV4_TRCTRACEIDR]); 3340 break; 3341 default: 3342 /* unknown magic number */ 3343 return -EINVAL; 3344 } 3345 err = cs_etm__map_trace_id_v0(etm, trace_chan_id, metadata[i]); 3346 if (err) 3347 return err; 3348 } 3349 return 0; 3350 } 3351 3352 /* 3353 * Use the data gathered by the peeks for HW_ID (trace ID mappings) and AUX 3354 * (formatted or not) packets to create the decoders. 3355 */ 3356 static int cs_etm__create_queue_decoders(struct cs_etm_queue *etmq) 3357 { 3358 struct cs_etm_decoder_params d_params; 3359 struct cs_etm_trace_params *t_params; 3360 int decoders = intlist__nr_entries(etmq->traceid_list); 3361 3362 if (decoders == 0) 3363 return 0; 3364 3365 /* 3366 * Each queue can only contain data from one CPU when unformatted, so only one decoder is 3367 * needed. 3368 */ 3369 if (etmq->format == UNFORMATTED) 3370 assert(decoders == 1); 3371 3372 /* Use metadata to fill in trace parameters for trace decoder */ 3373 t_params = zalloc(sizeof(*t_params) * decoders); 3374 3375 if (!t_params) 3376 goto out_free; 3377 3378 if (cs_etm__init_trace_params(t_params, etmq)) 3379 goto out_free; 3380 3381 /* Set decoder parameters to decode trace packets */ 3382 if (cs_etm__init_decoder_params(&d_params, etmq, 3383 dump_trace ? CS_ETM_OPERATION_PRINT : 3384 CS_ETM_OPERATION_DECODE)) 3385 goto out_free; 3386 3387 etmq->decoder = cs_etm_decoder__new(decoders, &d_params, 3388 t_params); 3389 3390 if (!etmq->decoder) 3391 goto out_free; 3392 3393 /* 3394 * Register a function to handle all memory accesses required by 3395 * the trace decoder library. 3396 */ 3397 if (cs_etm_decoder__add_mem_access_cb(etmq->decoder, 3398 0x0L, ((u64) -1L), 3399 cs_etm__decoder_mem_access)) 3400 goto out_free_decoder; 3401 3402 zfree(&t_params); 3403 return 0; 3404 3405 out_free_decoder: 3406 cs_etm_decoder__free(etmq->decoder); 3407 out_free: 3408 zfree(&t_params); 3409 return -EINVAL; 3410 } 3411 3412 static int cs_etm__create_decoders(struct cs_etm_auxtrace *etm) 3413 { 3414 struct auxtrace_queues *queues = &etm->queues; 3415 3416 for (unsigned int i = 0; i < queues->nr_queues; i++) { 3417 bool empty = list_empty(&queues->queue_array[i].head); 3418 struct cs_etm_queue *etmq = queues->queue_array[i].priv; 3419 int ret; 3420 3421 /* 3422 * Don't create decoders for empty queues, mainly because 3423 * etmq->format is unknown for empty queues. 3424 */ 3425 assert(empty || etmq->format != UNSET); 3426 if (empty) 3427 continue; 3428 3429 ret = cs_etm__create_queue_decoders(etmq); 3430 if (ret) 3431 return ret; 3432 } 3433 return 0; 3434 } 3435 3436 int cs_etm__process_auxtrace_info_full(union perf_event *event, 3437 struct perf_session *session) 3438 { 3439 struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; 3440 struct cs_etm_auxtrace *etm = NULL; 3441 struct perf_record_time_conv *tc = &session->time_conv; 3442 int event_header_size = sizeof(struct perf_event_header); 3443 int total_size = auxtrace_info->header.size; 3444 int priv_size = 0; 3445 int num_cpu, max_cpu = 0; 3446 int err = 0; 3447 int aux_hw_id_found; 3448 int i; 3449 u64 *ptr = NULL; 3450 u64 **metadata = NULL; 3451 3452 /* First the global part */ 3453 ptr = (u64 *) auxtrace_info->priv; 3454 num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff; 3455 3456 /* 3457 * Bound num_cpu by the event size: the global header consumes 3458 * CS_ETM_HEADER_SIZE bytes, and each CPU needs at least one u64 3459 * metadata entry after that. 3460 */ 3461 priv_size = total_size - event_header_size - INFO_HEADER_SIZE - 3462 CS_ETM_HEADER_SIZE; 3463 if (num_cpu <= 0 || priv_size <= 0 || 3464 num_cpu > priv_size / (int)sizeof(u64)) 3465 return -EINVAL; 3466 3467 metadata = zalloc(sizeof(*metadata) * num_cpu); 3468 if (!metadata) 3469 return -ENOMEM; 3470 3471 /* Start parsing after the common part of the header */ 3472 i = CS_HEADER_VERSION_MAX; 3473 3474 /* 3475 * The metadata is stored in the auxtrace_info section and encodes 3476 * the configuration of the ARM embedded trace macrocell which is 3477 * required by the trace decoder to properly decode the trace due 3478 * to its highly compressed nature. 3479 */ 3480 for (int j = 0; j < num_cpu; j++) { 3481 if (ptr[i] == __perf_cs_etmv3_magic) { 3482 metadata[j] = 3483 cs_etm__create_meta_blk(ptr, &i, 3484 CS_ETM_PRIV_MAX, 3485 CS_ETM_NR_TRC_PARAMS_V0); 3486 } else if (ptr[i] == __perf_cs_etmv4_magic) { 3487 metadata[j] = 3488 cs_etm__create_meta_blk(ptr, &i, 3489 CS_ETMV4_PRIV_MAX, 3490 CS_ETMV4_NR_TRC_PARAMS_V0); 3491 } else if (ptr[i] == __perf_cs_ete_magic) { 3492 metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1); 3493 } else { 3494 ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". File could be from a newer version of perf.\n", 3495 ptr[i]); 3496 err = -EINVAL; 3497 goto err_free_metadata; 3498 } 3499 3500 if (!metadata[j]) { 3501 err = -ENOMEM; 3502 goto err_free_metadata; 3503 } 3504 3505 /* CPU id comes from perf.data and must fit max_cpu + 1 without overflow */ 3506 if (metadata[j][CS_ETM_CPU] >= INT_MAX) { 3507 err = -EINVAL; 3508 goto err_free_metadata; 3509 } 3510 3511 if ((int)metadata[j][CS_ETM_CPU] > max_cpu) 3512 max_cpu = metadata[j][CS_ETM_CPU]; 3513 } 3514 3515 /* 3516 * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and 3517 * CS_ETMV4_PRIV_MAX mark how many double words are in the 3518 * global metadata, and each cpu's metadata respectively. 3519 * The following tests if the correct number of double words was 3520 * present in the auxtrace info section. 3521 */ 3522 priv_size = total_size - event_header_size - INFO_HEADER_SIZE; 3523 if (i * 8 != priv_size) { 3524 err = -EINVAL; 3525 goto err_free_metadata; 3526 } 3527 3528 etm = zalloc(sizeof(*etm)); 3529 3530 if (!etm) { 3531 err = -ENOMEM; 3532 goto err_free_metadata; 3533 } 3534 3535 /* 3536 * As all the ETMs run at the same exception level, the system should 3537 * have the same PID format crossing CPUs. So cache the PID format 3538 * and reuse it for sequential decoding. 3539 */ 3540 etm->pid_fmt = cs_etm__init_pid_fmt(metadata[0]); 3541 3542 err = auxtrace_queues__init_nr(&etm->queues, max_cpu + 1); 3543 if (err) 3544 goto err_free_etm; 3545 3546 for (unsigned int j = 0; j < etm->queues.nr_queues; ++j) { 3547 err = cs_etm__setup_queue(etm, &etm->queues.queue_array[j], j); 3548 if (err) 3549 goto err_free_queues; 3550 } 3551 3552 if (session->itrace_synth_opts->set) { 3553 etm->synth_opts = *session->itrace_synth_opts; 3554 } else { 3555 itrace_synth_opts__set_default(&etm->synth_opts, 3556 session->itrace_synth_opts->default_no_sample); 3557 etm->synth_opts.callchain = false; 3558 } 3559 3560 etm->session = session; 3561 3562 etm->num_cpu = num_cpu; 3563 etm->pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> 32) & 0xffffffff); 3564 etm->snapshot_mode = (ptr[CS_ETM_SNAPSHOT] != 0); 3565 etm->metadata = metadata; 3566 etm->auxtrace_type = auxtrace_info->type; 3567 3568 if (etm->synth_opts.use_timestamp) 3569 /* 3570 * Prior to Armv8.4, Arm CPUs don't support FEAT_TRF feature, 3571 * therefore the decoder cannot know if the timestamp trace is 3572 * same with the kernel time. 3573 * 3574 * If a user has knowledge for the working platform and can 3575 * specify itrace option 'T' to tell decoder to forcely use the 3576 * traced timestamp as the kernel time. 3577 */ 3578 etm->has_virtual_ts = true; 3579 else 3580 /* Use virtual timestamps if all ETMs report ts_source = 1 */ 3581 etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu); 3582 3583 if (!etm->has_virtual_ts) 3584 ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n" 3585 "The time field of the samples will not be set accurately.\n" 3586 "For Arm CPUs prior to Armv8.4 or without support FEAT_TRF,\n" 3587 "you can specify the itrace option 'T' for timestamp decoding\n" 3588 "if the Coresight timestamp on the platform is same with the kernel time.\n\n"); 3589 3590 etm->auxtrace.process_event = cs_etm__process_event; 3591 etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event; 3592 etm->auxtrace.flush_events = cs_etm__flush_events; 3593 etm->auxtrace.free_events = cs_etm__free_events; 3594 etm->auxtrace.free = cs_etm__free; 3595 etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace; 3596 session->auxtrace = &etm->auxtrace; 3597 3598 cs_etm__setup_timeless_decoding(etm); 3599 3600 etm->tc.time_shift = tc->time_shift; 3601 etm->tc.time_mult = tc->time_mult; 3602 etm->tc.time_zero = tc->time_zero; 3603 if (event_contains(*tc, cap_user_time_short)) { 3604 etm->tc.time_cycles = tc->time_cycles; 3605 etm->tc.time_mask = tc->time_mask; 3606 etm->tc.cap_user_time_zero = tc->cap_user_time_zero; 3607 etm->tc.cap_user_time_short = tc->cap_user_time_short; 3608 } 3609 err = cs_etm__synth_events(etm, session); 3610 if (err) 3611 goto err_free_queues; 3612 3613 err = cs_etm__queue_aux_records(session); 3614 if (err) 3615 goto err_free_queues; 3616 3617 /* 3618 * Map Trace ID values to CPU metadata. 3619 * 3620 * Trace metadata will always contain Trace ID values from the legacy algorithm 3621 * in case it's read by a version of Perf that doesn't know about HW_ID packets 3622 * or the kernel doesn't emit them. 3623 * 3624 * The updated kernel drivers that use AUX_HW_ID to sent Trace IDs will attempt to use 3625 * the same IDs as the old algorithm as far as is possible, unless there are clashes 3626 * in which case a different value will be used. This means an older perf may still 3627 * be able to record and read files generate on a newer system. 3628 * 3629 * For a perf able to interpret AUX_HW_ID packets we first check for the presence of 3630 * those packets. If they are there then the values will be mapped and plugged into 3631 * the metadata and decoders are only created for each mapping received. 3632 * 3633 * If no AUX_HW_ID packets are present - which means a file recorded on an old kernel 3634 * then we map Trace ID values to CPU directly from the metadata and create decoders 3635 * for all mappings. 3636 */ 3637 3638 /* Scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */ 3639 aux_hw_id_found = 0; 3640 err = perf_session__peek_events(session, session->header.data_offset, 3641 session->header.data_size, 3642 cs_etm__process_aux_hw_id_cb, &aux_hw_id_found); 3643 if (err) 3644 goto err_free_queues; 3645 3646 /* if no HW ID found this is a file with metadata values only, map from metadata */ 3647 if (!aux_hw_id_found) { 3648 err = cs_etm__map_trace_ids_metadata(etm, num_cpu, metadata); 3649 if (err) 3650 goto err_free_queues; 3651 } 3652 3653 err = cs_etm__create_decoders(etm); 3654 if (err) 3655 goto err_free_queues; 3656 3657 etm->data_queued = etm->queues.populated; 3658 return 0; 3659 3660 err_free_queues: 3661 auxtrace_queues__free(&etm->queues); 3662 session->auxtrace = NULL; 3663 err_free_etm: 3664 zfree(&etm); 3665 err_free_metadata: 3666 /* No need to check @metadata[j], free(NULL) is supported */ 3667 for (int j = 0; j < num_cpu; j++) 3668 zfree(&metadata[j]); 3669 zfree(&metadata); 3670 return err; 3671 } 3672