1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright(C) 2015-2018 Linaro Limited. 4 * 5 * Author: Tor Jeremiassen <tor@ti.com> 6 * Author: Mathieu Poirier <mathieu.poirier@linaro.org> 7 */ 8 9 #include <linux/bitfield.h> 10 #include <linux/bitops.h> 11 #include <linux/coresight-pmu.h> 12 #include <linux/err.h> 13 #include <linux/log2.h> 14 #include <linux/types.h> 15 #include <linux/zalloc.h> 16 17 #include <stdlib.h> 18 19 #include "auxtrace.h" 20 #include "color.h" 21 #include "cs-etm.h" 22 #include "cs-etm-decoder/cs-etm-decoder.h" 23 #include "debug.h" 24 #include "dso.h" 25 #include "evlist.h" 26 #include "intlist.h" 27 #include "machine.h" 28 #include "map.h" 29 #include "perf.h" 30 #include "session.h" 31 #include "map_symbol.h" 32 #include "branch.h" 33 #include "symbol.h" 34 #include "tool.h" 35 #include "thread.h" 36 #include "thread-stack.h" 37 #include "tsc.h" 38 #include <tools/libc_compat.h> 39 #include "util/synthetic-events.h" 40 #include "util/util.h" 41 42 struct cs_etm_auxtrace { 43 struct auxtrace auxtrace; 44 struct auxtrace_queues queues; 45 struct auxtrace_heap heap; 46 struct itrace_synth_opts synth_opts; 47 struct perf_session *session; 48 struct perf_tsc_conversion tc; 49 50 /* 51 * Timeless has no timestamps in the trace so overlapping mmap lookups 52 * are less accurate but produces smaller trace data. We use context IDs 53 * in the trace instead of matching timestamps with fork records so 54 * they're not really needed in the general case. Overlapping mmaps 55 * happen in cases like between a fork and an exec. 56 */ 57 bool timeless_decoding; 58 59 /* 60 * Per-thread ignores the trace channel ID and instead assumes that 61 * everything in a buffer comes from the same process regardless of 62 * which CPU it ran on. It also implies no context IDs so the TID is 63 * taken from the auxtrace buffer. 
64 */ 65 bool per_thread_decoding; 66 bool snapshot_mode; 67 bool data_queued; 68 bool has_virtual_ts; /* Virtual/Kernel timestamps in the trace. */ 69 70 int num_cpu; 71 u64 latest_kernel_timestamp; 72 u32 auxtrace_type; 73 u64 branches_sample_type; 74 u64 branches_id; 75 u64 instructions_sample_type; 76 u64 instructions_sample_period; 77 u64 instructions_id; 78 u64 **metadata; 79 unsigned int pmu_type; 80 enum cs_etm_pid_fmt pid_fmt; 81 }; 82 83 struct cs_etm_traceid_queue { 84 u8 trace_chan_id; 85 u64 period_instructions; 86 size_t last_branch_pos; 87 union perf_event *event_buf; 88 struct thread *thread; 89 struct thread *prev_packet_thread; 90 ocsd_ex_level prev_packet_el; 91 ocsd_ex_level el; 92 struct branch_stack *last_branch; 93 struct branch_stack *last_branch_rb; 94 struct cs_etm_packet *prev_packet; 95 struct cs_etm_packet *packet; 96 struct cs_etm_packet_queue packet_queue; 97 }; 98 99 enum cs_etm_format { 100 UNSET, 101 FORMATTED, 102 UNFORMATTED 103 }; 104 105 struct cs_etm_queue { 106 struct cs_etm_auxtrace *etm; 107 struct cs_etm_decoder *decoder; 108 struct auxtrace_buffer *buffer; 109 unsigned int queue_nr; 110 u8 pending_timestamp_chan_id; 111 enum cs_etm_format format; 112 u64 offset; 113 const unsigned char *buf; 114 size_t buf_len, buf_used; 115 /* Conversion between traceID and index in traceid_queues array */ 116 struct intlist *traceid_queues_list; 117 struct cs_etm_traceid_queue **traceid_queues; 118 /* Conversion between traceID and metadata pointers */ 119 struct intlist *traceid_list; 120 /* 121 * Same as traceid_list, but traceid_list may be a reference to another 122 * queue's which has a matching sink ID. 
123 */ 124 struct intlist *own_traceid_list; 125 u32 sink_id; 126 }; 127 128 static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm); 129 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, 130 pid_t tid); 131 static int cs_etm__get_data_block(struct cs_etm_queue *etmq); 132 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq); 133 static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata); 134 static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu); 135 static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata); 136 137 /* PTMs ETMIDR [11:8] set to b0011 */ 138 #define ETMIDR_PTM_VERSION 0x00000300 139 140 /* 141 * A struct auxtrace_heap_item only has a queue_nr and a timestamp to 142 * work with. One option is to modify to auxtrace_heap_XYZ() API or simply 143 * encode the etm queue number as the upper 16 bit and the channel as 144 * the lower 16 bit. 145 */ 146 #define TO_CS_QUEUE_NR(queue_nr, trace_chan_id) \ 147 (queue_nr << 16 | trace_chan_id) 148 #define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16) 149 #define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff) 150 #define SINK_UNSET ((u32) -1) 151 152 static u32 cs_etm__get_v7_protocol_version(u32 etmidr) 153 { 154 etmidr &= ETMIDR_PTM_VERSION; 155 156 if (etmidr == ETMIDR_PTM_VERSION) 157 return CS_ETM_PROTO_PTM; 158 159 return CS_ETM_PROTO_ETMV3; 160 } 161 162 static int cs_etm__get_magic(struct cs_etm_queue *etmq, u8 trace_chan_id, u64 *magic) 163 { 164 struct int_node *inode; 165 u64 *metadata; 166 167 inode = intlist__find(etmq->traceid_list, trace_chan_id); 168 if (!inode) 169 return -EINVAL; 170 171 metadata = inode->priv; 172 *magic = metadata[CS_ETM_MAGIC]; 173 return 0; 174 } 175 176 int cs_etm__get_cpu(struct cs_etm_queue *etmq, u8 trace_chan_id, int *cpu) 177 { 178 struct int_node *inode; 179 u64 *metadata; 180 181 inode = intlist__find(etmq->traceid_list, trace_chan_id); 182 if (!inode) 183 
return -EINVAL; 184 185 metadata = inode->priv; 186 *cpu = (int)metadata[CS_ETM_CPU]; 187 return 0; 188 } 189 190 /* 191 * The returned PID format is presented as an enum: 192 * 193 * CS_ETM_PIDFMT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced. 194 * CS_ETM_PIDFMT_CTXTID2: CONTEXTIDR_EL2 is traced. 195 * CS_ETM_PIDFMT_NONE: No context IDs 196 * 197 * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2 198 * are enabled at the same time when the session runs on an EL2 kernel. 199 * This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be 200 * recorded in the trace data, the tool will selectively use 201 * CONTEXTIDR_EL2 as PID. 202 * 203 * The result is cached in etm->pid_fmt so this function only needs to be called 204 * when processing the aux info. 205 */ 206 static enum cs_etm_pid_fmt cs_etm__init_pid_fmt(u64 *metadata) 207 { 208 u64 val; 209 210 if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) { 211 val = metadata[CS_ETM_ETMCR]; 212 /* CONTEXTIDR is traced */ 213 if (val & BIT(ETM_OPT_CTXTID)) 214 return CS_ETM_PIDFMT_CTXTID; 215 } else { 216 val = metadata[CS_ETMV4_TRCCONFIGR]; 217 /* CONTEXTIDR_EL2 is traced */ 218 if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT))) 219 return CS_ETM_PIDFMT_CTXTID2; 220 /* CONTEXTIDR_EL1 is traced */ 221 else if (val & BIT(ETM4_CFG_BIT_CTXTID)) 222 return CS_ETM_PIDFMT_CTXTID; 223 } 224 225 return CS_ETM_PIDFMT_NONE; 226 } 227 228 enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq) 229 { 230 return etmq->etm->pid_fmt; 231 } 232 233 static int cs_etm__insert_trace_id_node(struct cs_etm_queue *etmq, 234 u8 trace_chan_id, u64 *cpu_metadata) 235 { 236 /* Get an RB node for this CPU */ 237 struct int_node *inode = intlist__findnew(etmq->traceid_list, trace_chan_id); 238 239 /* Something went wrong, no need to continue */ 240 if (!inode) 241 return -ENOMEM; 242 243 /* Disallow re-mapping a different traceID to metadata pair. 
*/ 244 if (inode->priv) { 245 u64 *curr_cpu_data = inode->priv; 246 u8 curr_chan_id; 247 int err; 248 249 if (curr_cpu_data[CS_ETM_CPU] != cpu_metadata[CS_ETM_CPU]) { 250 /* 251 * With > CORESIGHT_TRACE_IDS_MAX ETMs, overlapping IDs 252 * are expected (but not supported) in per-thread mode, 253 * rather than signifying an error. 254 */ 255 if (etmq->etm->per_thread_decoding) 256 pr_err("CS_ETM: overlapping Trace IDs aren't currently supported in per-thread mode\n"); 257 else 258 pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n"); 259 260 return -EINVAL; 261 } 262 263 /* check that the mapped ID matches */ 264 err = cs_etm__metadata_get_trace_id(&curr_chan_id, curr_cpu_data); 265 if (err) 266 return err; 267 268 if (curr_chan_id != trace_chan_id) { 269 pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n"); 270 return -EINVAL; 271 } 272 273 /* Skip re-adding the same mappings if everything matched */ 274 return 0; 275 } 276 277 /* Not one we've seen before, associate the traceID with the metadata pointer */ 278 inode->priv = cpu_metadata; 279 280 return 0; 281 } 282 283 static struct cs_etm_queue *cs_etm__get_queue(struct cs_etm_auxtrace *etm, int cpu) 284 { 285 if (etm->per_thread_decoding) 286 return etm->queues.queue_array[0].priv; 287 else 288 return etm->queues.queue_array[cpu].priv; 289 } 290 291 static int cs_etm__map_trace_id_v0(struct cs_etm_auxtrace *etm, u8 trace_chan_id, 292 u64 *cpu_metadata) 293 { 294 struct cs_etm_queue *etmq; 295 296 /* 297 * If the queue is unformatted then only save one mapping in the 298 * queue associated with that CPU so only one decoder is made. 299 */ 300 etmq = cs_etm__get_queue(etm, cpu_metadata[CS_ETM_CPU]); 301 if (etmq->format == UNFORMATTED) 302 return cs_etm__insert_trace_id_node(etmq, trace_chan_id, 303 cpu_metadata); 304 305 /* 306 * Otherwise, version 0 trace IDs are global so save them into every 307 * queue. 
308 */ 309 for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) { 310 int ret; 311 312 etmq = etm->queues.queue_array[i].priv; 313 ret = cs_etm__insert_trace_id_node(etmq, trace_chan_id, 314 cpu_metadata); 315 if (ret) 316 return ret; 317 } 318 319 return 0; 320 } 321 322 static int cs_etm__process_trace_id_v0(struct cs_etm_auxtrace *etm, int cpu, 323 u64 hw_id) 324 { 325 int err; 326 u64 *cpu_data; 327 u8 trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id); 328 329 cpu_data = get_cpu_data(etm, cpu); 330 if (cpu_data == NULL) 331 return -EINVAL; 332 333 err = cs_etm__map_trace_id_v0(etm, trace_chan_id, cpu_data); 334 if (err) 335 return err; 336 337 /* 338 * if we are picking up the association from the packet, need to plug 339 * the correct trace ID into the metadata for setting up decoders later. 340 */ 341 return cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data); 342 } 343 344 static int cs_etm__process_trace_id_v0_1(struct cs_etm_auxtrace *etm, int cpu, 345 u64 hw_id) 346 { 347 struct cs_etm_queue *etmq = cs_etm__get_queue(etm, cpu); 348 int ret; 349 u64 *cpu_data; 350 u32 sink_id = FIELD_GET(CS_AUX_HW_ID_SINK_ID_MASK, hw_id); 351 u8 trace_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id); 352 353 /* 354 * Check sink id hasn't changed in per-cpu mode. In per-thread mode, 355 * let it pass for now until an actual overlapping trace ID is hit. In 356 * most cases IDs won't overlap even if the sink changes. 
357 */ 358 if (!etmq->etm->per_thread_decoding && etmq->sink_id != SINK_UNSET && 359 etmq->sink_id != sink_id) { 360 pr_err("CS_ETM: mismatch between sink IDs\n"); 361 return -EINVAL; 362 } 363 364 etmq->sink_id = sink_id; 365 366 /* Find which other queues use this sink and link their ID maps */ 367 for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) { 368 struct cs_etm_queue *other_etmq = etm->queues.queue_array[i].priv; 369 370 /* Different sinks, skip */ 371 if (other_etmq->sink_id != etmq->sink_id) 372 continue; 373 374 /* Already linked, skip */ 375 if (other_etmq->traceid_list == etmq->traceid_list) 376 continue; 377 378 /* At the point of first linking, this one should be empty */ 379 if (!intlist__empty(etmq->traceid_list)) { 380 pr_err("CS_ETM: Can't link populated trace ID lists\n"); 381 return -EINVAL; 382 } 383 384 etmq->own_traceid_list = NULL; 385 intlist__delete(etmq->traceid_list); 386 etmq->traceid_list = other_etmq->traceid_list; 387 break; 388 } 389 390 cpu_data = get_cpu_data(etm, cpu); 391 ret = cs_etm__insert_trace_id_node(etmq, trace_id, cpu_data); 392 if (ret) 393 return ret; 394 395 ret = cs_etm__metadata_set_trace_id(trace_id, cpu_data); 396 if (ret) 397 return ret; 398 399 return 0; 400 } 401 402 static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata) 403 { 404 u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC]; 405 406 switch (cs_etm_magic) { 407 case __perf_cs_etmv3_magic: 408 *trace_chan_id = (u8)(cpu_metadata[CS_ETM_ETMTRACEIDR] & 409 CORESIGHT_TRACE_ID_VAL_MASK); 410 break; 411 case __perf_cs_etmv4_magic: 412 case __perf_cs_ete_magic: 413 *trace_chan_id = (u8)(cpu_metadata[CS_ETMV4_TRCTRACEIDR] & 414 CORESIGHT_TRACE_ID_VAL_MASK); 415 break; 416 default: 417 return -EINVAL; 418 } 419 return 0; 420 } 421 422 /* 423 * update metadata trace ID from the value found in the AUX_HW_INFO packet. 
424 */ 425 static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata) 426 { 427 u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC]; 428 429 switch (cs_etm_magic) { 430 case __perf_cs_etmv3_magic: 431 cpu_metadata[CS_ETM_ETMTRACEIDR] = trace_chan_id; 432 break; 433 case __perf_cs_etmv4_magic: 434 case __perf_cs_ete_magic: 435 cpu_metadata[CS_ETMV4_TRCTRACEIDR] = trace_chan_id; 436 break; 437 438 default: 439 return -EINVAL; 440 } 441 return 0; 442 } 443 444 /* 445 * Get a metadata index for a specific cpu from an array. 446 * 447 */ 448 static int get_cpu_data_idx(struct cs_etm_auxtrace *etm, int cpu) 449 { 450 int i; 451 452 for (i = 0; i < etm->num_cpu; i++) { 453 if (etm->metadata[i][CS_ETM_CPU] == (u64)cpu) { 454 return i; 455 } 456 } 457 458 return -1; 459 } 460 461 /* 462 * Get a metadata for a specific cpu from an array. 463 * 464 */ 465 static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu) 466 { 467 int idx = get_cpu_data_idx(etm, cpu); 468 469 return (idx != -1) ? etm->metadata[idx] : NULL; 470 } 471 472 /* 473 * Handle the PERF_RECORD_AUX_OUTPUT_HW_ID event. 474 * 475 * The payload associates the Trace ID and the CPU. 476 * The routine is tolerant of seeing multiple packets with the same association, 477 * but a CPU / Trace ID association changing during a session is an error. 478 */ 479 static int cs_etm__process_aux_output_hw_id(struct perf_session *session, 480 union perf_event *event) 481 { 482 struct cs_etm_auxtrace *etm; 483 struct perf_sample sample; 484 struct evsel *evsel; 485 u64 hw_id; 486 int cpu, version, err; 487 488 /* extract and parse the HW ID */ 489 hw_id = event->aux_output_hw_id.hw_id; 490 version = FIELD_GET(CS_AUX_HW_ID_MAJOR_VERSION_MASK, hw_id); 491 492 /* check that we can handle this version */ 493 if (version > CS_AUX_HW_ID_MAJOR_VERSION) { 494 pr_err("CS ETM Trace: PERF_RECORD_AUX_OUTPUT_HW_ID version %d not supported. 
Please update Perf.\n", 495 version); 496 return -EINVAL; 497 } 498 499 /* get access to the etm metadata */ 500 etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace); 501 if (!etm || !etm->metadata) 502 return -EINVAL; 503 504 /* parse the sample to get the CPU */ 505 evsel = evlist__event2evsel(session->evlist, event); 506 if (!evsel) 507 return -EINVAL; 508 perf_sample__init(&sample, /*all=*/false); 509 err = evsel__parse_sample(evsel, event, &sample); 510 if (err) 511 goto out; 512 cpu = sample.cpu; 513 if (cpu == -1) { 514 /* no CPU in the sample - possibly recorded with an old version of perf */ 515 pr_err("CS_ETM: no CPU AUX_OUTPUT_HW_ID sample. Use compatible perf to record."); 516 err = -EINVAL; 517 goto out; 518 } 519 520 if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0) { 521 err = cs_etm__process_trace_id_v0(etm, cpu, hw_id); 522 goto out; 523 } 524 525 err = cs_etm__process_trace_id_v0_1(etm, cpu, hw_id); 526 out: 527 perf_sample__exit(&sample); 528 return err; 529 } 530 531 void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq, 532 u8 trace_chan_id) 533 { 534 /* 535 * When a timestamp packet is encountered the backend code 536 * is stopped so that the front end has time to process packets 537 * that were accumulated in the traceID queue. Since there can 538 * be more than one channel per cs_etm_queue, we need to specify 539 * what traceID queue needs servicing. 
540 */ 541 etmq->pending_timestamp_chan_id = trace_chan_id; 542 } 543 544 static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq, 545 u8 *trace_chan_id) 546 { 547 struct cs_etm_packet_queue *packet_queue; 548 549 if (!etmq->pending_timestamp_chan_id) 550 return 0; 551 552 if (trace_chan_id) 553 *trace_chan_id = etmq->pending_timestamp_chan_id; 554 555 packet_queue = cs_etm__etmq_get_packet_queue(etmq, 556 etmq->pending_timestamp_chan_id); 557 if (!packet_queue) 558 return 0; 559 560 /* Acknowledge pending status */ 561 etmq->pending_timestamp_chan_id = 0; 562 563 /* See function cs_etm_decoder__do_{hard|soft}_timestamp() */ 564 return packet_queue->cs_timestamp; 565 } 566 567 static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue) 568 { 569 int i; 570 571 queue->head = 0; 572 queue->tail = 0; 573 queue->packet_count = 0; 574 for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) { 575 queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN; 576 queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR; 577 queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR; 578 queue->packet_buffer[i].instr_count = 0; 579 queue->packet_buffer[i].last_instr_taken_branch = false; 580 queue->packet_buffer[i].last_instr_size = 0; 581 queue->packet_buffer[i].last_instr_type = 0; 582 queue->packet_buffer[i].last_instr_subtype = 0; 583 queue->packet_buffer[i].last_instr_cond = 0; 584 queue->packet_buffer[i].flags = 0; 585 queue->packet_buffer[i].exception_number = UINT32_MAX; 586 queue->packet_buffer[i].trace_chan_id = UINT8_MAX; 587 queue->packet_buffer[i].cpu = INT_MIN; 588 } 589 } 590 591 static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq) 592 { 593 int idx; 594 struct int_node *inode; 595 struct cs_etm_traceid_queue *tidq; 596 struct intlist *traceid_queues_list = etmq->traceid_queues_list; 597 598 intlist__for_each_entry(inode, traceid_queues_list) { 599 idx = (int)(intptr_t)inode->priv; 600 tidq = etmq->traceid_queues[idx]; 601 
cs_etm__clear_packet_queue(&tidq->packet_queue); 602 } 603 } 604 605 static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq, 606 struct cs_etm_traceid_queue *tidq, 607 u8 trace_chan_id) 608 { 609 int rc = -ENOMEM; 610 struct auxtrace_queue *queue; 611 struct cs_etm_auxtrace *etm = etmq->etm; 612 613 cs_etm__clear_packet_queue(&tidq->packet_queue); 614 615 queue = &etmq->etm->queues.queue_array[etmq->queue_nr]; 616 tidq->trace_chan_id = trace_chan_id; 617 tidq->el = tidq->prev_packet_el = ocsd_EL_unknown; 618 tidq->thread = machine__findnew_thread(&etm->session->machines.host, -1, 619 queue->tid); 620 tidq->prev_packet_thread = machine__idle_thread(&etm->session->machines.host); 621 622 tidq->packet = zalloc(sizeof(struct cs_etm_packet)); 623 if (!tidq->packet) 624 goto out; 625 626 tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet)); 627 if (!tidq->prev_packet) 628 goto out_free; 629 630 if (etm->synth_opts.last_branch) { 631 size_t sz = sizeof(struct branch_stack); 632 633 sz += etm->synth_opts.last_branch_sz * 634 sizeof(struct branch_entry); 635 tidq->last_branch = zalloc(sz); 636 if (!tidq->last_branch) 637 goto out_free; 638 tidq->last_branch_rb = zalloc(sz); 639 if (!tidq->last_branch_rb) 640 goto out_free; 641 } 642 643 tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); 644 if (!tidq->event_buf) 645 goto out_free; 646 647 return 0; 648 649 out_free: 650 zfree(&tidq->last_branch_rb); 651 zfree(&tidq->last_branch); 652 zfree(&tidq->prev_packet); 653 zfree(&tidq->packet); 654 out: 655 return rc; 656 } 657 658 static struct cs_etm_traceid_queue 659 *cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id) 660 { 661 int idx; 662 struct int_node *inode; 663 struct intlist *traceid_queues_list; 664 struct cs_etm_traceid_queue *tidq, **traceid_queues; 665 struct cs_etm_auxtrace *etm = etmq->etm; 666 667 if (etm->per_thread_decoding) 668 trace_chan_id = CS_ETM_PER_THREAD_TRACEID; 669 670 traceid_queues_list = etmq->traceid_queues_list; 
671 672 /* 673 * Check if the traceid_queue exist for this traceID by looking 674 * in the queue list. 675 */ 676 inode = intlist__find(traceid_queues_list, trace_chan_id); 677 if (inode) { 678 idx = (int)(intptr_t)inode->priv; 679 return etmq->traceid_queues[idx]; 680 } 681 682 /* We couldn't find a traceid_queue for this traceID, allocate one */ 683 tidq = malloc(sizeof(*tidq)); 684 if (!tidq) 685 return NULL; 686 687 memset(tidq, 0, sizeof(*tidq)); 688 689 /* Get a valid index for the new traceid_queue */ 690 idx = intlist__nr_entries(traceid_queues_list); 691 /* Memory for the inode is free'ed in cs_etm_free_traceid_queues () */ 692 inode = intlist__findnew(traceid_queues_list, trace_chan_id); 693 if (!inode) 694 goto out_free; 695 696 /* Associate this traceID with this index */ 697 inode->priv = (void *)(intptr_t)idx; 698 699 if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id)) 700 goto out_free; 701 702 /* Grow the traceid_queues array by one unit */ 703 traceid_queues = etmq->traceid_queues; 704 traceid_queues = reallocarray(traceid_queues, 705 idx + 1, 706 sizeof(*traceid_queues)); 707 708 /* 709 * On failure reallocarray() returns NULL and the original block of 710 * memory is left untouched. 711 */ 712 if (!traceid_queues) 713 goto out_free; 714 715 traceid_queues[idx] = tidq; 716 etmq->traceid_queues = traceid_queues; 717 718 return etmq->traceid_queues[idx]; 719 720 out_free: 721 /* 722 * Function intlist__remove() removes the inode from the list 723 * and delete the memory associated to it. 
724 */ 725 intlist__remove(traceid_queues_list, inode); 726 free(tidq); 727 728 return NULL; 729 } 730 731 struct cs_etm_packet_queue 732 *cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id) 733 { 734 struct cs_etm_traceid_queue *tidq; 735 736 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); 737 if (tidq) 738 return &tidq->packet_queue; 739 740 return NULL; 741 } 742 743 static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm, 744 struct cs_etm_traceid_queue *tidq) 745 { 746 struct cs_etm_packet *tmp; 747 748 if (etm->synth_opts.branches || etm->synth_opts.last_branch || 749 etm->synth_opts.instructions) { 750 /* 751 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for 752 * the next incoming packet. 753 * 754 * Threads and exception levels are also tracked for both the 755 * previous and current packets. This is because the previous 756 * packet is used for the 'from' IP for branch samples, so the 757 * thread at that time must also be assigned to that sample. 758 * Across discontinuity packets the thread can change, so by 759 * tracking the thread for the previous packet the branch sample 760 * will have the correct info. 
761 */ 762 tmp = tidq->packet; 763 tidq->packet = tidq->prev_packet; 764 tidq->prev_packet = tmp; 765 tidq->prev_packet_el = tidq->el; 766 thread__put(tidq->prev_packet_thread); 767 tidq->prev_packet_thread = thread__get(tidq->thread); 768 } 769 } 770 771 static void cs_etm__packet_dump(const char *pkt_string, void *data) 772 { 773 const char *color = PERF_COLOR_BLUE; 774 int len = strlen(pkt_string); 775 struct cs_etm_queue *etmq = data; 776 char queue_nr[64]; 777 778 if (verbose) 779 snprintf(queue_nr, sizeof(queue_nr), "Qnr:%u; ", etmq->queue_nr); 780 else 781 queue_nr[0] = '\0'; 782 783 if (len && (pkt_string[len-1] == '\n')) 784 color_fprintf(stdout, color, " %s%s", queue_nr, pkt_string); 785 else 786 color_fprintf(stdout, color, " %s%s\n", queue_nr, pkt_string); 787 788 fflush(stdout); 789 } 790 791 static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params, 792 u64 *metadata, u32 etmidr) 793 { 794 t_params->protocol = cs_etm__get_v7_protocol_version(etmidr); 795 t_params->etmv3.reg_ctrl = metadata[CS_ETM_ETMCR]; 796 t_params->etmv3.reg_trc_id = metadata[CS_ETM_ETMTRACEIDR]; 797 } 798 799 static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params, 800 u64 *metadata) 801 { 802 t_params->protocol = CS_ETM_PROTO_ETMV4i; 803 t_params->etmv4.reg_idr0 = metadata[CS_ETMV4_TRCIDR0]; 804 t_params->etmv4.reg_idr1 = metadata[CS_ETMV4_TRCIDR1]; 805 t_params->etmv4.reg_idr2 = metadata[CS_ETMV4_TRCIDR2]; 806 t_params->etmv4.reg_idr8 = metadata[CS_ETMV4_TRCIDR8]; 807 t_params->etmv4.reg_configr = metadata[CS_ETMV4_TRCCONFIGR]; 808 t_params->etmv4.reg_traceidr = metadata[CS_ETMV4_TRCTRACEIDR]; 809 } 810 811 static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params, 812 u64 *metadata) 813 { 814 t_params->protocol = CS_ETM_PROTO_ETE; 815 t_params->ete.reg_idr0 = metadata[CS_ETE_TRCIDR0]; 816 t_params->ete.reg_idr1 = metadata[CS_ETE_TRCIDR1]; 817 t_params->ete.reg_idr2 = metadata[CS_ETE_TRCIDR2]; 818 
t_params->ete.reg_idr8 = metadata[CS_ETE_TRCIDR8]; 819 t_params->ete.reg_configr = metadata[CS_ETE_TRCCONFIGR]; 820 t_params->ete.reg_traceidr = metadata[CS_ETE_TRCTRACEIDR]; 821 t_params->ete.reg_devarch = metadata[CS_ETE_TRCDEVARCH]; 822 } 823 824 static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params, 825 struct cs_etm_queue *etmq) 826 { 827 struct int_node *inode; 828 829 intlist__for_each_entry(inode, etmq->traceid_list) { 830 u64 *metadata = inode->priv; 831 u64 architecture = metadata[CS_ETM_MAGIC]; 832 u32 etmidr; 833 834 switch (architecture) { 835 case __perf_cs_etmv3_magic: 836 etmidr = metadata[CS_ETM_ETMIDR]; 837 cs_etm__set_trace_param_etmv3(t_params++, metadata, etmidr); 838 break; 839 case __perf_cs_etmv4_magic: 840 cs_etm__set_trace_param_etmv4(t_params++, metadata); 841 break; 842 case __perf_cs_ete_magic: 843 cs_etm__set_trace_param_ete(t_params++, metadata); 844 break; 845 default: 846 return -EINVAL; 847 } 848 } 849 850 return 0; 851 } 852 853 static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, 854 struct cs_etm_queue *etmq, 855 enum cs_etm_decoder_operation mode) 856 { 857 int ret = -EINVAL; 858 859 if (!(mode < CS_ETM_OPERATION_MAX)) 860 goto out; 861 862 d_params->packet_printer = cs_etm__packet_dump; 863 d_params->operation = mode; 864 d_params->data = etmq; 865 d_params->formatted = etmq->format == FORMATTED; 866 d_params->fsyncs = false; 867 d_params->hsyncs = false; 868 d_params->frame_aligned = true; 869 870 ret = 0; 871 out: 872 return ret; 873 } 874 875 static void cs_etm__dump_event(struct cs_etm_queue *etmq, 876 struct auxtrace_buffer *buffer) 877 { 878 int ret; 879 const char *color = PERF_COLOR_BLUE; 880 size_t buffer_used = 0; 881 882 fprintf(stdout, "\n"); 883 color_fprintf(stdout, color, 884 ". ... 
CoreSight %s Trace data: size %#zx bytes\n", 885 cs_etm_decoder__get_name(etmq->decoder), buffer->size); 886 887 do { 888 size_t consumed; 889 890 ret = cs_etm_decoder__process_data_block( 891 etmq->decoder, buffer->offset, 892 &((u8 *)buffer->data)[buffer_used], 893 buffer->size - buffer_used, &consumed); 894 if (ret) 895 break; 896 897 buffer_used += consumed; 898 } while (buffer_used < buffer->size); 899 900 cs_etm_decoder__reset(etmq->decoder); 901 } 902 903 static int cs_etm__flush_events(struct perf_session *session, 904 const struct perf_tool *tool) 905 { 906 struct cs_etm_auxtrace *etm = container_of(session->auxtrace, 907 struct cs_etm_auxtrace, 908 auxtrace); 909 if (dump_trace) 910 return 0; 911 912 if (!tool->ordered_events) 913 return -EINVAL; 914 915 if (etm->timeless_decoding) { 916 /* 917 * Pass tid = -1 to process all queues. But likely they will have 918 * already been processed on PERF_RECORD_EXIT anyway. 919 */ 920 return cs_etm__process_timeless_queues(etm, -1); 921 } 922 923 return cs_etm__process_timestamped_queues(etm); 924 } 925 926 static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq) 927 { 928 int idx; 929 uintptr_t priv; 930 struct int_node *inode, *tmp; 931 struct cs_etm_traceid_queue *tidq; 932 struct intlist *traceid_queues_list = etmq->traceid_queues_list; 933 934 intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) { 935 priv = (uintptr_t)inode->priv; 936 idx = priv; 937 938 /* Free this traceid_queue from the array */ 939 tidq = etmq->traceid_queues[idx]; 940 thread__zput(tidq->thread); 941 thread__zput(tidq->prev_packet_thread); 942 zfree(&tidq->event_buf); 943 zfree(&tidq->last_branch); 944 zfree(&tidq->last_branch_rb); 945 zfree(&tidq->prev_packet); 946 zfree(&tidq->packet); 947 zfree(&tidq); 948 949 /* 950 * Function intlist__remove() removes the inode from the list 951 * and delete the memory associated to it. 
952 */ 953 intlist__remove(traceid_queues_list, inode); 954 } 955 956 /* Then the RB tree itself */ 957 intlist__delete(traceid_queues_list); 958 etmq->traceid_queues_list = NULL; 959 960 /* finally free the traceid_queues array */ 961 zfree(&etmq->traceid_queues); 962 } 963 964 static void cs_etm__free_queue(void *priv) 965 { 966 struct int_node *inode, *tmp; 967 struct cs_etm_queue *etmq = priv; 968 969 if (!etmq) 970 return; 971 972 cs_etm_decoder__free(etmq->decoder); 973 cs_etm__free_traceid_queues(etmq); 974 975 if (etmq->own_traceid_list) { 976 /* First remove all traceID/metadata nodes for the RB tree */ 977 intlist__for_each_entry_safe(inode, tmp, etmq->own_traceid_list) 978 intlist__remove(etmq->own_traceid_list, inode); 979 980 /* Then the RB tree itself */ 981 intlist__delete(etmq->own_traceid_list); 982 } 983 984 free(etmq); 985 } 986 987 static void cs_etm__free_events(struct perf_session *session) 988 { 989 unsigned int i; 990 struct cs_etm_auxtrace *aux = container_of(session->auxtrace, 991 struct cs_etm_auxtrace, 992 auxtrace); 993 struct auxtrace_queues *queues = &aux->queues; 994 995 for (i = 0; i < queues->nr_queues; i++) { 996 cs_etm__free_queue(queues->queue_array[i].priv); 997 queues->queue_array[i].priv = NULL; 998 } 999 1000 auxtrace_queues__free(queues); 1001 } 1002 1003 static void cs_etm__free(struct perf_session *session) 1004 { 1005 int i; 1006 struct cs_etm_auxtrace *aux = container_of(session->auxtrace, 1007 struct cs_etm_auxtrace, 1008 auxtrace); 1009 cs_etm__free_events(session); 1010 session->auxtrace = NULL; 1011 1012 for (i = 0; i < aux->num_cpu; i++) 1013 zfree(&aux->metadata[i]); 1014 1015 zfree(&aux->metadata); 1016 zfree(&aux); 1017 } 1018 1019 static bool cs_etm__evsel_is_auxtrace(struct perf_session *session, 1020 struct evsel *evsel) 1021 { 1022 struct cs_etm_auxtrace *aux = container_of(session->auxtrace, 1023 struct cs_etm_auxtrace, 1024 auxtrace); 1025 1026 return evsel->core.attr.type == aux->pmu_type; 1027 } 1028 
1029 static struct machine *cs_etm__get_machine(struct cs_etm_queue *etmq, 1030 ocsd_ex_level el) 1031 { 1032 enum cs_etm_pid_fmt pid_fmt = cs_etm__get_pid_fmt(etmq); 1033 1034 /* 1035 * For any virtualisation based on nVHE (e.g. pKVM), or host kernels 1036 * running at EL1 assume everything is the host. 1037 */ 1038 if (pid_fmt == CS_ETM_PIDFMT_CTXTID) 1039 return &etmq->etm->session->machines.host; 1040 1041 /* 1042 * Not perfect, but otherwise assume anything in EL1 is the default 1043 * guest, and everything else is the host. Distinguishing between guest 1044 * and host userspaces isn't currently supported either. Neither is 1045 * multiple guest support. All this does is reduce the likeliness of 1046 * decode errors where we look into the host kernel maps when it should 1047 * have been the guest maps. 1048 */ 1049 switch (el) { 1050 case ocsd_EL1: 1051 return machines__find_guest(&etmq->etm->session->machines, 1052 DEFAULT_GUEST_KERNEL_ID); 1053 case ocsd_EL3: 1054 case ocsd_EL2: 1055 case ocsd_EL0: 1056 case ocsd_EL_unknown: 1057 default: 1058 return &etmq->etm->session->machines.host; 1059 } 1060 } 1061 1062 static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address, 1063 ocsd_ex_level el) 1064 { 1065 struct machine *machine = cs_etm__get_machine(etmq, el); 1066 1067 if (address >= machine__kernel_start(machine)) { 1068 if (machine__is_host(machine)) 1069 return PERF_RECORD_MISC_KERNEL; 1070 else 1071 return PERF_RECORD_MISC_GUEST_KERNEL; 1072 } else { 1073 if (machine__is_host(machine)) 1074 return PERF_RECORD_MISC_USER; 1075 else { 1076 /* 1077 * Can't really happen at the moment because 1078 * cs_etm__get_machine() will always return 1079 * machines.host for any non EL1 trace. 
			 */
			return PERF_RECORD_MISC_GUEST_USER;
		}
	}
}

/*
 * OpenCSD memory-access callback: read up to 'size' bytes of instruction
 * memory at 'address' from the DSO mapped for this trace channel's thread.
 * Returns the number of bytes read, or 0 on any failure (decoder treats
 * 0 as "no memory at this address").
 */
static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
			      u64 address, size_t size, u8 *buffer,
			      const ocsd_mem_space_acc_t mem_space)
{
	u8 cpumode;
	u64 offset;
	int len;
	struct addr_location al;
	struct dso *dso;
	struct cs_etm_traceid_queue *tidq;
	int ret = 0;

	if (!etmq)
		return 0;

	addr_location__init(&al);
	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
	if (!tidq)
		goto out;

	/*
	 * We've already tracked EL along side the PID in cs_etm__set_thread()
	 * so double check that it matches what OpenCSD thinks as well. It
	 * doesn't distinguish between EL0 and EL1 for this mem access callback
	 * so we had to do the extra tracking. Skip validation if it's any of
	 * the 'any' values.
	 */
	if (!(mem_space == OCSD_MEM_SPACE_ANY ||
	      mem_space == OCSD_MEM_SPACE_N || mem_space == OCSD_MEM_SPACE_S)) {
		if (mem_space & OCSD_MEM_SPACE_EL1N) {
			/* Includes both non secure EL1 and EL0 */
			assert(tidq->el == ocsd_EL1 || tidq->el == ocsd_EL0);
		} else if (mem_space & OCSD_MEM_SPACE_EL2)
			assert(tidq->el == ocsd_EL2);
		else if (mem_space & OCSD_MEM_SPACE_EL3)
			assert(tidq->el == ocsd_EL3);
	}

	cpumode = cs_etm__cpu_mode(etmq, address, tidq->el);

	if (!thread__find_map(tidq->thread, cpumode, address, &al))
		goto out;

	dso = map__dso(al.map);
	if (!dso)
		goto out;

	/* Don't retry a DSO whose data already failed to load for itrace. */
	if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR &&
	    dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE))
		goto out;

	offset = map__map_ip(al.map, address);

	map__load(al.map);

	len = dso__data_read_offset(dso, maps__machine(thread__maps(tidq->thread)),
				    offset, buffer, size);

	if (len <= 0) {
		ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n"
				 " Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n");
		if (!dso__auxtrace_warned(dso)) {
			pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n",
			       address,
			       dso__long_name(dso) ? dso__long_name(dso) : "Unknown");
			dso__set_auxtrace_warned(dso);
		}
		goto out;
	}
	ret = len;
out:
	addr_location__exit(&al);
	return ret;
}

/* Allocate a queue and its traceID lookup structures; NULL on failure. */
static struct cs_etm_queue *cs_etm__alloc_queue(void)
{
	struct cs_etm_queue *etmq = zalloc(sizeof(*etmq));
	if (!etmq)
		return NULL;

	etmq->traceid_queues_list = intlist__new(NULL);
	if (!etmq->traceid_queues_list)
		goto out_free;

	/*
	 * Create an RB tree for traceID-metadata tuple. Since the conversion
	 * has to be made for each packet that gets decoded, optimizing access
	 * in anything other than a sequential array is worth doing.
	 */
	etmq->traceid_list = etmq->own_traceid_list = intlist__new(NULL);
	if (!etmq->traceid_list)
		goto out_free;

	return etmq;

out_free:
	intlist__delete(etmq->traceid_queues_list);
	free(etmq);

	return NULL;
}

/*
 * Lazily attach a cs_etm_queue to an auxtrace queue; no-op if one is
 * already present.
 */
static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
			       struct auxtrace_queue *queue,
			       unsigned int queue_nr)
{
	struct cs_etm_queue *etmq = queue->priv;

	if (etmq)
		return 0;

	etmq = cs_etm__alloc_queue();

	if (!etmq)
		return -ENOMEM;

	queue->priv = etmq;
	etmq->etm = etm;
	etmq->queue_nr = queue_nr;
	queue->cpu = queue_nr; /* Placeholder, may be reset to -1 in per-thread mode */
	etmq->offset = 0;
	etmq->sink_id = SINK_UNSET;

	return 0;
}

/*
 * Decode from the start of this queue's trace until the first CoreSight
 * timestamp is found, then seed the auxtrace min heap with it so queues
 * can be processed in chronological order.
 */
static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm,
					    struct cs_etm_queue *etmq,
					    unsigned int queue_nr)
{
	int ret = 0;
	unsigned int cs_queue_nr;
	u8 trace_chan_id;
	u64 cs_timestamp;

	/*
	 * We are under a CPU-wide trace scenario. As such we need to know
	 * when the code that generated the traces started to execute so that
	 * it can be correlated with execution on other CPUs. So we get a
	 * handle on the beginning of traces and decode until we find a
	 * timestamp. The timestamp is then added to the auxtrace min heap
	 * in order to know what nibble (of all the etmqs) to decode first.
	 */
	while (1) {
		/*
		 * Fetch an aux_buffer from this etmq. Bail if no more
		 * blocks or an error has been encountered.
		 */
		ret = cs_etm__get_data_block(etmq);
		if (ret <= 0)
			goto out;

		/*
		 * Run decoder on the trace block. The decoder will stop when
		 * encountering a CS timestamp, a full packet queue or the end of
		 * trace for that block.
		 */
		ret = cs_etm__decode_data_block(etmq);
		if (ret)
			goto out;

		/*
		 * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
		 * the timestamp calculation for us.
		 */
		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);

		/* We found a timestamp, no need to continue. */
		if (cs_timestamp)
			break;

		/*
		 * We didn't find a timestamp so empty all the traceid packet
		 * queues before looking for another timestamp packet, either
		 * in the current data block or a new one. Packets that were
		 * just decoded are useless since no timestamp has been
		 * associated with them. As such simply discard them.
		 */
		cs_etm__clear_all_packet_queues(etmq);
	}

	/*
	 * We have a timestamp. Add it to the min heap to reflect when
	 * instructions conveyed by the range packets of this traceID queue
	 * started to execute. Once the same has been done for all the traceID
	 * queues of each etmq, rendering and decoding can start in
	 * chronological order.
	 *
	 * Note that packets decoded above are still in the traceID's packet
	 * queue and will be processed in cs_etm__process_timestamped_queues().
	 */
	cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
	ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
out:
	return ret;
}

/*
 * Linearise the circular last-branch buffer into the flat branch stack that
 * is attached to synthesized samples.
 */
static inline
void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
				 struct cs_etm_traceid_queue *tidq)
{
	struct branch_stack *bs_src = tidq->last_branch_rb;
	struct branch_stack *bs_dst = tidq->last_branch;
	size_t nr = 0;

	/*
	 * Set the number of records before early exit: ->nr is used to
	 * determine how many branches to copy from ->entries.
	 */
	bs_dst->nr = bs_src->nr;

	/*
	 * Early exit when there is nothing to copy.
	 */
	if (!bs_src->nr)
		return;

	/*
	 * As bs_src->entries is a circular buffer, we need to copy from it in
	 * two steps. First, copy the branches from the most recently inserted
	 * branch ->last_branch_pos until the end of bs_src->entries buffer.
	 */
	nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos;
	memcpy(&bs_dst->entries[0],
	       &bs_src->entries[tidq->last_branch_pos],
	       sizeof(struct branch_entry) * nr);

	/*
	 * If we wrapped around at least once, the branches from the beginning
	 * of the bs_src->entries buffer and until the ->last_branch_pos element
	 * are older valid branches: copy them over. The total number of
	 * branches copied over will be equal to the number of branches asked by
	 * the user in last_branch_sz.
	 */
	if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
		memcpy(&bs_dst->entries[nr],
		       &bs_src->entries[0],
		       sizeof(struct branch_entry) * tidq->last_branch_pos);
	}
}

/* Reset the circular last-branch buffer to its empty state. */
static inline
void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq)
{
	tidq->last_branch_pos = 0;
	tidq->last_branch_rb->nr = 0;
}

/*
 * Return the size (2 or 4 bytes) of the T32 instruction at 'addr' by reading
 * its first halfword from traced memory.
 */
static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
					 u8 trace_chan_id, u64 addr)
{
	u8 instrBytes[2];

	cs_etm__mem_access(etmq, trace_chan_id, addr, ARRAY_SIZE(instrBytes),
			   instrBytes, 0);
	/*
	 * T32 instruction size is indicated by bits[15:11] of the first
	 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
	 * denote a 32-bit instruction.
	 */
	return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
}

static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
{
	/*
	 * Return 0 for packets that have no addresses so that CS_ETM_INVAL_ADDR doesn't
	 * appear in samples.
	 */
	if (packet->sample_type == CS_ETM_DISCONTINUITY ||
	    packet->sample_type == CS_ETM_EXCEPTION)
		return 0;

	return packet->start_addr;
}

static inline
u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
{
	/* Returns 0 for the CS_ETM_DISCONTINUITY packet */
	if (packet->sample_type == CS_ETM_DISCONTINUITY)
		return 0;

	return packet->end_addr - packet->last_instr_size;
}

/*
 * Address of the instruction 'offset' instructions into a range packet.
 * T32 requires walking instruction-by-instruction because sizes are mixed
 * 16/32-bit; A32/A64 instructions are a fixed 4 bytes.
 */
static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
				     u64 trace_chan_id,
				     const struct cs_etm_packet *packet,
				     u64 offset)
{
	if (packet->isa == CS_ETM_ISA_T32) {
		u64 addr = packet->start_addr;

		while (offset) {
			addr += cs_etm__t32_instr_size(etmq,
						       trace_chan_id, addr);
			offset--;
		}
		return addr;
	}

	/* Assume a 4 byte instruction size (A32/A64) */
	return packet->start_addr + offset * 4;
}

/* Record the branch implied by prev_packet -> packet in the circular buffer. */
static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq,
					  struct cs_etm_traceid_queue *tidq)
{
	struct branch_stack *bs = tidq->last_branch_rb;
	struct branch_entry *be;

	/*
	 * The branches are recorded in a circular buffer in reverse
	 * chronological order: we start recording from the last element of the
	 * buffer down. After writing the first element of the stack, move the
	 * insert position back to the end of the buffer.
	 */
	if (!tidq->last_branch_pos)
		tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;

	tidq->last_branch_pos -= 1;

	be = &bs->entries[tidq->last_branch_pos];
	be->from = cs_etm__last_executed_instr(tidq->prev_packet);
	be->to = cs_etm__first_executed_instr(tidq->packet);
	/* No support for mispredict */
	be->flags.mispred = 0;
	be->flags.predicted = 1;

	/*
	 * Increment bs->nr until reaching the number of last branches asked by
	 * the user on the command line.
	 */
	if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
		bs->nr += 1;
}

/* Re-encode a sample into 'event' so it can be injected into the stream. */
static int cs_etm__inject_event(union perf_event *event,
				struct perf_sample *sample, u64 type)
{
	event->header.size = perf_event__sample_event_size(sample, type, 0);
	return perf_event__synthesize_sample(event, type, 0, sample);
}


/*
 * Advance to the next auxtrace buffer for this queue, dropping the previous
 * one. Returns the new buffer length, 0 when no more data, -ENOMEM if the
 * buffer data could not be loaded from the perf data file.
 */
static int
cs_etm__get_trace(struct cs_etm_queue *etmq)
{
	struct auxtrace_buffer *aux_buffer = etmq->buffer;
	struct auxtrace_buffer *old_buffer = aux_buffer;
	struct auxtrace_queue *queue;

	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];

	aux_buffer = auxtrace_buffer__next(queue, aux_buffer);

	/* If no more data, drop the previous auxtrace_buffer and return */
	if (!aux_buffer) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		etmq->buf_len = 0;
		return 0;
	}

	etmq->buffer = aux_buffer;

	/* If the aux_buffer doesn't have data associated, try to load it */
	if (!aux_buffer->data) {
		/* get the file desc associated with the perf data file */
		int fd = perf_data__fd(etmq->etm->session->data);

		aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd);
		if (!aux_buffer->data)
			return -ENOMEM;
	}

	/* If valid, drop the previous buffer */
	if (old_buffer)
		auxtrace_buffer__drop_data(old_buffer);

	etmq->buf_used = 0;
	etmq->buf_len = aux_buffer->size;
	etmq->buf = aux_buffer->data;

	return etmq->buf_len;
}

/*
 * Bind the traceid queue to the thread for 'tid' on the machine implied by
 * the exception level; tid == -1 keeps the current thread.
 */
static void cs_etm__set_thread(struct cs_etm_queue *etmq,
			       struct cs_etm_traceid_queue *tidq, pid_t tid,
			       ocsd_ex_level el)
{
	struct machine *machine = cs_etm__get_machine(etmq, el);

	if (tid != -1) {
		thread__zput(tidq->thread);
		tidq->thread = machine__find_thread(machine, -1, tid);
	}

	/* Couldn't find a known thread */
	if (!tidq->thread)
		tidq->thread = machine__idle_thread(machine);

	tidq->el = el;
}

int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid,
			    u8 trace_chan_id, ocsd_ex_level el)
{
	struct cs_etm_traceid_queue *tidq;

	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
	if (!tidq)
		return -EINVAL;

	cs_etm__set_thread(etmq, tidq, tid, el);
	return 0;
}

bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq)
{
	return !!etmq->etm->timeless_decoding;
}

/*
 * Fill sample->insn/insn_len with the bytes of the instruction at sample->ip,
 * sized according to the packet's ISA.
 */
static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
			      u64 trace_chan_id,
			      const struct cs_etm_packet *packet,
			      struct perf_sample *sample)
{
	/*
	 * It's pointless to read instructions for the CS_ETM_DISCONTINUITY
	 * packet, so directly bail out with 'insn_len' = 0.
	 */
	if (packet->sample_type == CS_ETM_DISCONTINUITY) {
		sample->insn_len = 0;
		return;
	}

	/*
	 * T32 instruction size might be 32-bit or 16-bit, decide by calling
	 * cs_etm__t32_instr_size().
	 */
	if (packet->isa == CS_ETM_ISA_T32)
		sample->insn_len = cs_etm__t32_instr_size(etmq, trace_chan_id,
							  sample->ip);
	/* Otherwise, A64 and A32 instruction size are always 32-bit.
	 */
	else
		sample->insn_len = 4;

	cs_etm__mem_access(etmq, trace_chan_id, sample->ip, sample->insn_len,
			   (void *)sample->insn, 0);
}

/*
 * Convert a raw CoreSight timestamp to perf time when kernel (virtual)
 * timestamps are in the trace; otherwise pass it through unchanged.
 */
u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp)
{
	struct cs_etm_auxtrace *etm = etmq->etm;

	if (etm->has_virtual_ts)
		return tsc_to_perf_time(cs_timestamp, &etm->tc);
	else
		return cs_timestamp;
}

/*
 * Pick the timestamp for a synthesized sample: the packet queue's converted
 * timestamp when decode is timed with virtual timestamps, else the latest
 * kernel timestamp seen so far.
 */
static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq,
					      struct cs_etm_traceid_queue *tidq)
{
	struct cs_etm_auxtrace *etm = etmq->etm;
	struct cs_etm_packet_queue *packet_queue = &tidq->packet_queue;

	if (!etm->timeless_decoding && etm->has_virtual_ts)
		return packet_queue->cs_timestamp;
	else
		return etm->latest_kernel_timestamp;
}

/* Synthesize one PERF_RECORD_SAMPLE for the instructions event. */
static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
					    struct cs_etm_traceid_queue *tidq,
					    u64 addr, u64 period)
{
	int ret = 0;
	struct cs_etm_auxtrace *etm = etmq->etm;
	union perf_event *event = tidq->event_buf;
	struct perf_sample sample;

	perf_sample__init(&sample, /*all=*/true);
	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->el);
	event->sample.header.size = sizeof(struct perf_event_header);

	/* Set time field based on etm auxtrace config.
*/ 1577 sample.time = cs_etm__resolve_sample_time(etmq, tidq); 1578 1579 sample.ip = addr; 1580 sample.pid = thread__pid(tidq->thread); 1581 sample.tid = thread__tid(tidq->thread); 1582 sample.id = etmq->etm->instructions_id; 1583 sample.stream_id = etmq->etm->instructions_id; 1584 sample.period = period; 1585 sample.cpu = tidq->packet->cpu; 1586 sample.flags = tidq->prev_packet->flags; 1587 sample.cpumode = event->sample.header.misc; 1588 1589 cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample); 1590 1591 if (etm->synth_opts.last_branch) 1592 sample.branch_stack = tidq->last_branch; 1593 1594 if (etm->synth_opts.inject) { 1595 ret = cs_etm__inject_event(event, &sample, 1596 etm->instructions_sample_type); 1597 if (ret) 1598 return ret; 1599 } 1600 1601 ret = perf_session__deliver_synth_event(etm->session, event, &sample); 1602 1603 if (ret) 1604 pr_err( 1605 "CS ETM Trace: failed to deliver instruction event, error %d\n", 1606 ret); 1607 1608 perf_sample__exit(&sample); 1609 return ret; 1610 } 1611 1612 /* 1613 * The cs etm packet encodes an instruction range between a branch target 1614 * and the next taken branch. Generate sample accordingly. 1615 */ 1616 static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, 1617 struct cs_etm_traceid_queue *tidq) 1618 { 1619 int ret = 0; 1620 struct cs_etm_auxtrace *etm = etmq->etm; 1621 struct perf_sample sample = {.ip = 0,}; 1622 union perf_event *event = tidq->event_buf; 1623 struct dummy_branch_stack { 1624 u64 nr; 1625 u64 hw_idx; 1626 struct branch_entry entries; 1627 } dummy_bs; 1628 u64 ip; 1629 1630 ip = cs_etm__last_executed_instr(tidq->prev_packet); 1631 1632 event->sample.header.type = PERF_RECORD_SAMPLE; 1633 event->sample.header.misc = cs_etm__cpu_mode(etmq, ip, 1634 tidq->prev_packet_el); 1635 event->sample.header.size = sizeof(struct perf_event_header); 1636 1637 /* Set time field based on etm auxtrace config. 
*/ 1638 sample.time = cs_etm__resolve_sample_time(etmq, tidq); 1639 1640 sample.ip = ip; 1641 sample.pid = thread__pid(tidq->prev_packet_thread); 1642 sample.tid = thread__tid(tidq->prev_packet_thread); 1643 sample.addr = cs_etm__first_executed_instr(tidq->packet); 1644 sample.id = etmq->etm->branches_id; 1645 sample.stream_id = etmq->etm->branches_id; 1646 sample.period = 1; 1647 sample.cpu = tidq->packet->cpu; 1648 sample.flags = tidq->prev_packet->flags; 1649 sample.cpumode = event->sample.header.misc; 1650 1651 cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->prev_packet, 1652 &sample); 1653 1654 /* 1655 * perf report cannot handle events without a branch stack 1656 */ 1657 if (etm->synth_opts.last_branch) { 1658 dummy_bs = (struct dummy_branch_stack){ 1659 .nr = 1, 1660 .hw_idx = -1ULL, 1661 .entries = { 1662 .from = sample.ip, 1663 .to = sample.addr, 1664 }, 1665 }; 1666 sample.branch_stack = (struct branch_stack *)&dummy_bs; 1667 } 1668 1669 if (etm->synth_opts.inject) { 1670 ret = cs_etm__inject_event(event, &sample, 1671 etm->branches_sample_type); 1672 if (ret) 1673 return ret; 1674 } 1675 1676 ret = perf_session__deliver_synth_event(etm->session, event, &sample); 1677 1678 if (ret) 1679 pr_err( 1680 "CS ETM Trace: failed to deliver instruction event, error %d\n", 1681 ret); 1682 1683 return ret; 1684 } 1685 1686 static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, 1687 struct perf_session *session) 1688 { 1689 struct evlist *evlist = session->evlist; 1690 struct evsel *evsel; 1691 struct perf_event_attr attr; 1692 bool found = false; 1693 u64 id; 1694 int err; 1695 1696 evlist__for_each_entry(evlist, evsel) { 1697 if (evsel->core.attr.type == etm->pmu_type) { 1698 found = true; 1699 break; 1700 } 1701 } 1702 1703 if (!found) { 1704 pr_debug("No selected events with CoreSight Trace data\n"); 1705 return 0; 1706 } 1707 1708 memset(&attr, 0, sizeof(struct perf_event_attr)); 1709 attr.size = sizeof(struct perf_event_attr); 1710 attr.type = 
PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD;
	/* Timeless traces carry no timestamps, so don't claim PERF_SAMPLE_TIME */
	if (etm->timeless_decoding)
		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	else
		attr.sample_type |= PERF_SAMPLE_TIME;

	attr.exclude_user = evsel->core.attr.exclude_user;
	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
	attr.exclude_hv = evsel->core.attr.exclude_hv;
	attr.exclude_host = evsel->core.attr.exclude_host;
	attr.exclude_guest = evsel->core.attr.exclude_guest;
	attr.sample_id_all = evsel->core.attr.sample_id_all;
	attr.read_format = evsel->core.attr.read_format;

	/* create new id val to be a fixed offset from evsel id */
	id = auxtrace_synth_id_range_start(evsel);

	if (etm->synth_opts.branches) {
		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
		attr.sample_period = 1;
		attr.sample_type |= PERF_SAMPLE_ADDR;
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		etm->branches_sample_type = attr.sample_type;
		etm->branches_id = id;
		id += 1;
		/* ADDR only applies to the branch event, drop it again */
		attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
	}

	if (etm->synth_opts.last_branch) {
		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
		/*
		 * We don't use the hardware index, but the sample generation
		 * code uses the new format branch_stack with this field,
		 * so the event attributes must indicate that it's present.
	 */
		attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
	}

	if (etm->synth_opts.instructions) {
		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
		attr.sample_period = etm->synth_opts.period;
		etm->instructions_sample_period = attr.sample_period;
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		etm->instructions_sample_type = attr.sample_type;
		etm->instructions_id = id;
		id += 1;
	}

	return 0;
}

/*
 * Core per-packet sampling: update the last-branch buffer, emit periodic
 * instruction samples and a branch sample when appropriate, then swap
 * packet/prev_packet for the next round.
 */
static int cs_etm__sample(struct cs_etm_queue *etmq,
			  struct cs_etm_traceid_queue *tidq)
{
	struct cs_etm_auxtrace *etm = etmq->etm;
	int ret;
	u8 trace_chan_id = tidq->trace_chan_id;
	u64 instrs_prev;

	/* Get instructions remainder from previous packet */
	instrs_prev = tidq->period_instructions;

	tidq->period_instructions += tidq->packet->instr_count;

	/*
	 * Record a branch when the last instruction in
	 * PREV_PACKET is a branch.
	 */
	if (etm->synth_opts.last_branch &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE &&
	    tidq->prev_packet->last_instr_taken_branch)
		cs_etm__update_last_branch_rb(etmq, tidq);

	if (etm->synth_opts.instructions &&
	    tidq->period_instructions >= etm->instructions_sample_period) {
		/*
		 * Emit instruction sample periodically
		 * TODO: allow period to be defined in cycles and clock time
		 */

		/*
		 * Below diagram demonstrates the instruction samples
		 * generation flows:
		 *
		 *    Instrs     Instrs       Instrs       Instrs
		 *   Sample(n)  Sample(n+1)  Sample(n+2)  Sample(n+3)
		 *    |            |            |            |
		 *    V            V            V            V
		 *   --------------------------------------------------
		 *            ^                                  ^
		 *            |                                  |
		 *         Period                             Period
		 *    instructions(Pi)                   instructions(Pi')
		 *
		 *            |                                  |
		 *            \---------------- -----------------/
		 *                             V
		 *                 tidq->packet->instr_count
		 *
		 * Instrs Sample(n...) are the synthesised samples occurring
		 * every etm->instructions_sample_period instructions - as
		 * defined on the perf command line. Sample(n) is being the
		 * last sample before the current etm packet, n+1 to n+3
		 * samples are generated from the current etm packet.
		 *
		 * tidq->packet->instr_count represents the number of
		 * instructions in the current etm packet.
		 *
		 * Period instructions (Pi) contains the number of
		 * instructions executed after the sample point(n) from the
		 * previous etm packet. This will always be less than
		 * etm->instructions_sample_period.
		 *
		 * When generating new samples, two groups of instructions are
		 * combined: the tail of the old packet and the head of the
		 * newly arrived packet form sample(n+1); sample(n+2) and
		 * sample(n+3) consume whole sample periods. After sample(n+3),
		 * the remaining instructions carry over to later packets via
		 * tidq->period_instructions for the next round of calculation.
		 */

		/*
		 * Get the initial offset into the current packet instructions;
		 * entry conditions ensure that instrs_prev is less than
		 * etm->instructions_sample_period.
		 */
		u64 offset = etm->instructions_sample_period - instrs_prev;
		u64 addr;

		/* Prepare last branches for instruction sample */
		if (etm->synth_opts.last_branch)
			cs_etm__copy_last_branch_rb(etmq, tidq);

		while (tidq->period_instructions >=
				etm->instructions_sample_period) {
			/*
			 * Calculate the address of the sampled instruction (-1
			 * as sample is reported as though instruction has just
			 * been executed, but PC has not advanced to next
			 * instruction)
			 */
			addr = cs_etm__instr_addr(etmq, trace_chan_id,
						  tidq->packet, offset - 1);
			ret = cs_etm__synth_instruction_sample(
				etmq, tidq, addr,
				etm->instructions_sample_period);
			if (ret)
				return ret;

			offset += etm->instructions_sample_period;
			tidq->period_instructions -=
				etm->instructions_sample_period;
		}
	}

	if (etm->synth_opts.branches) {
		bool generate_sample = false;

		/* Generate sample for tracing on packet */
		if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY)
			generate_sample = true;

		/* Generate sample for branch taken packet */
		if (tidq->prev_packet->sample_type == CS_ETM_RANGE &&
		    tidq->prev_packet->last_instr_taken_branch)
			generate_sample = true;

		if (generate_sample) {
			ret = cs_etm__synth_branch_sample(etmq, tidq);
			if (ret)
				return ret;
		}
	}

	cs_etm__packet_swap(etm, tidq);

	return 0;
}

static int cs_etm__exception(struct cs_etm_traceid_queue *tidq)
{
	/*
	 * When the exception packet is inserted, whether the last instruction
	 * in previous range packet is taken branch or not, we need to force
	 * to set 'prev_packet->last_instr_taken_branch' to true. This ensures
	 * to generate branch sample for the instruction range before the
	 * exception is trapped to kernel or before the exception returning.
	 *
	 * The exception packet includes the dummy address values, so don't
	 * swap PACKET with PREV_PACKET. This keeps PREV_PACKET to be useful
	 * for generating instruction and branch samples.
	 */
	if (tidq->prev_packet->sample_type == CS_ETM_RANGE)
		tidq->prev_packet->last_instr_taken_branch = true;

	return 0;
}

/*
 * Flush any pending state for this traceid queue: emit trailing instruction
 * and branch samples for the previous packet, swap packets and reset the
 * last-branch buffer.
 */
static int cs_etm__flush(struct cs_etm_queue *etmq,
			 struct cs_etm_traceid_queue *tidq)
{
	int err = 0;
	struct cs_etm_auxtrace *etm = etmq->etm;

	/* Handle start tracing packet */
	if (tidq->prev_packet->sample_type == CS_ETM_EMPTY)
		goto swap_packet;

	if (etmq->etm->synth_opts.last_branch &&
	    etmq->etm->synth_opts.instructions &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
		u64 addr;

		/* Prepare last branches for instruction sample */
		cs_etm__copy_last_branch_rb(etmq, tidq);

		/*
		 * Generate a last branch event for the branches left in the
		 * circular buffer at the end of the trace.
		 *
		 * Use the address of the end of the last reported execution
		 * range
		 */
		addr = cs_etm__last_executed_instr(tidq->prev_packet);

		err = cs_etm__synth_instruction_sample(
			etmq, tidq, addr,
			tidq->period_instructions);
		if (err)
			return err;

		tidq->period_instructions = 0;

	}

	if (etm->synth_opts.branches &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
		err = cs_etm__synth_branch_sample(etmq, tidq);
		if (err)
			return err;
	}

swap_packet:
	cs_etm__packet_swap(etm, tidq);

	/* Reset last branches after flush the trace */
	if (etm->synth_opts.last_branch)
		cs_etm__reset_last_branch_rb(tidq);

	return err;
}

/*
 * Called when a trace block ends: drain the remaining last-branch state into
 * a final instruction sample without touching the (stale) current packet.
 */
static int cs_etm__end_block(struct cs_etm_queue *etmq,
			     struct cs_etm_traceid_queue *tidq)
{
	int err;

	/*
	 * It has no new packet coming and 'etmq->packet' contains the stale
	 * packet which was set at the previous time with packets swapping;
	 * so skip to generate branch sample to avoid stale packet.
	 *
	 * For this case only flush branch stack and generate a last branch
	 * event for the branches left in the circular buffer at the end of
	 * the trace.
	 */
	if (etmq->etm->synth_opts.last_branch &&
	    etmq->etm->synth_opts.instructions &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
		u64 addr;

		/* Prepare last branches for instruction sample */
		cs_etm__copy_last_branch_rb(etmq, tidq);

		/*
		 * Use the address of the end of the last reported execution
		 * range.
		 */
		addr = cs_etm__last_executed_instr(tidq->prev_packet);

		err = cs_etm__synth_instruction_sample(
			etmq, tidq, addr,
			tidq->period_instructions);
		if (err)
			return err;

		tidq->period_instructions = 0;
	}

	return 0;
}
/*
 * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue
 *			   if need be.
 * Returns:	< 0	if error
 *		= 0	if no more auxtrace_buffer to read
 *		> 0	if the current buffer isn't empty yet
 */
static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
{
	int ret;

	if (!etmq->buf_len) {
		ret = cs_etm__get_trace(etmq);
		if (ret <= 0)
			return ret;
		/*
		 * We cannot assume consecutive blocks in the data file
		 * are contiguous, reset the decoder to force re-sync.
		 */
		ret = cs_etm_decoder__reset(etmq->decoder);
		if (ret)
			return ret;
	}

	return etmq->buf_len;
}

/*
 * Determine whether the instruction ending at 'end_addr' is an SVC by reading
 * it back from traced memory and matching the per-ISA encoding.
 */
static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id,
				 struct cs_etm_packet *packet,
				 u64 end_addr)
{
	/* Initialise to keep compiler happy */
	u16 instr16 = 0;
	u32 instr32 = 0;
	u64 addr;

	switch (packet->isa) {
	case CS_ETM_ISA_T32:
		/*
		 * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247:
		 *
		 * b'15                 b'8
		 * +-----------------+--------+
		 * | 1 1 0 1 1 1 1 1 |  imm8  |
		 * +-----------------+--------+
		 *
		 * According to the specification, it only defines SVC for T32
		 * with 16 bits instruction and has no definition for 32bits;
		 * so below only read 2 bytes as instruction size for T32.
		 */
		addr = end_addr - 2;
		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr16),
				   (u8 *)&instr16, 0);
		if ((instr16 & 0xFF00) == 0xDF00)
			return true;

		break;
	case CS_ETM_ISA_A32:
		/*
		 * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247:
		 *
		 * b'31 b'28 b'27 b'24
		 * +---------+---------+-------------------------+
		 * |  !1111  | 1 1 1 1 |          imm24          |
		 * +---------+---------+-------------------------+
		 */
		addr = end_addr - 4;
		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
				   (u8 *)&instr32, 0);
		if ((instr32 & 0x0F000000) == 0x0F000000 &&
		    (instr32 & 0xF0000000) != 0xF0000000)
			return true;

		break;
	case CS_ETM_ISA_A64:
		/*
		 * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294:
		 *
		 * b'31               b'21 b'4     b'0
		 * +-----------------------+---------+-----------+
		 * | 1 1 0 1 0 1 0 0 0 0 0 |  imm16  | 0 0 0 0 1 |
		 * +-----------------------+---------+-----------+
		 */
		addr = end_addr - 4;
		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
				   (u8 *)&instr32, 0);
		if ((instr32 & 0xFFE0001F) == 0xd4000001)
			return true;

		break;
	case CS_ETM_ISA_UNKNOWN:
	default:
		break;
	}

	return false;
}

/*
 * Classify the current exception packet as a syscall (SVC) based on the
 * trace version's exception number and, for ETMv4, the instruction bytes.
 */
static bool cs_etm__is_syscall(struct cs_etm_queue *etmq,
			       struct cs_etm_traceid_queue *tidq, u64 magic)
{
	u8 trace_chan_id = tidq->trace_chan_id;
	struct cs_etm_packet *packet = tidq->packet;
	struct cs_etm_packet *prev_packet = tidq->prev_packet;

	if (magic == __perf_cs_etmv3_magic)
		if (packet->exception_number == CS_ETMV3_EXC_SVC)
			return true;

	/*
	 * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and
	 * HVC cases; need to check if it's SVC instruction based on
	 * packet address.
2122 */ 2123 if (magic == __perf_cs_etmv4_magic) { 2124 if (packet->exception_number == CS_ETMV4_EXC_CALL && 2125 cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet, 2126 prev_packet->end_addr)) 2127 return true; 2128 } 2129 2130 return false; 2131 } 2132 2133 static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq, 2134 u64 magic) 2135 { 2136 struct cs_etm_packet *packet = tidq->packet; 2137 2138 if (magic == __perf_cs_etmv3_magic) 2139 if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT || 2140 packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT || 2141 packet->exception_number == CS_ETMV3_EXC_PE_RESET || 2142 packet->exception_number == CS_ETMV3_EXC_IRQ || 2143 packet->exception_number == CS_ETMV3_EXC_FIQ) 2144 return true; 2145 2146 if (magic == __perf_cs_etmv4_magic) 2147 if (packet->exception_number == CS_ETMV4_EXC_RESET || 2148 packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT || 2149 packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR || 2150 packet->exception_number == CS_ETMV4_EXC_INST_DEBUG || 2151 packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG || 2152 packet->exception_number == CS_ETMV4_EXC_IRQ || 2153 packet->exception_number == CS_ETMV4_EXC_FIQ) 2154 return true; 2155 2156 return false; 2157 } 2158 2159 static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, 2160 struct cs_etm_traceid_queue *tidq, 2161 u64 magic) 2162 { 2163 u8 trace_chan_id = tidq->trace_chan_id; 2164 struct cs_etm_packet *packet = tidq->packet; 2165 struct cs_etm_packet *prev_packet = tidq->prev_packet; 2166 2167 if (magic == __perf_cs_etmv3_magic) 2168 if (packet->exception_number == CS_ETMV3_EXC_SMC || 2169 packet->exception_number == CS_ETMV3_EXC_HYP || 2170 packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE || 2171 packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR || 2172 packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT || 2173 packet->exception_number == CS_ETMV3_EXC_DATA_FAULT || 2174 packet->exception_number 
== CS_ETMV3_EXC_GENERIC) 2175 return true; 2176 2177 if (magic == __perf_cs_etmv4_magic) { 2178 if (packet->exception_number == CS_ETMV4_EXC_TRAP || 2179 packet->exception_number == CS_ETMV4_EXC_ALIGNMENT || 2180 packet->exception_number == CS_ETMV4_EXC_INST_FAULT || 2181 packet->exception_number == CS_ETMV4_EXC_DATA_FAULT) 2182 return true; 2183 2184 /* 2185 * For CS_ETMV4_EXC_CALL, except SVC other instructions 2186 * (SMC, HVC) are taken as sync exceptions. 2187 */ 2188 if (packet->exception_number == CS_ETMV4_EXC_CALL && 2189 !cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet, 2190 prev_packet->end_addr)) 2191 return true; 2192 2193 /* 2194 * ETMv4 has 5 bits for exception number; if the numbers 2195 * are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ] 2196 * they are implementation defined exceptions. 2197 * 2198 * For this case, simply take it as sync exception. 2199 */ 2200 if (packet->exception_number > CS_ETMV4_EXC_FIQ && 2201 packet->exception_number <= CS_ETMV4_EXC_END) 2202 return true; 2203 } 2204 2205 return false; 2206 } 2207 2208 static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq, 2209 struct cs_etm_traceid_queue *tidq) 2210 { 2211 struct cs_etm_packet *packet = tidq->packet; 2212 struct cs_etm_packet *prev_packet = tidq->prev_packet; 2213 u8 trace_chan_id = tidq->trace_chan_id; 2214 u64 magic; 2215 int ret; 2216 2217 switch (packet->sample_type) { 2218 case CS_ETM_RANGE: 2219 /* 2220 * Immediate branch instruction without neither link nor 2221 * return flag, it's normal branch instruction within 2222 * the function. 2223 */ 2224 if (packet->last_instr_type == OCSD_INSTR_BR && 2225 packet->last_instr_subtype == OCSD_S_INSTR_NONE) { 2226 packet->flags = PERF_IP_FLAG_BRANCH; 2227 2228 if (packet->last_instr_cond) 2229 packet->flags |= PERF_IP_FLAG_CONDITIONAL; 2230 } 2231 2232 /* 2233 * Immediate branch instruction with link (e.g. BL), this is 2234 * branch instruction for function call. 
2235 */ 2236 if (packet->last_instr_type == OCSD_INSTR_BR && 2237 packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK) 2238 packet->flags = PERF_IP_FLAG_BRANCH | 2239 PERF_IP_FLAG_CALL; 2240 2241 /* 2242 * Indirect branch instruction with link (e.g. BLR), this is 2243 * branch instruction for function call. 2244 */ 2245 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && 2246 packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK) 2247 packet->flags = PERF_IP_FLAG_BRANCH | 2248 PERF_IP_FLAG_CALL; 2249 2250 /* 2251 * Indirect branch instruction with subtype of 2252 * OCSD_S_INSTR_V7_IMPLIED_RET, this is explicit hint for 2253 * function return for A32/T32. 2254 */ 2255 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && 2256 packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET) 2257 packet->flags = PERF_IP_FLAG_BRANCH | 2258 PERF_IP_FLAG_RETURN; 2259 2260 /* 2261 * Indirect branch instruction without link (e.g. BR), usually 2262 * this is used for function return, especially for functions 2263 * within dynamic link lib. 2264 */ 2265 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && 2266 packet->last_instr_subtype == OCSD_S_INSTR_NONE) 2267 packet->flags = PERF_IP_FLAG_BRANCH | 2268 PERF_IP_FLAG_RETURN; 2269 2270 /* Return instruction for function return. */ 2271 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && 2272 packet->last_instr_subtype == OCSD_S_INSTR_V8_RET) 2273 packet->flags = PERF_IP_FLAG_BRANCH | 2274 PERF_IP_FLAG_RETURN; 2275 2276 /* 2277 * Decoder might insert a discontinuity in the middle of 2278 * instruction packets, fixup prev_packet with flag 2279 * PERF_IP_FLAG_TRACE_BEGIN to indicate restarting trace. 
2280 */ 2281 if (prev_packet->sample_type == CS_ETM_DISCONTINUITY) 2282 prev_packet->flags |= PERF_IP_FLAG_BRANCH | 2283 PERF_IP_FLAG_TRACE_BEGIN; 2284 2285 /* 2286 * If the previous packet is an exception return packet 2287 * and the return address just follows SVC instruction, 2288 * it needs to calibrate the previous packet sample flags 2289 * as PERF_IP_FLAG_SYSCALLRET. 2290 */ 2291 if (prev_packet->flags == (PERF_IP_FLAG_BRANCH | 2292 PERF_IP_FLAG_RETURN | 2293 PERF_IP_FLAG_INTERRUPT) && 2294 cs_etm__is_svc_instr(etmq, trace_chan_id, 2295 packet, packet->start_addr)) 2296 prev_packet->flags = PERF_IP_FLAG_BRANCH | 2297 PERF_IP_FLAG_RETURN | 2298 PERF_IP_FLAG_SYSCALLRET; 2299 break; 2300 case CS_ETM_DISCONTINUITY: 2301 /* 2302 * The trace is discontinuous, if the previous packet is 2303 * instruction packet, set flag PERF_IP_FLAG_TRACE_END 2304 * for previous packet. 2305 */ 2306 if (prev_packet->sample_type == CS_ETM_RANGE) 2307 prev_packet->flags |= PERF_IP_FLAG_BRANCH | 2308 PERF_IP_FLAG_TRACE_END; 2309 break; 2310 case CS_ETM_EXCEPTION: 2311 ret = cs_etm__get_magic(etmq, packet->trace_chan_id, &magic); 2312 if (ret) 2313 return ret; 2314 2315 /* The exception is for system call. */ 2316 if (cs_etm__is_syscall(etmq, tidq, magic)) 2317 packet->flags = PERF_IP_FLAG_BRANCH | 2318 PERF_IP_FLAG_CALL | 2319 PERF_IP_FLAG_SYSCALLRET; 2320 /* 2321 * The exceptions are triggered by external signals from bus, 2322 * interrupt controller, debug module, PE reset or halt. 2323 */ 2324 else if (cs_etm__is_async_exception(tidq, magic)) 2325 packet->flags = PERF_IP_FLAG_BRANCH | 2326 PERF_IP_FLAG_CALL | 2327 PERF_IP_FLAG_ASYNC | 2328 PERF_IP_FLAG_INTERRUPT; 2329 /* 2330 * Otherwise, exception is caused by trap, instruction & 2331 * data fault, or alignment errors. 
2332 */ 2333 else if (cs_etm__is_sync_exception(etmq, tidq, magic)) 2334 packet->flags = PERF_IP_FLAG_BRANCH | 2335 PERF_IP_FLAG_CALL | 2336 PERF_IP_FLAG_INTERRUPT; 2337 2338 /* 2339 * When the exception packet is inserted, since exception 2340 * packet is not used standalone for generating samples 2341 * and it's affiliation to the previous instruction range 2342 * packet; so set previous range packet flags to tell perf 2343 * it is an exception taken branch. 2344 */ 2345 if (prev_packet->sample_type == CS_ETM_RANGE) 2346 prev_packet->flags = packet->flags; 2347 break; 2348 case CS_ETM_EXCEPTION_RET: 2349 /* 2350 * When the exception return packet is inserted, since 2351 * exception return packet is not used standalone for 2352 * generating samples and it's affiliation to the previous 2353 * instruction range packet; so set previous range packet 2354 * flags to tell perf it is an exception return branch. 2355 * 2356 * The exception return can be for either system call or 2357 * other exception types; unfortunately the packet doesn't 2358 * contain exception type related info so we cannot decide 2359 * the exception type purely based on exception return packet. 2360 * If we record the exception number from exception packet and 2361 * reuse it for exception return packet, this is not reliable 2362 * due the trace can be discontinuity or the interrupt can 2363 * be nested, thus the recorded exception number cannot be 2364 * used for exception return packet for these two cases. 2365 * 2366 * For exception return packet, we only need to distinguish the 2367 * packet is for system call or for other types. Thus the 2368 * decision can be deferred when receive the next packet which 2369 * contains the return address, based on the return address we 2370 * can read out the previous instruction and check if it's a 2371 * system call instruction and then calibrate the sample flag 2372 * as needed. 
2373 */ 2374 if (prev_packet->sample_type == CS_ETM_RANGE) 2375 prev_packet->flags = PERF_IP_FLAG_BRANCH | 2376 PERF_IP_FLAG_RETURN | 2377 PERF_IP_FLAG_INTERRUPT; 2378 break; 2379 case CS_ETM_EMPTY: 2380 default: 2381 break; 2382 } 2383 2384 return 0; 2385 } 2386 2387 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq) 2388 { 2389 int ret = 0; 2390 size_t processed = 0; 2391 2392 /* 2393 * Packets are decoded and added to the decoder's packet queue 2394 * until the decoder packet processing callback has requested that 2395 * processing stops or there is nothing left in the buffer. Normal 2396 * operations that stop processing are a timestamp packet or a full 2397 * decoder buffer queue. 2398 */ 2399 ret = cs_etm_decoder__process_data_block(etmq->decoder, 2400 etmq->offset, 2401 &etmq->buf[etmq->buf_used], 2402 etmq->buf_len, 2403 &processed); 2404 if (ret) 2405 goto out; 2406 2407 etmq->offset += processed; 2408 etmq->buf_used += processed; 2409 etmq->buf_len -= processed; 2410 2411 out: 2412 return ret; 2413 } 2414 2415 static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq, 2416 struct cs_etm_traceid_queue *tidq) 2417 { 2418 int ret; 2419 struct cs_etm_packet_queue *packet_queue; 2420 2421 packet_queue = &tidq->packet_queue; 2422 2423 /* Process each packet in this chunk */ 2424 while (1) { 2425 ret = cs_etm_decoder__get_packet(packet_queue, 2426 tidq->packet); 2427 if (ret <= 0) 2428 /* 2429 * Stop processing this chunk on 2430 * end of data or error 2431 */ 2432 break; 2433 2434 /* 2435 * Since packet addresses are swapped in packet 2436 * handling within below switch() statements, 2437 * thus setting sample flags must be called 2438 * prior to switch() statement to use address 2439 * information before packets swapping. 
		 */
		ret = cs_etm__set_sample_flags(etmq, tidq);
		if (ret < 0)
			break;

		switch (tidq->packet->sample_type) {
		case CS_ETM_RANGE:
			/*
			 * If the packet contains an instruction
			 * range, generate instruction sequence
			 * events.
			 */
			cs_etm__sample(etmq, tidq);
			break;
		case CS_ETM_EXCEPTION:
		case CS_ETM_EXCEPTION_RET:
			/*
			 * If the exception packet is coming,
			 * make sure the previous instruction
			 * range packet to be handled properly.
			 */
			cs_etm__exception(tidq);
			break;
		case CS_ETM_DISCONTINUITY:
			/*
			 * Discontinuity in trace, flush
			 * previous branch stack
			 */
			cs_etm__flush(etmq, tidq);
			break;
		case CS_ETM_EMPTY:
			/*
			 * Should not receive empty packet,
			 * report error.
			 */
			pr_err("CS ETM Trace: empty packet\n");
			return -EINVAL;
		default:
			break;
		}
	}

	return ret;
}

/*
 * Flush every traceID queue on @etmq by processing whatever packets remain,
 * ignoring per-queue errors.
 */
static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq)
{
	int idx;
	struct int_node *inode;
	struct cs_etm_traceid_queue *tidq;
	struct intlist *traceid_queues_list = etmq->traceid_queues_list;

	intlist__for_each_entry(inode, traceid_queues_list) {
		idx = (int)(intptr_t)inode->priv;
		tidq = etmq->traceid_queues[idx];

		/* Ignore return value */
		cs_etm__process_traceid_queue(etmq, tidq);
	}
}

/*
 * Decode all buffers of a timeless per-thread queue: everything goes through
 * the single CS_ETM_PER_THREAD_TRACEID traceID queue.
 */
static int cs_etm__run_per_thread_timeless_decoder(struct cs_etm_queue *etmq)
{
	int err = 0;
	struct cs_etm_traceid_queue *tidq;

	tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID);
	if (!tidq)
		return -EINVAL;

	/* Go through each buffer in the queue and decode them one by one */
	while (1) {
		err = cs_etm__get_data_block(etmq);
		if (err <= 0)
			return err;

		/* Run trace decoder until buffer consumed or end of trace */
		do {
			err = cs_etm__decode_data_block(etmq);
			if (err)
				return err;

			/*
			 * Process each packet in this chunk, nothing to do if
			 * an error occurs other than hoping the next one will
			 * be better.
			 */
			err = cs_etm__process_traceid_queue(etmq, tidq);

		} while (etmq->buf_len);

		if (err == 0)
			/* Flush any remaining branch stack entries */
			err = cs_etm__end_block(etmq, tidq);
	}

	return err;
}

/*
 * Decode all buffers of a timeless per-cpu queue, iterating over every
 * traceID queue since one buffer can interleave trace from several sources.
 */
static int cs_etm__run_per_cpu_timeless_decoder(struct cs_etm_queue *etmq)
{
	int idx, err = 0;
	struct cs_etm_traceid_queue *tidq;
	struct int_node *inode;

	/* Go through each buffer in the queue and decode them one by one */
	while (1) {
		err = cs_etm__get_data_block(etmq);
		if (err <= 0)
			return err;

		/* Run trace decoder until buffer consumed or end of trace */
		do {
			err = cs_etm__decode_data_block(etmq);
			if (err)
				return err;

			/*
			 * cs_etm__run_per_thread_timeless_decoder() runs on a
			 * single traceID queue because each TID has a separate
			 * buffer.  But here in per-cpu mode we need to iterate
			 * over each channel instead.
			 */
			intlist__for_each_entry(inode,
						etmq->traceid_queues_list) {
				idx = (int)(intptr_t)inode->priv;
				tidq = etmq->traceid_queues[idx];
				cs_etm__process_traceid_queue(etmq, tidq);
			}
		} while (etmq->buf_len);

		intlist__for_each_entry(inode, etmq->traceid_queues_list) {
			idx = (int)(intptr_t)inode->priv;
			tidq = etmq->traceid_queues[idx];
			/* Flush any remaining branch stack entries */
			err = cs_etm__end_block(etmq, tidq);
			if (err)
				return err;
		}
	}

	return err;
}

/*
 * Run the timeless (no timestamps) decode over every auxtrace queue,
 * dispatching to the per-thread or per-cpu variant.  @tid restricts
 * per-thread decoding to a single thread unless it is -1.
 */
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
					   pid_t tid)
{
	unsigned int i;
	struct auxtrace_queues *queues = &etm->queues;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &etm->queues.queue_array[i];
		struct cs_etm_queue *etmq = queue->priv;
		struct cs_etm_traceid_queue *tidq;

		if (!etmq)
			continue;

		if (etm->per_thread_decoding) {
			tidq = cs_etm__etmq_get_traceid_queue(
				etmq, CS_ETM_PER_THREAD_TRACEID);

			if (!tidq)
				continue;

			if (tid == -1 || thread__tid(tidq->thread) == tid)
				cs_etm__run_per_thread_timeless_decoder(etmq);
		} else
			cs_etm__run_per_cpu_timeless_decoder(etmq);
	}

	return 0;
}

/*
 * Decode all queues in timestamp order by repeatedly servicing the queue
 * whose pending packets carry the smallest CoreSight timestamp (tracked in
 * a min heap), then flush every traceID queue at the end.
 */
static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm)
{
	int ret = 0;
	unsigned int cs_queue_nr, queue_nr, i;
	u8 trace_chan_id;
	u64 cs_timestamp;
	struct auxtrace_queue *queue;
	struct cs_etm_queue *etmq;
	struct cs_etm_traceid_queue *tidq;

	/*
	 * Pre-populate the heap with one entry from each queue so that we can
	 * start processing in time order across all queues.
	 */
	for (i = 0; i < etm->queues.nr_queues; i++) {
		etmq = etm->queues.queue_array[i].priv;
		if (!etmq)
			continue;

		ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i);
		if (ret)
			return ret;
	}

	while (1) {
		if (!etm->heap.heap_cnt)
			break;

		/* Take the entry at the top of the min heap */
		cs_queue_nr = etm->heap.heap_array[0].queue_nr;
		queue_nr = TO_QUEUE_NR(cs_queue_nr);
		trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr);
		queue = &etm->queues.queue_array[queue_nr];
		etmq = queue->priv;

		/*
		 * Remove the top entry from the heap since we are about
		 * to process it.
		 */
		auxtrace_heap__pop(&etm->heap);

		tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
		if (!tidq) {
			/*
			 * No traceID queue has been allocated for this traceID,
			 * which means something somewhere went very wrong.  No
			 * other choice than simply exit.
			 */
			ret = -EINVAL;
			goto out;
		}

		/*
		 * Packets associated with this timestamp are already in
		 * the etmq's traceID queue, so process them.
		 */
		ret = cs_etm__process_traceid_queue(etmq, tidq);
		if (ret < 0)
			goto out;

		/*
		 * Packets for this timestamp have been processed, time to
		 * move on to the next timestamp, fetching a new auxtrace_buffer
		 * if need be.
		 */
refetch:
		ret = cs_etm__get_data_block(etmq);
		if (ret < 0)
			goto out;

		/*
		 * No more auxtrace_buffers to process in this etmq, simply
		 * move on to another entry in the auxtrace_heap.
		 */
		if (!ret)
			continue;

		ret = cs_etm__decode_data_block(etmq);
		if (ret)
			goto out;

		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);

		if (!cs_timestamp) {
			/*
			 * Function cs_etm__decode_data_block() returns when
			 * there is no more traces to decode in the current
			 * auxtrace_buffer OR when a timestamp has been
			 * encountered on any of the traceID queues.  Since we
			 * did not get a timestamp, there is no more traces to
			 * process in this auxtrace_buffer.  As such empty and
			 * flush all traceID queues.
			 */
			cs_etm__clear_all_traceid_queues(etmq);

			/* Fetch another auxtrace_buffer for this etmq */
			goto refetch;
		}

		/*
		 * Add to the min heap the timestamp for packets that have
		 * just been decoded.  They will be processed and synthesized
		 * during the next call to cs_etm__process_traceid_queue() for
		 * this queue/traceID.
		 */
		cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
		ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
	}

	for (i = 0; i < etm->queues.nr_queues; i++) {
		struct int_node *inode;

		etmq = etm->queues.queue_array[i].priv;
		if (!etmq)
			continue;

		intlist__for_each_entry(inode, etmq->traceid_queues_list) {
			int idx = (int)(intptr_t)inode->priv;

			/* Flush any remaining branch stack entries */
			tidq = etmq->traceid_queues[idx];
			ret = cs_etm__end_block(etmq, tidq);
			if (ret)
				return ret;
		}
	}
out:
	return ret;
}

/*
 * Register the pid/tid of a PERF_RECORD_ITRACE_START event with the host
 * machine (skipped for timeless decoding).
 */
static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm,
					union perf_event *event)
{
	struct thread *th;

	if (etm->timeless_decoding)
		return 0;

	/*
	 * Add the tid/pid to the log so that we can get a match when we get a
	 * contextID from the decoder.
	 * Only track for the host: only kernel
	 * trace is supported for guests which wouldn't need pids so this should
	 * be fine.
	 */
	th = machine__findnew_thread(&etm->session->machines.host,
				     event->itrace_start.pid,
				     event->itrace_start.tid);
	if (!th)
		return -ENOMEM;

	thread__put(th);

	return 0;
}

/*
 * On a cpu-wide context switch out event, record the outgoing pid/tid with
 * the host machine so contextIDs seen by the decoder can be matched later.
 */
static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm,
					   union perf_event *event)
{
	struct thread *th;
	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;

	/*
	 * Context switch in per-thread mode are irrelevant since perf
	 * will start/stop tracing as the process is scheduled.
	 */
	if (etm->timeless_decoding)
		return 0;

	/*
	 * SWITCH_IN events carry the next process to be switched out while
	 * SWITCH_OUT events carry the process to be switched in.  As such
	 * we don't care about IN events.
	 */
	if (!out)
		return 0;

	/*
	 * Add the tid/pid to the log so that we can get a match when we get a
	 * contextID from the decoder.  Only track for the host: only kernel
	 * trace is supported for guests which wouldn't need pids so this should
	 * be fine.
	 */
	th = machine__findnew_thread(&etm->session->machines.host,
				     event->context_switch.next_prev_pid,
				     event->context_switch.next_prev_tid);
	if (!th)
		return -ENOMEM;

	thread__put(th);

	return 0;
}

/*
 * Main PERF_RECORD_* event callback: kicks off timeless decode on thread
 * exit (per-thread mode), tracks itrace-start and cpu-wide switch events,
 * and records the latest kernel timestamp seen on AUX events.
 */
static int cs_etm__process_event(struct perf_session *session,
				 union perf_event *event,
				 struct perf_sample *sample,
				 const struct perf_tool *tool)
{
	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("CoreSight ETM Trace requires ordered events\n");
		return -EINVAL;
	}

	switch (event->header.type) {
	case PERF_RECORD_EXIT:
		/*
		 * Don't need to wait for cs_etm__flush_events() in per-thread mode to
		 * start the decode because we know there will be no more trace from
		 * this thread.  All this does is emit samples earlier than waiting for
		 * the flush in other modes, but with timestamps it makes sense to wait
		 * for flush so that events from different threads are interleaved
		 * properly.
		 */
		if (etm->per_thread_decoding && etm->timeless_decoding)
			return cs_etm__process_timeless_queues(etm,
							       event->fork.tid);
		break;

	case PERF_RECORD_ITRACE_START:
		return cs_etm__process_itrace_start(etm, event);

	case PERF_RECORD_SWITCH_CPU_WIDE:
		return cs_etm__process_switch_cpu_wide(etm, event);

	case PERF_RECORD_AUX:
		/*
		 * Record the latest kernel timestamp available in the header
		 * for samples so that synthesised samples occur from this point
		 * onwards.
		 */
		if (sample->time && (sample->time != (u64)-1))
			etm->latest_kernel_timestamp = sample->time;
		break;

	default:
		break;
	}

	return 0;
}

/*
 * Dump every queued buffer that shares @event's reference.  A single mmap
 * buffer may be present as multiple queue entries after being split on aux
 * record boundaries.
 */
static void dump_queued_data(struct cs_etm_auxtrace *etm,
			     struct perf_record_auxtrace *event)
{
	struct auxtrace_buffer *buf;
	unsigned int i;
	/*
	 * Find all buffers with same reference in the queues and dump them.
	 * This is because the queues can contain multiple entries of the same
	 * buffer that were split on aux records.
	 */
	for (i = 0; i < etm->queues.nr_queues; ++i)
		list_for_each_entry(buf, &etm->queues.queue_array[i].head, list)
			if (buf->reference == event->reference)
				cs_etm__dump_event(etm->queues.queue_array[i].priv, buf);
}

/*
 * PERF_RECORD_AUXTRACE callback: queue the buffer when data hasn't already
 * been queued via the auxtrace index, and optionally dump its raw contents
 * when dump_trace is set.
 */
static int cs_etm__process_auxtrace_event(struct perf_session *session,
					  union perf_event *event,
					  const struct perf_tool *tool __maybe_unused)
{
	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	if (!etm->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t data_offset;
		int fd = perf_data__fd(session->data);
		bool is_pipe = perf_data__is_pipe(session->data);
		int err;
		int idx = event->auxtrace.idx;

		if (is_pipe)
			data_offset = 0;
		else {
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&etm->queues, session,
						 event, data_offset, &buffer);
		if (err)
			return err;

		if (dump_trace)
			if (auxtrace_buffer__get_data(buffer, fd)) {
				cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer);
				auxtrace_buffer__put_data(buffer);
			}
	} else if (dump_trace)
		dump_queued_data(etm, &event->auxtrace);

	return 0;
}

/*
 * Decide whether decode runs without timestamps: either forced by the user
 * with --itrace=Z, or because the cs_etm event was recorded without the
 * timestamp option (ETM_OPT_TS).
 */
static int cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace *etm)
{
	struct evsel *evsel;
	struct evlist *evlist = etm->session->evlist;

	/* Override timeless mode with user input from --itrace=Z */
	if (etm->synth_opts.timeless_decoding) {
		etm->timeless_decoding = true;
		return 0;
	}

	/*
	 * Find the cs_etm evsel and look at what its timestamp setting was
	 */
	evlist__for_each_entry(evlist, evsel)
		if (cs_etm__evsel_is_auxtrace(etm->session, evsel)) {
			etm->timeless_decoding =
				!(evsel->core.attr.config & BIT(ETM_OPT_TS));
			return 0;
		}

	pr_err("CS ETM: Couldn't find ETM evsel\n");
	return -EINVAL;
}

/*
 * Read a single cpu parameter block from the auxtrace_info priv block.
 *
 * For version 1 there is a per cpu nr_params entry.  If we are handling
 * version 1 file, then there may be less, the same, or more params
 * indicated by this value than the compile time number we understand.
 *
 * For a version 0 info block, there are a fixed number, and we need to
 * fill out the nr_param value in the metadata we create.
 */
static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset,
				    int out_blk_size, int nr_params_v0)
{
	u64 *metadata = NULL;
	int hdr_version;
	int nr_in_params, nr_out_params, nr_cmn_params;
	int i, k;

	metadata = zalloc(sizeof(*metadata) * out_blk_size);
	if (!metadata)
		return NULL;

	/* read block current index & version */
	i = *buff_in_offset;
	hdr_version = buff_in[CS_HEADER_VERSION];

	if (!hdr_version) {
		/* read version 0 info block into a version 1 metadata block */
		nr_in_params = nr_params_v0;
		metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC];
		metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU];
		metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params;
		/* remaining block params at offset +1 from source */
		for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++)
			metadata[k + 1] = buff_in[i + k];
		/* version 0 has 2 common params */
		nr_cmn_params = 2;
	} else {
		/* read version 1 info block - input and output nr_params may differ */
		/* version 1 has 3 common params */
		nr_cmn_params = 3;
		nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS];

		/* if input has more params than output - skip excess */
		nr_out_params = nr_in_params + nr_cmn_params;
		if (nr_out_params > out_blk_size)
			nr_out_params = out_blk_size;

		for (k = CS_ETM_MAGIC; k < nr_out_params; k++)
			metadata[k] = buff_in[i + k];

		/* record the actual nr params we copied */
		metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params;
	}

	/* adjust in offset by number of in params used */
	i += nr_in_params + nr_cmn_params;
	*buff_in_offset = i;
	return metadata;
}

/**
 * Puts a fragment of an auxtrace buffer into the auxtrace queues based
 * on the bounds of aux_event, if it matches with the buffer that's at
 * file_offset.
3007 * 3008 * Normally, whole auxtrace buffers would be added to the queue. But we 3009 * want to reset the decoder for every PERF_RECORD_AUX event, and the decoder 3010 * is reset across each buffer, so splitting the buffers up in advance has 3011 * the same effect. 3012 */ 3013 static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz, 3014 struct perf_record_aux *aux_event, struct perf_sample *sample) 3015 { 3016 int err; 3017 char buf[PERF_SAMPLE_MAX_SIZE]; 3018 union perf_event *auxtrace_event_union; 3019 struct perf_record_auxtrace *auxtrace_event; 3020 union perf_event auxtrace_fragment; 3021 __u64 aux_offset, aux_size; 3022 enum cs_etm_format format; 3023 3024 struct cs_etm_auxtrace *etm = container_of(session->auxtrace, 3025 struct cs_etm_auxtrace, 3026 auxtrace); 3027 3028 /* 3029 * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got 3030 * from looping through the auxtrace index. 3031 */ 3032 err = perf_session__peek_event(session, file_offset, buf, 3033 PERF_SAMPLE_MAX_SIZE, &auxtrace_event_union, NULL); 3034 if (err) 3035 return err; 3036 auxtrace_event = &auxtrace_event_union->auxtrace; 3037 if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE) 3038 return -EINVAL; 3039 3040 if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) || 3041 auxtrace_event->header.size != sz) { 3042 return -EINVAL; 3043 } 3044 3045 /* 3046 * In per-thread mode, auxtrace CPU is set to -1, but TID will be set instead. See 3047 * auxtrace_mmap_params__set_idx(). However, the sample AUX event will contain a 3048 * CPU as we set this always for the AUX_OUTPUT_HW_ID event. 3049 * So now compare only TIDs if auxtrace CPU is -1, and CPUs if auxtrace CPU is not -1. 3050 * Return 'not found' if mismatch. 
3051 */ 3052 if (auxtrace_event->cpu == (__u32) -1) { 3053 etm->per_thread_decoding = true; 3054 if (auxtrace_event->tid != sample->tid) 3055 return 1; 3056 } else if (auxtrace_event->cpu != sample->cpu) { 3057 if (etm->per_thread_decoding) { 3058 /* 3059 * Found a per-cpu buffer after a per-thread one was 3060 * already found 3061 */ 3062 pr_err("CS ETM: Inconsistent per-thread/per-cpu mode.\n"); 3063 return -EINVAL; 3064 } 3065 return 1; 3066 } 3067 3068 if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) { 3069 /* 3070 * Clamp size in snapshot mode. The buffer size is clamped in 3071 * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect 3072 * the buffer size. 3073 */ 3074 aux_size = min(aux_event->aux_size, auxtrace_event->size); 3075 3076 /* 3077 * In this mode, the head also points to the end of the buffer so aux_offset 3078 * needs to have the size subtracted so it points to the beginning as in normal mode 3079 */ 3080 aux_offset = aux_event->aux_offset - aux_size; 3081 } else { 3082 aux_size = aux_event->aux_size; 3083 aux_offset = aux_event->aux_offset; 3084 } 3085 3086 if (aux_offset >= auxtrace_event->offset && 3087 aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) { 3088 struct cs_etm_queue *etmq = cs_etm__get_queue(etm, auxtrace_event->cpu); 3089 3090 /* 3091 * If this AUX event was inside this buffer somewhere, create a new auxtrace event 3092 * based on the sizes of the aux event, and queue that fragment. 
3093 */ 3094 auxtrace_fragment.auxtrace = *auxtrace_event; 3095 auxtrace_fragment.auxtrace.size = aux_size; 3096 auxtrace_fragment.auxtrace.offset = aux_offset; 3097 auxtrace_fragment.auxtrace.idx = etmq->queue_nr; 3098 file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size; 3099 3100 pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64 3101 " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu); 3102 err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment, 3103 file_offset, NULL); 3104 if (err) 3105 return err; 3106 3107 format = (aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW) ? 3108 UNFORMATTED : FORMATTED; 3109 if (etmq->format != UNSET && format != etmq->format) { 3110 pr_err("CS_ETM: mixed formatted and unformatted trace not supported\n"); 3111 return -EINVAL; 3112 } 3113 etmq->format = format; 3114 return 0; 3115 } 3116 3117 /* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */ 3118 return 1; 3119 } 3120 3121 static int cs_etm__process_aux_hw_id_cb(struct perf_session *session, union perf_event *event, 3122 u64 offset __maybe_unused, void *data __maybe_unused) 3123 { 3124 /* look to handle PERF_RECORD_AUX_OUTPUT_HW_ID early to ensure decoders can be set up */ 3125 if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) { 3126 (*(int *)data)++; /* increment found count */ 3127 return cs_etm__process_aux_output_hw_id(session, event); 3128 } 3129 return 0; 3130 } 3131 3132 static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event, 3133 u64 offset __maybe_unused, void *data __maybe_unused) 3134 { 3135 struct perf_sample sample; 3136 int ret; 3137 struct auxtrace_index_entry *ent; 3138 struct auxtrace_index *auxtrace_index; 3139 struct evsel *evsel; 3140 size_t i; 3141 3142 /* Don't care about any other events, we're only queuing buffers for AUX events */ 3143 if (event->header.type != PERF_RECORD_AUX) 3144 return 0; 

	/* Reject records too small to contain their fixed-size fields. */
	if (event->header.size < sizeof(struct perf_record_aux))
		return -EINVAL;

	/* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */
	if (!event->aux.aux_size)
		return 0;

	/*
	 * Parse the sample, we need the sample_id_all data that comes after the event so that the
	 * CPU or PID can be matched to an AUXTRACE buffer's CPU or PID.
	 */
	evsel = evlist__event2evsel(session->evlist, event);
	if (!evsel)
		return -EINVAL;
	perf_sample__init(&sample, /*all=*/false);
	ret = evsel__parse_sample(evsel, event, &sample);
	if (ret)
		goto out;

	/*
	 * Loop through the auxtrace index to find the buffer that matches up with this aux event.
	 */
	list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
		for (i = 0; i < auxtrace_index->nr; i++) {
			ent = &auxtrace_index->entries[i];
			ret = cs_etm__queue_aux_fragment(session, ent->file_offset,
							 ent->sz, &event->aux, &sample);
			/*
			 * Stop search on error or successful values. Continue search on
			 * 1 ('not found')
			 */
			if (ret != 1)
				goto out;
		}
	}

	/*
	 * Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but
	 * don't exit with an error because it will still be possible to decode other aux records.
	 */
	pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64
	       " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu);
	ret = 0;
out:
	perf_sample__exit(&sample);
	return ret;
}

/*
 * Queue the AUXTRACE buffers, split by AUX records, by peeking at all events
 * in the data file. Requires a populated auxtrace index.
 */
static int cs_etm__queue_aux_records(struct perf_session *session)
{
	struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index,
								struct auxtrace_index, list);
	if (index && index->nr > 0)
		return perf_session__peek_events(session, session->header.data_offset,
						 session->header.data_size,
						 cs_etm__queue_aux_records_cb, NULL);

	/*
	 * We would get here if there are no entries in the index (either no auxtrace
	 * buffers or no index at all). Fail silently as there is the possibility of
	 * queueing them in cs_etm__process_auxtrace_event() if etm->data_queued is still
	 * false.
	 *
	 * In that scenario, buffers will not be split by AUX records.
	 */
	return 0;
}

/*
 * True when CPU @j's metadata block is too short (has too few trace
 * parameters) to contain the CS_<type>_<param> field at all.
 */
#define HAS_PARAM(j, type, param) (metadata[(j)][CS_ETM_NR_TRC_PARAMS] <= \
			(CS_##type##_##param - CS_ETM_COMMON_BLK_MAX_V1))

/*
 * Loop through the ETMs and complain if we find at least one where ts_source != 1 (virtual
 * timestamps).
 */
static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
{
	int j;

	for (j = 0; j < num_cpu; j++) {
		switch (metadata[j][CS_ETM_MAGIC]) {
		case __perf_cs_etmv4_magic:
			if (HAS_PARAM(j, ETMV4, TS_SOURCE) || metadata[j][CS_ETMV4_TS_SOURCE] != 1)
				return false;
			break;
		case __perf_cs_ete_magic:
			if (HAS_PARAM(j, ETE, TS_SOURCE) || metadata[j][CS_ETE_TS_SOURCE] != 1)
				return false;
			break;
		default:
			/* Unknown / unsupported magic number. 
 */
			return false;
		}
	}
	/* Every ETMv4/ETE reported ts_source == 1, i.e. virtual timestamps. */
	return true;
}

/*
 * map trace ids to correct metadata block, from information in metadata
 *
 * Reads each CPU's trace ID from its metadata (masked down to the valid
 * trace ID bits) and registers the trace ID -> metadata mapping.
 */
static int cs_etm__map_trace_ids_metadata(struct cs_etm_auxtrace *etm, int num_cpu,
					  u64 **metadata)
{
	u64 cs_etm_magic;
	u8 trace_chan_id;
	int i, err;

	for (i = 0; i < num_cpu; i++) {
		cs_etm_magic = metadata[i][CS_ETM_MAGIC];
		switch (cs_etm_magic) {
		case __perf_cs_etmv3_magic:
			metadata[i][CS_ETM_ETMTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
			trace_chan_id = (u8)(metadata[i][CS_ETM_ETMTRACEIDR]);
			break;
		case __perf_cs_etmv4_magic:
		case __perf_cs_ete_magic:
			/* ETE uses the same trace ID metadata field as ETMv4 */
			metadata[i][CS_ETMV4_TRCTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
			trace_chan_id = (u8)(metadata[i][CS_ETMV4_TRCTRACEIDR]);
			break;
		default:
			/* unknown magic number */
			return -EINVAL;
		}
		err = cs_etm__map_trace_id_v0(etm, trace_chan_id, metadata[i]);
		if (err)
			return err;
	}
	return 0;
}

/*
 * Use the data gathered by the peeks for HW_ID (trace ID mappings) and AUX
 * (formatted or not) packets to create the decoders.
 */
static int cs_etm__create_queue_decoders(struct cs_etm_queue *etmq)
{
	struct cs_etm_decoder_params d_params;
	struct cs_etm_trace_params *t_params;
	/* One decoder per trace ID seen on this queue */
	int decoders = intlist__nr_entries(etmq->traceid_list);

	if (decoders == 0)
		return 0;

	/*
	 * Each queue can only contain data from one CPU when unformatted, so only one decoder is
	 * needed.
	 */
	if (etmq->format == UNFORMATTED)
		assert(decoders == 1);

	/* Use metadata to fill in trace parameters for trace decoder */
	t_params = zalloc(sizeof(*t_params) * decoders);

	if (!t_params)
		goto out_free;

	if (cs_etm__init_trace_params(t_params, etmq))
		goto out_free;

	/* Set decoder parameters to decode trace packets */
	if (cs_etm__init_decoder_params(&d_params, etmq,
					dump_trace ? CS_ETM_OPERATION_PRINT :
					CS_ETM_OPERATION_DECODE))
		goto out_free;

	etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
					    t_params);

	if (!etmq->decoder)
		goto out_free;

	/*
	 * Register a function to handle all memory accesses required by
	 * the trace decoder library.
	 */
	if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
					      0x0L, ((u64) -1L),
					      cs_etm__mem_access))
		goto out_free_decoder;

	/* t_params was only needed for decoder construction */
	zfree(&t_params);
	return 0;

out_free_decoder:
	cs_etm_decoder__free(etmq->decoder);
out_free:
	/* zfree() tolerates a NULL t_params */
	zfree(&t_params);
	return -EINVAL;
}

/*
 * Create decoders for all non-empty queues. Each queue's format must already
 * be known at this point (see the assert below).
 */
static int cs_etm__create_decoders(struct cs_etm_auxtrace *etm)
{
	struct auxtrace_queues *queues = &etm->queues;

	for (unsigned int i = 0; i < queues->nr_queues; i++) {
		bool empty = list_empty(&queues->queue_array[i].head);
		struct cs_etm_queue *etmq = queues->queue_array[i].priv;
		int ret;

		/*
		 * Don't create decoders for empty queues, mainly because
		 * etmq->format is unknown for empty queues.
		 */
		assert(empty || etmq->format != UNSET);
		if (empty)
			continue;

		ret = cs_etm__create_queue_decoders(etmq);
		if (ret)
			return ret;
	}
	return 0;
}

/*
 * Process the PERF_RECORD_AUXTRACE_INFO event for CoreSight ETM: parse the
 * global header and per-CPU metadata blocks, allocate and register the
 * cs_etm_auxtrace instance on the session, synthesize events, queue the AUX
 * buffers and create the trace decoders.
 *
 * Returns 0 on success, a negative error code otherwise.
 */
int cs_etm__process_auxtrace_info_full(union perf_event *event,
				       struct perf_session *session)
{
	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
	struct cs_etm_auxtrace *etm = NULL;
	struct perf_record_time_conv *tc = &session->time_conv;
	int event_header_size = sizeof(struct perf_event_header);
	int total_size = auxtrace_info->header.size;
	int priv_size = 0;
	int num_cpu, max_cpu = 0;
	int err = 0;
	int aux_hw_id_found;
	int i;
	u64 *ptr = NULL;
	u64 **metadata = NULL;

	/* First the global part */
	ptr = (u64 *) auxtrace_info->priv;
	/* Lower 32 bits: CPU count. Upper 32 bits: PMU type (read further down). */
	num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
	metadata = zalloc(sizeof(*metadata) * num_cpu);
	if (!metadata)
		return -ENOMEM;

	/* Start parsing after the common part of the header */
	i = CS_HEADER_VERSION_MAX;

	/*
	 * The metadata is stored in the auxtrace_info section and encodes
	 * the configuration of the ARM embedded trace macrocell which is
	 * required by the trace decoder to properly decode the trace due
	 * to its highly compressed nature.
	 */
	for (int j = 0; j < num_cpu; j++) {
		if (ptr[i] == __perf_cs_etmv3_magic) {
			metadata[j] =
				cs_etm__create_meta_blk(ptr, &i,
							CS_ETM_PRIV_MAX,
							CS_ETM_NR_TRC_PARAMS_V0);
		} else if (ptr[i] == __perf_cs_etmv4_magic) {
			metadata[j] =
				cs_etm__create_meta_blk(ptr, &i,
							CS_ETMV4_PRIV_MAX,
							CS_ETMV4_NR_TRC_PARAMS_V0);
		} else if (ptr[i] == __perf_cs_ete_magic) {
			metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1);
		} else {
			ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". File could be from a newer version of perf.\n",
				  ptr[i]);
			err = -EINVAL;
			goto err_free_metadata;
		}

		if (!metadata[j]) {
			err = -ENOMEM;
			goto err_free_metadata;
		}

		/* Track the highest CPU number for sizing the queue array below */
		if ((int) metadata[j][CS_ETM_CPU] > max_cpu)
			max_cpu = metadata[j][CS_ETM_CPU];
	}

	/*
	 * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and
	 * CS_ETMV4_PRIV_MAX mark how many double words are in the
	 * global metadata, and each cpu's metadata respectively.
	 * The following tests if the correct number of double words was
	 * present in the auxtrace info section.
	 */
	priv_size = total_size - event_header_size - INFO_HEADER_SIZE;
	if (i * 8 != priv_size) {
		err = -EINVAL;
		goto err_free_metadata;
	}

	etm = zalloc(sizeof(*etm));

	if (!etm) {
		err = -ENOMEM;
		goto err_free_metadata;
	}

	/*
	 * As all the ETMs run at the same exception level, the system should
	 * have the same PID format crossing CPUs. So cache the PID format
	 * and reuse it for sequential decoding.
	 */
	etm->pid_fmt = cs_etm__init_pid_fmt(metadata[0]);

	err = auxtrace_queues__init_nr(&etm->queues, max_cpu + 1);
	if (err)
		goto err_free_etm;

	for (unsigned int j = 0; j < etm->queues.nr_queues; ++j) {
		err = cs_etm__setup_queue(etm, &etm->queues.queue_array[j], j);
		if (err)
			goto err_free_queues;
	}

	if (session->itrace_synth_opts->set) {
		etm->synth_opts = *session->itrace_synth_opts;
	} else {
		itrace_synth_opts__set_default(&etm->synth_opts,
					       session->itrace_synth_opts->default_no_sample);
		etm->synth_opts.callchain = false;
	}

	etm->session = session;

	etm->num_cpu = num_cpu;
	etm->pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> 32) & 0xffffffff);
	etm->snapshot_mode = (ptr[CS_ETM_SNAPSHOT] != 0);
	etm->metadata = metadata;
	etm->auxtrace_type = auxtrace_info->type;

	if (etm->synth_opts.use_timestamp)
		/*
		 * Prior to Armv8.4, Arm CPUs don't support FEAT_TRF feature,
		 * therefore the decoder cannot know if the timestamp trace is
		 * same with the kernel time.
		 *
		 * If a user has knowledge for the working platform and can
		 * specify itrace option 'T' to tell decoder to forcely use the
		 * traced timestamp as the kernel time.
		 */
		etm->has_virtual_ts = true;
	else
		/* Use virtual timestamps if all ETMs report ts_source = 1 */
		etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu);

	if (!etm->has_virtual_ts)
		ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n"
			    "The time field of the samples will not be set accurately.\n"
			    "For Arm CPUs prior to Armv8.4 or without support FEAT_TRF,\n"
			    "you can specify the itrace option 'T' for timestamp decoding\n"
			    "if the Coresight timestamp on the platform is same with the kernel time.\n\n");

	etm->auxtrace.process_event = cs_etm__process_event;
	etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
	etm->auxtrace.flush_events = cs_etm__flush_events;
	etm->auxtrace.free_events = cs_etm__free_events;
	etm->auxtrace.free = cs_etm__free;
	etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace;
	session->auxtrace = &etm->auxtrace;

	err = cs_etm__setup_timeless_decoding(etm);
	/*
	 * NOTE(review): unlike later failures, this path returns without the
	 * err_free_queues cleanup; presumably teardown is deferred to the
	 * auxtrace free callback now that session->auxtrace is set — confirm.
	 */
	if (err)
		return err;

	etm->tc.time_shift = tc->time_shift;
	etm->tc.time_mult = tc->time_mult;
	etm->tc.time_zero = tc->time_zero;
	if (event_contains(*tc, time_cycles)) {
		etm->tc.time_cycles = tc->time_cycles;
		etm->tc.time_mask = tc->time_mask;
		etm->tc.cap_user_time_zero = tc->cap_user_time_zero;
		etm->tc.cap_user_time_short = tc->cap_user_time_short;
	}
	err = cs_etm__synth_events(etm, session);
	if (err)
		goto err_free_queues;

	err = cs_etm__queue_aux_records(session);
	if (err)
		goto err_free_queues;

	/*
	 * Map Trace ID values to CPU metadata.
	 *
	 * Trace metadata will always contain Trace ID values from the legacy algorithm
	 * in case it's read by a version of Perf that doesn't know about HW_ID packets
	 * or the kernel doesn't emit them.
	 *
	 * The updated kernel drivers that use AUX_HW_ID to send Trace IDs will attempt to use
	 * the same IDs as the old algorithm as far as is possible, unless there are clashes
	 * in which case a different value will be used. This means an older perf may still
	 * be able to record and read files generated on a newer system.
	 *
	 * For a perf able to interpret AUX_HW_ID packets we first check for the presence of
	 * those packets. If they are there then the values will be mapped and plugged into
	 * the metadata and decoders are only created for each mapping received.
	 *
	 * If no AUX_HW_ID packets are present - which means a file recorded on an old kernel
	 * then we map Trace ID values to CPU directly from the metadata and create decoders
	 * for all mappings.
	 */

	/* Scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
	aux_hw_id_found = 0;
	err = perf_session__peek_events(session, session->header.data_offset,
					session->header.data_size,
					cs_etm__process_aux_hw_id_cb, &aux_hw_id_found);
	if (err)
		goto err_free_queues;

	/* if no HW ID found this is a file with metadata values only, map from metadata */
	if (!aux_hw_id_found) {
		err = cs_etm__map_trace_ids_metadata(etm, num_cpu, metadata);
		if (err)
			goto err_free_queues;
	}

	err = cs_etm__create_decoders(etm);
	if (err)
		goto err_free_queues;

	etm->data_queued = etm->queues.populated;
	return 0;

err_free_queues:
	auxtrace_queues__free(&etm->queues);
	session->auxtrace = NULL;
err_free_etm:
	zfree(&etm);
err_free_metadata:
	/* No need to check @metadata[j], free(NULL) is supported */
	for (int j = 0; j < num_cpu; j++)
		zfree(&metadata[j]);
	zfree(&metadata);
	return err;
}