1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright(C) 2015-2018 Linaro Limited. 4 * 5 * Author: Tor Jeremiassen <tor@ti.com> 6 * Author: Mathieu Poirier <mathieu.poirier@linaro.org> 7 */ 8 9 #include <linux/kernel.h> 10 #include <linux/bitfield.h> 11 #include <linux/bitops.h> 12 #include <linux/coresight-pmu.h> 13 #include <linux/err.h> 14 #include <linux/log2.h> 15 #include <linux/types.h> 16 #include <linux/zalloc.h> 17 18 #include <stdlib.h> 19 20 #include "auxtrace.h" 21 #include "color.h" 22 #include "cs-etm.h" 23 #include "cs-etm-decoder/cs-etm-decoder.h" 24 #include "debug.h" 25 #include "dso.h" 26 #include "evlist.h" 27 #include "intlist.h" 28 #include "machine.h" 29 #include "map.h" 30 #include "perf.h" 31 #include "session.h" 32 #include "map_symbol.h" 33 #include "branch.h" 34 #include "symbol.h" 35 #include "tool.h" 36 #include "thread.h" 37 #include "thread-stack.h" 38 #include "tsc.h" 39 #include <tools/libc_compat.h> 40 #include "util/synthetic-events.h" 41 #include "util/util.h" 42 43 struct cs_etm_auxtrace { 44 struct auxtrace auxtrace; 45 struct auxtrace_queues queues; 46 struct auxtrace_heap heap; 47 struct itrace_synth_opts synth_opts; 48 struct perf_session *session; 49 struct perf_tsc_conversion tc; 50 51 /* 52 * Timeless has no timestamps in the trace so overlapping mmap lookups 53 * are less accurate but produces smaller trace data. We use context IDs 54 * in the trace instead of matching timestamps with fork records so 55 * they're not really needed in the general case. Overlapping mmaps 56 * happen in cases like between a fork and an exec. 57 */ 58 bool timeless_decoding; 59 60 /* 61 * Per-thread ignores the trace channel ID and instead assumes that 62 * everything in a buffer comes from the same process regardless of 63 * which CPU it ran on. It also implies no context IDs so the TID is 64 * taken from the auxtrace buffer. 
65 */ 66 bool per_thread_decoding; 67 bool snapshot_mode; 68 bool data_queued; 69 bool has_virtual_ts; /* Virtual/Kernel timestamps in the trace. */ 70 71 int num_cpu; 72 u64 latest_kernel_timestamp; 73 u32 auxtrace_type; 74 u64 branches_sample_type; 75 u64 branches_id; 76 u64 instructions_sample_type; 77 u64 instructions_sample_period; 78 u64 instructions_id; 79 u64 **metadata; 80 unsigned int pmu_type; 81 enum cs_etm_pid_fmt pid_fmt; 82 }; 83 84 struct cs_etm_traceid_queue { 85 u8 trace_chan_id; 86 u64 period_instructions; 87 size_t last_branch_pos; 88 union perf_event *event_buf; 89 struct thread *thread; 90 struct thread *prev_packet_thread; 91 ocsd_ex_level prev_packet_el; 92 ocsd_ex_level el; 93 struct branch_stack *last_branch; 94 struct branch_stack *last_branch_rb; 95 struct cs_etm_packet *prev_packet; 96 struct cs_etm_packet *packet; 97 struct cs_etm_packet_queue packet_queue; 98 }; 99 100 enum cs_etm_format { 101 UNSET, 102 FORMATTED, 103 UNFORMATTED 104 }; 105 106 struct cs_etm_queue { 107 struct cs_etm_auxtrace *etm; 108 struct cs_etm_decoder *decoder; 109 struct auxtrace_buffer *buffer; 110 unsigned int queue_nr; 111 u8 pending_timestamp_chan_id; 112 enum cs_etm_format format; 113 u64 offset; 114 const unsigned char *buf; 115 size_t buf_len, buf_used; 116 /* Conversion between traceID and index in traceid_queues array */ 117 struct intlist *traceid_queues_list; 118 struct cs_etm_traceid_queue **traceid_queues; 119 /* Conversion between traceID and metadata pointers */ 120 struct intlist *traceid_list; 121 /* 122 * Same as traceid_list, but traceid_list may be a reference to another 123 * queue's which has a matching sink ID. 
124 */ 125 struct intlist *own_traceid_list; 126 u32 sink_id; 127 }; 128 129 static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm); 130 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, 131 pid_t tid); 132 static int cs_etm__get_data_block(struct cs_etm_queue *etmq); 133 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq); 134 static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata); 135 static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu); 136 static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata); 137 138 /* PTMs ETMIDR [11:8] set to b0011 */ 139 #define ETMIDR_PTM_VERSION 0x00000300 140 141 /* 142 * A struct auxtrace_heap_item only has a queue_nr and a timestamp to 143 * work with. One option is to modify to auxtrace_heap_XYZ() API or simply 144 * encode the etm queue number as the upper 16 bit and the channel as 145 * the lower 16 bit. 146 */ 147 #define TO_CS_QUEUE_NR(queue_nr, trace_chan_id) \ 148 (queue_nr << 16 | trace_chan_id) 149 #define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16) 150 #define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff) 151 #define SINK_UNSET ((u32) -1) 152 153 static u32 cs_etm__get_v7_protocol_version(u32 etmidr) 154 { 155 etmidr &= ETMIDR_PTM_VERSION; 156 157 if (etmidr == ETMIDR_PTM_VERSION) 158 return CS_ETM_PROTO_PTM; 159 160 return CS_ETM_PROTO_ETMV3; 161 } 162 163 static int cs_etm__get_magic(struct cs_etm_queue *etmq, u8 trace_chan_id, u64 *magic) 164 { 165 struct int_node *inode; 166 u64 *metadata; 167 168 inode = intlist__find(etmq->traceid_list, trace_chan_id); 169 if (!inode) 170 return -EINVAL; 171 172 metadata = inode->priv; 173 *magic = metadata[CS_ETM_MAGIC]; 174 return 0; 175 } 176 177 int cs_etm__get_cpu(struct cs_etm_queue *etmq, u8 trace_chan_id, int *cpu) 178 { 179 struct int_node *inode; 180 u64 *metadata; 181 182 inode = intlist__find(etmq->traceid_list, trace_chan_id); 183 if (!inode) 184 
return -EINVAL; 185 186 metadata = inode->priv; 187 *cpu = (int)metadata[CS_ETM_CPU]; 188 return 0; 189 } 190 191 /* 192 * The returned PID format is presented as an enum: 193 * 194 * CS_ETM_PIDFMT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced. 195 * CS_ETM_PIDFMT_CTXTID2: CONTEXTIDR_EL2 is traced. 196 * CS_ETM_PIDFMT_NONE: No context IDs 197 * 198 * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2 199 * are enabled at the same time when the session runs on an EL2 kernel. 200 * This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be 201 * recorded in the trace data, the tool will selectively use 202 * CONTEXTIDR_EL2 as PID. 203 * 204 * The result is cached in etm->pid_fmt so this function only needs to be called 205 * when processing the aux info. 206 */ 207 static enum cs_etm_pid_fmt cs_etm__init_pid_fmt(u64 *metadata) 208 { 209 u64 val; 210 211 if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) { 212 val = metadata[CS_ETM_ETMCR]; 213 /* CONTEXTIDR is traced */ 214 if (val & BIT(ETM_OPT_CTXTID)) 215 return CS_ETM_PIDFMT_CTXTID; 216 } else { 217 val = metadata[CS_ETMV4_TRCCONFIGR]; 218 /* CONTEXTIDR_EL2 is traced */ 219 if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT))) 220 return CS_ETM_PIDFMT_CTXTID2; 221 /* CONTEXTIDR_EL1 is traced */ 222 else if (val & BIT(ETM4_CFG_BIT_CTXTID)) 223 return CS_ETM_PIDFMT_CTXTID; 224 } 225 226 return CS_ETM_PIDFMT_NONE; 227 } 228 229 enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq) 230 { 231 return etmq->etm->pid_fmt; 232 } 233 234 static int cs_etm__insert_trace_id_node(struct cs_etm_queue *etmq, 235 u8 trace_chan_id, u64 *cpu_metadata) 236 { 237 /* Get an RB node for this CPU */ 238 struct int_node *inode = intlist__findnew(etmq->traceid_list, trace_chan_id); 239 240 /* Something went wrong, no need to continue */ 241 if (!inode) 242 return -ENOMEM; 243 244 /* Disallow re-mapping a different traceID to metadata pair. 
*/ 245 if (inode->priv) { 246 u64 *curr_cpu_data = inode->priv; 247 u8 curr_chan_id; 248 int err; 249 250 if (curr_cpu_data[CS_ETM_CPU] != cpu_metadata[CS_ETM_CPU]) { 251 /* 252 * With > CORESIGHT_TRACE_IDS_MAX ETMs, overlapping IDs 253 * are expected (but not supported) in per-thread mode, 254 * rather than signifying an error. 255 */ 256 if (etmq->etm->per_thread_decoding) 257 pr_err("CS_ETM: overlapping Trace IDs aren't currently supported in per-thread mode\n"); 258 else 259 pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n"); 260 261 return -EINVAL; 262 } 263 264 /* check that the mapped ID matches */ 265 err = cs_etm__metadata_get_trace_id(&curr_chan_id, curr_cpu_data); 266 if (err) 267 return err; 268 269 if (curr_chan_id != trace_chan_id) { 270 pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n"); 271 return -EINVAL; 272 } 273 274 /* Skip re-adding the same mappings if everything matched */ 275 return 0; 276 } 277 278 /* Not one we've seen before, associate the traceID with the metadata pointer */ 279 inode->priv = cpu_metadata; 280 281 return 0; 282 } 283 284 static struct cs_etm_queue *cs_etm__get_queue(struct cs_etm_auxtrace *etm, int cpu) 285 { 286 if (etm->per_thread_decoding) 287 return etm->queues.queue_array[0].priv; 288 else 289 return etm->queues.queue_array[cpu].priv; 290 } 291 292 static int cs_etm__map_trace_id_v0(struct cs_etm_auxtrace *etm, u8 trace_chan_id, 293 u64 *cpu_metadata) 294 { 295 struct cs_etm_queue *etmq; 296 297 /* 298 * If the queue is unformatted then only save one mapping in the 299 * queue associated with that CPU so only one decoder is made. 300 */ 301 etmq = cs_etm__get_queue(etm, cpu_metadata[CS_ETM_CPU]); 302 if (etmq->format == UNFORMATTED) 303 return cs_etm__insert_trace_id_node(etmq, trace_chan_id, 304 cpu_metadata); 305 306 /* 307 * Otherwise, version 0 trace IDs are global so save them into every 308 * queue. 
309 */ 310 for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) { 311 int ret; 312 313 etmq = etm->queues.queue_array[i].priv; 314 ret = cs_etm__insert_trace_id_node(etmq, trace_chan_id, 315 cpu_metadata); 316 if (ret) 317 return ret; 318 } 319 320 return 0; 321 } 322 323 static int cs_etm__process_trace_id_v0(struct cs_etm_auxtrace *etm, int cpu, 324 u64 hw_id) 325 { 326 int err; 327 u64 *cpu_data; 328 u8 trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id); 329 330 cpu_data = get_cpu_data(etm, cpu); 331 if (cpu_data == NULL) 332 return -EINVAL; 333 334 err = cs_etm__map_trace_id_v0(etm, trace_chan_id, cpu_data); 335 if (err) 336 return err; 337 338 /* 339 * if we are picking up the association from the packet, need to plug 340 * the correct trace ID into the metadata for setting up decoders later. 341 */ 342 return cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data); 343 } 344 345 static int cs_etm__process_trace_id_v0_1(struct cs_etm_auxtrace *etm, int cpu, 346 u64 hw_id) 347 { 348 struct cs_etm_queue *etmq = cs_etm__get_queue(etm, cpu); 349 int ret; 350 u64 *cpu_data; 351 u32 sink_id = FIELD_GET(CS_AUX_HW_ID_SINK_ID_MASK, hw_id); 352 u8 trace_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id); 353 354 /* 355 * Check sink id hasn't changed in per-cpu mode. In per-thread mode, 356 * let it pass for now until an actual overlapping trace ID is hit. In 357 * most cases IDs won't overlap even if the sink changes. 
358 */ 359 if (!etmq->etm->per_thread_decoding && etmq->sink_id != SINK_UNSET && 360 etmq->sink_id != sink_id) { 361 pr_err("CS_ETM: mismatch between sink IDs\n"); 362 return -EINVAL; 363 } 364 365 etmq->sink_id = sink_id; 366 367 /* Find which other queues use this sink and link their ID maps */ 368 for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) { 369 struct cs_etm_queue *other_etmq = etm->queues.queue_array[i].priv; 370 371 /* Different sinks, skip */ 372 if (other_etmq->sink_id != etmq->sink_id) 373 continue; 374 375 /* Already linked, skip */ 376 if (other_etmq->traceid_list == etmq->traceid_list) 377 continue; 378 379 /* At the point of first linking, this one should be empty */ 380 if (!intlist__empty(etmq->traceid_list)) { 381 pr_err("CS_ETM: Can't link populated trace ID lists\n"); 382 return -EINVAL; 383 } 384 385 etmq->own_traceid_list = NULL; 386 intlist__delete(etmq->traceid_list); 387 etmq->traceid_list = other_etmq->traceid_list; 388 break; 389 } 390 391 cpu_data = get_cpu_data(etm, cpu); 392 ret = cs_etm__insert_trace_id_node(etmq, trace_id, cpu_data); 393 if (ret) 394 return ret; 395 396 ret = cs_etm__metadata_set_trace_id(trace_id, cpu_data); 397 if (ret) 398 return ret; 399 400 return 0; 401 } 402 403 static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata) 404 { 405 u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC]; 406 407 switch (cs_etm_magic) { 408 case __perf_cs_etmv3_magic: 409 *trace_chan_id = (u8)(cpu_metadata[CS_ETM_ETMTRACEIDR] & 410 CORESIGHT_TRACE_ID_VAL_MASK); 411 break; 412 case __perf_cs_etmv4_magic: 413 case __perf_cs_ete_magic: 414 *trace_chan_id = (u8)(cpu_metadata[CS_ETMV4_TRCTRACEIDR] & 415 CORESIGHT_TRACE_ID_VAL_MASK); 416 break; 417 default: 418 return -EINVAL; 419 } 420 return 0; 421 } 422 423 /* 424 * update metadata trace ID from the value found in the AUX_HW_INFO packet. 
425 */ 426 static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata) 427 { 428 u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC]; 429 430 switch (cs_etm_magic) { 431 case __perf_cs_etmv3_magic: 432 cpu_metadata[CS_ETM_ETMTRACEIDR] = trace_chan_id; 433 break; 434 case __perf_cs_etmv4_magic: 435 case __perf_cs_ete_magic: 436 cpu_metadata[CS_ETMV4_TRCTRACEIDR] = trace_chan_id; 437 break; 438 439 default: 440 return -EINVAL; 441 } 442 return 0; 443 } 444 445 /* 446 * Get a metadata index for a specific cpu from an array. 447 * 448 */ 449 static int get_cpu_data_idx(struct cs_etm_auxtrace *etm, int cpu) 450 { 451 int i; 452 453 for (i = 0; i < etm->num_cpu; i++) { 454 if (etm->metadata[i][CS_ETM_CPU] == (u64)cpu) { 455 return i; 456 } 457 } 458 459 return -1; 460 } 461 462 /* 463 * Get a metadata for a specific cpu from an array. 464 * 465 */ 466 static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu) 467 { 468 int idx = get_cpu_data_idx(etm, cpu); 469 470 return (idx != -1) ? etm->metadata[idx] : NULL; 471 } 472 473 /* 474 * Handle the PERF_RECORD_AUX_OUTPUT_HW_ID event. 475 * 476 * The payload associates the Trace ID and the CPU. 477 * The routine is tolerant of seeing multiple packets with the same association, 478 * but a CPU / Trace ID association changing during a session is an error. 479 */ 480 static int cs_etm__process_aux_output_hw_id(struct perf_session *session, 481 union perf_event *event) 482 { 483 struct cs_etm_auxtrace *etm; 484 struct perf_sample sample; 485 struct evsel *evsel; 486 u64 hw_id; 487 int cpu, version, err; 488 489 /* extract and parse the HW ID */ 490 hw_id = event->aux_output_hw_id.hw_id; 491 version = FIELD_GET(CS_AUX_HW_ID_MAJOR_VERSION_MASK, hw_id); 492 493 /* check that we can handle this version */ 494 if (version > CS_AUX_HW_ID_MAJOR_VERSION) { 495 pr_err("CS ETM Trace: PERF_RECORD_AUX_OUTPUT_HW_ID version %d not supported. 
Please update Perf.\n", 496 version); 497 return -EINVAL; 498 } 499 500 /* get access to the etm metadata */ 501 etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace); 502 if (!etm || !etm->metadata) 503 return -EINVAL; 504 505 /* parse the sample to get the CPU */ 506 evsel = evlist__event2evsel(session->evlist, event); 507 if (!evsel) 508 return -EINVAL; 509 perf_sample__init(&sample, /*all=*/false); 510 err = evsel__parse_sample(evsel, event, &sample); 511 if (err) 512 goto out; 513 cpu = sample.cpu; 514 if (cpu == -1) { 515 /* no CPU in the sample - possibly recorded with an old version of perf */ 516 pr_err("CS_ETM: no CPU AUX_OUTPUT_HW_ID sample. Use compatible perf to record."); 517 err = -EINVAL; 518 goto out; 519 } 520 521 if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0) { 522 err = cs_etm__process_trace_id_v0(etm, cpu, hw_id); 523 goto out; 524 } 525 526 err = cs_etm__process_trace_id_v0_1(etm, cpu, hw_id); 527 out: 528 perf_sample__exit(&sample); 529 return err; 530 } 531 532 void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq, 533 u8 trace_chan_id) 534 { 535 /* 536 * When a timestamp packet is encountered the backend code 537 * is stopped so that the front end has time to process packets 538 * that were accumulated in the traceID queue. Since there can 539 * be more than one channel per cs_etm_queue, we need to specify 540 * what traceID queue needs servicing. 
541 */ 542 etmq->pending_timestamp_chan_id = trace_chan_id; 543 } 544 545 static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq, 546 u8 *trace_chan_id) 547 { 548 struct cs_etm_packet_queue *packet_queue; 549 550 if (!etmq->pending_timestamp_chan_id) 551 return 0; 552 553 if (trace_chan_id) 554 *trace_chan_id = etmq->pending_timestamp_chan_id; 555 556 packet_queue = cs_etm__etmq_get_packet_queue(etmq, 557 etmq->pending_timestamp_chan_id); 558 if (!packet_queue) 559 return 0; 560 561 /* Acknowledge pending status */ 562 etmq->pending_timestamp_chan_id = 0; 563 564 /* See function cs_etm_decoder__do_{hard|soft}_timestamp() */ 565 return packet_queue->cs_timestamp; 566 } 567 568 static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue) 569 { 570 int i; 571 572 queue->head = 0; 573 queue->tail = 0; 574 queue->packet_count = 0; 575 for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) { 576 queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN; 577 queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR; 578 queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR; 579 queue->packet_buffer[i].instr_count = 0; 580 queue->packet_buffer[i].last_instr_taken_branch = false; 581 queue->packet_buffer[i].last_instr_size = 0; 582 queue->packet_buffer[i].last_instr_type = 0; 583 queue->packet_buffer[i].last_instr_subtype = 0; 584 queue->packet_buffer[i].last_instr_cond = 0; 585 queue->packet_buffer[i].flags = 0; 586 queue->packet_buffer[i].exception_number = UINT32_MAX; 587 queue->packet_buffer[i].trace_chan_id = UINT8_MAX; 588 queue->packet_buffer[i].cpu = INT_MIN; 589 } 590 } 591 592 static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq) 593 { 594 int idx; 595 struct int_node *inode; 596 struct cs_etm_traceid_queue *tidq; 597 struct intlist *traceid_queues_list = etmq->traceid_queues_list; 598 599 intlist__for_each_entry(inode, traceid_queues_list) { 600 idx = (int)(intptr_t)inode->priv; 601 tidq = etmq->traceid_queues[idx]; 602 
cs_etm__clear_packet_queue(&tidq->packet_queue); 603 } 604 } 605 606 static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq, 607 struct cs_etm_traceid_queue *tidq, 608 u8 trace_chan_id) 609 { 610 int rc = -ENOMEM; 611 struct auxtrace_queue *queue; 612 struct cs_etm_auxtrace *etm = etmq->etm; 613 614 cs_etm__clear_packet_queue(&tidq->packet_queue); 615 616 queue = &etmq->etm->queues.queue_array[etmq->queue_nr]; 617 tidq->trace_chan_id = trace_chan_id; 618 tidq->el = tidq->prev_packet_el = ocsd_EL_unknown; 619 tidq->thread = machine__findnew_thread(&etm->session->machines.host, -1, 620 queue->tid); 621 tidq->prev_packet_thread = machine__idle_thread(&etm->session->machines.host); 622 623 tidq->packet = zalloc(sizeof(struct cs_etm_packet)); 624 if (!tidq->packet) 625 goto out; 626 627 tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet)); 628 if (!tidq->prev_packet) 629 goto out_free; 630 631 if (etm->synth_opts.last_branch) { 632 size_t sz = sizeof(struct branch_stack); 633 634 sz += etm->synth_opts.last_branch_sz * 635 sizeof(struct branch_entry); 636 tidq->last_branch = zalloc(sz); 637 if (!tidq->last_branch) 638 goto out_free; 639 tidq->last_branch_rb = zalloc(sz); 640 if (!tidq->last_branch_rb) 641 goto out_free; 642 } 643 644 tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); 645 if (!tidq->event_buf) 646 goto out_free; 647 648 return 0; 649 650 out_free: 651 zfree(&tidq->last_branch_rb); 652 zfree(&tidq->last_branch); 653 zfree(&tidq->prev_packet); 654 zfree(&tidq->packet); 655 out: 656 return rc; 657 } 658 659 static struct cs_etm_traceid_queue 660 *cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id) 661 { 662 int idx; 663 struct int_node *inode; 664 struct intlist *traceid_queues_list; 665 struct cs_etm_traceid_queue *tidq, **traceid_queues; 666 struct cs_etm_auxtrace *etm = etmq->etm; 667 668 if (etm->per_thread_decoding) 669 trace_chan_id = CS_ETM_PER_THREAD_TRACEID; 670 671 traceid_queues_list = etmq->traceid_queues_list; 
672 673 /* 674 * Check if the traceid_queue exist for this traceID by looking 675 * in the queue list. 676 */ 677 inode = intlist__find(traceid_queues_list, trace_chan_id); 678 if (inode) { 679 idx = (int)(intptr_t)inode->priv; 680 return etmq->traceid_queues[idx]; 681 } 682 683 /* We couldn't find a traceid_queue for this traceID, allocate one */ 684 tidq = malloc(sizeof(*tidq)); 685 if (!tidq) 686 return NULL; 687 688 memset(tidq, 0, sizeof(*tidq)); 689 690 /* Get a valid index for the new traceid_queue */ 691 idx = intlist__nr_entries(traceid_queues_list); 692 /* Memory for the inode is free'ed in cs_etm_free_traceid_queues () */ 693 inode = intlist__findnew(traceid_queues_list, trace_chan_id); 694 if (!inode) 695 goto out_free; 696 697 /* Associate this traceID with this index */ 698 inode->priv = (void *)(intptr_t)idx; 699 700 if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id)) 701 goto out_free; 702 703 /* Grow the traceid_queues array by one unit */ 704 traceid_queues = etmq->traceid_queues; 705 traceid_queues = reallocarray(traceid_queues, 706 idx + 1, 707 sizeof(*traceid_queues)); 708 709 /* 710 * On failure reallocarray() returns NULL and the original block of 711 * memory is left untouched. 712 */ 713 if (!traceid_queues) 714 goto out_free; 715 716 traceid_queues[idx] = tidq; 717 etmq->traceid_queues = traceid_queues; 718 719 return etmq->traceid_queues[idx]; 720 721 out_free: 722 /* 723 * Function intlist__remove() removes the inode from the list 724 * and delete the memory associated to it. 
725 */ 726 intlist__remove(traceid_queues_list, inode); 727 free(tidq); 728 729 return NULL; 730 } 731 732 struct cs_etm_packet_queue 733 *cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id) 734 { 735 struct cs_etm_traceid_queue *tidq; 736 737 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); 738 if (tidq) 739 return &tidq->packet_queue; 740 741 return NULL; 742 } 743 744 static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm, 745 struct cs_etm_traceid_queue *tidq) 746 { 747 struct cs_etm_packet *tmp; 748 749 if (etm->synth_opts.branches || etm->synth_opts.last_branch || 750 etm->synth_opts.instructions) { 751 /* 752 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for 753 * the next incoming packet. 754 * 755 * Threads and exception levels are also tracked for both the 756 * previous and current packets. This is because the previous 757 * packet is used for the 'from' IP for branch samples, so the 758 * thread at that time must also be assigned to that sample. 759 * Across discontinuity packets the thread can change, so by 760 * tracking the thread for the previous packet the branch sample 761 * will have the correct info. 
762 */ 763 tmp = tidq->packet; 764 tidq->packet = tidq->prev_packet; 765 tidq->prev_packet = tmp; 766 tidq->prev_packet_el = tidq->el; 767 thread__put(tidq->prev_packet_thread); 768 tidq->prev_packet_thread = thread__get(tidq->thread); 769 } 770 } 771 772 static void cs_etm__packet_dump(const char *pkt_string, void *data) 773 { 774 const char *color = PERF_COLOR_BLUE; 775 int len = strlen(pkt_string); 776 struct cs_etm_queue *etmq = data; 777 char queue_nr[64]; 778 779 if (verbose) 780 snprintf(queue_nr, sizeof(queue_nr), "Qnr:%d; ", etmq->queue_nr); 781 else 782 queue_nr[0] = '\0'; 783 784 if (len && (pkt_string[len-1] == '\n')) 785 color_fprintf(stdout, color, " %s%s", queue_nr, pkt_string); 786 else 787 color_fprintf(stdout, color, " %s%s\n", queue_nr, pkt_string); 788 789 fflush(stdout); 790 } 791 792 static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params, 793 u64 *metadata, u32 etmidr) 794 { 795 t_params->protocol = cs_etm__get_v7_protocol_version(etmidr); 796 t_params->etmv3.reg_ctrl = metadata[CS_ETM_ETMCR]; 797 t_params->etmv3.reg_trc_id = metadata[CS_ETM_ETMTRACEIDR]; 798 } 799 800 static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params, 801 u64 *metadata) 802 { 803 t_params->protocol = CS_ETM_PROTO_ETMV4i; 804 t_params->etmv4.reg_idr0 = metadata[CS_ETMV4_TRCIDR0]; 805 t_params->etmv4.reg_idr1 = metadata[CS_ETMV4_TRCIDR1]; 806 t_params->etmv4.reg_idr2 = metadata[CS_ETMV4_TRCIDR2]; 807 t_params->etmv4.reg_idr8 = metadata[CS_ETMV4_TRCIDR8]; 808 t_params->etmv4.reg_configr = metadata[CS_ETMV4_TRCCONFIGR]; 809 t_params->etmv4.reg_traceidr = metadata[CS_ETMV4_TRCTRACEIDR]; 810 } 811 812 static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params, 813 u64 *metadata) 814 { 815 t_params->protocol = CS_ETM_PROTO_ETE; 816 t_params->ete.reg_idr0 = metadata[CS_ETE_TRCIDR0]; 817 t_params->ete.reg_idr1 = metadata[CS_ETE_TRCIDR1]; 818 t_params->ete.reg_idr2 = metadata[CS_ETE_TRCIDR2]; 819 
t_params->ete.reg_idr8 = metadata[CS_ETE_TRCIDR8]; 820 t_params->ete.reg_configr = metadata[CS_ETE_TRCCONFIGR]; 821 t_params->ete.reg_traceidr = metadata[CS_ETE_TRCTRACEIDR]; 822 t_params->ete.reg_devarch = metadata[CS_ETE_TRCDEVARCH]; 823 } 824 825 static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params, 826 struct cs_etm_queue *etmq) 827 { 828 struct int_node *inode; 829 830 intlist__for_each_entry(inode, etmq->traceid_list) { 831 u64 *metadata = inode->priv; 832 u64 architecture = metadata[CS_ETM_MAGIC]; 833 u32 etmidr; 834 835 switch (architecture) { 836 case __perf_cs_etmv3_magic: 837 etmidr = metadata[CS_ETM_ETMIDR]; 838 cs_etm__set_trace_param_etmv3(t_params++, metadata, etmidr); 839 break; 840 case __perf_cs_etmv4_magic: 841 cs_etm__set_trace_param_etmv4(t_params++, metadata); 842 break; 843 case __perf_cs_ete_magic: 844 cs_etm__set_trace_param_ete(t_params++, metadata); 845 break; 846 default: 847 return -EINVAL; 848 } 849 } 850 851 return 0; 852 } 853 854 static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, 855 struct cs_etm_queue *etmq, 856 enum cs_etm_decoder_operation mode) 857 { 858 int ret = -EINVAL; 859 860 if (!(mode < CS_ETM_OPERATION_MAX)) 861 goto out; 862 863 d_params->packet_printer = cs_etm__packet_dump; 864 d_params->operation = mode; 865 d_params->data = etmq; 866 d_params->formatted = etmq->format == FORMATTED; 867 d_params->fsyncs = false; 868 d_params->hsyncs = false; 869 d_params->frame_aligned = true; 870 871 ret = 0; 872 out: 873 return ret; 874 } 875 876 static void cs_etm__dump_event(struct cs_etm_queue *etmq, 877 struct auxtrace_buffer *buffer) 878 { 879 int ret; 880 const char *color = PERF_COLOR_BLUE; 881 size_t buffer_used = 0; 882 883 fprintf(stdout, "\n"); 884 color_fprintf(stdout, color, 885 ". ... 
CoreSight %s Trace data: size %#zx bytes\n", 886 cs_etm_decoder__get_name(etmq->decoder), buffer->size); 887 888 do { 889 size_t consumed; 890 891 ret = cs_etm_decoder__process_data_block( 892 etmq->decoder, buffer->offset, 893 &((u8 *)buffer->data)[buffer_used], 894 buffer->size - buffer_used, &consumed); 895 if (ret) 896 break; 897 898 buffer_used += consumed; 899 } while (buffer_used < buffer->size); 900 901 cs_etm_decoder__reset(etmq->decoder); 902 } 903 904 static int cs_etm__flush_events(struct perf_session *session, 905 const struct perf_tool *tool) 906 { 907 struct cs_etm_auxtrace *etm = container_of(session->auxtrace, 908 struct cs_etm_auxtrace, 909 auxtrace); 910 if (dump_trace) 911 return 0; 912 913 if (!tool->ordered_events) 914 return -EINVAL; 915 916 if (etm->timeless_decoding) { 917 /* 918 * Pass tid = -1 to process all queues. But likely they will have 919 * already been processed on PERF_RECORD_EXIT anyway. 920 */ 921 return cs_etm__process_timeless_queues(etm, -1); 922 } 923 924 return cs_etm__process_timestamped_queues(etm); 925 } 926 927 static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq) 928 { 929 int idx; 930 uintptr_t priv; 931 struct int_node *inode, *tmp; 932 struct cs_etm_traceid_queue *tidq; 933 struct intlist *traceid_queues_list = etmq->traceid_queues_list; 934 935 intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) { 936 priv = (uintptr_t)inode->priv; 937 idx = priv; 938 939 /* Free this traceid_queue from the array */ 940 tidq = etmq->traceid_queues[idx]; 941 thread__zput(tidq->thread); 942 thread__zput(tidq->prev_packet_thread); 943 zfree(&tidq->event_buf); 944 zfree(&tidq->last_branch); 945 zfree(&tidq->last_branch_rb); 946 zfree(&tidq->prev_packet); 947 zfree(&tidq->packet); 948 zfree(&tidq); 949 950 /* 951 * Function intlist__remove() removes the inode from the list 952 * and delete the memory associated to it. 
953 */ 954 intlist__remove(traceid_queues_list, inode); 955 } 956 957 /* Then the RB tree itself */ 958 intlist__delete(traceid_queues_list); 959 etmq->traceid_queues_list = NULL; 960 961 /* finally free the traceid_queues array */ 962 zfree(&etmq->traceid_queues); 963 } 964 965 static void cs_etm__free_queue(void *priv) 966 { 967 struct int_node *inode, *tmp; 968 struct cs_etm_queue *etmq = priv; 969 970 if (!etmq) 971 return; 972 973 cs_etm_decoder__free(etmq->decoder); 974 cs_etm__free_traceid_queues(etmq); 975 976 if (etmq->own_traceid_list) { 977 /* First remove all traceID/metadata nodes for the RB tree */ 978 intlist__for_each_entry_safe(inode, tmp, etmq->own_traceid_list) 979 intlist__remove(etmq->own_traceid_list, inode); 980 981 /* Then the RB tree itself */ 982 intlist__delete(etmq->own_traceid_list); 983 } 984 985 free(etmq); 986 } 987 988 static void cs_etm__free_events(struct perf_session *session) 989 { 990 unsigned int i; 991 struct cs_etm_auxtrace *aux = container_of(session->auxtrace, 992 struct cs_etm_auxtrace, 993 auxtrace); 994 struct auxtrace_queues *queues = &aux->queues; 995 996 for (i = 0; i < queues->nr_queues; i++) { 997 cs_etm__free_queue(queues->queue_array[i].priv); 998 queues->queue_array[i].priv = NULL; 999 } 1000 1001 auxtrace_queues__free(queues); 1002 } 1003 1004 static void cs_etm__free(struct perf_session *session) 1005 { 1006 int i; 1007 struct cs_etm_auxtrace *aux = container_of(session->auxtrace, 1008 struct cs_etm_auxtrace, 1009 auxtrace); 1010 cs_etm__free_events(session); 1011 session->auxtrace = NULL; 1012 1013 for (i = 0; i < aux->num_cpu; i++) 1014 zfree(&aux->metadata[i]); 1015 1016 zfree(&aux->metadata); 1017 zfree(&aux); 1018 } 1019 1020 static bool cs_etm__evsel_is_auxtrace(struct perf_session *session, 1021 struct evsel *evsel) 1022 { 1023 struct cs_etm_auxtrace *aux = container_of(session->auxtrace, 1024 struct cs_etm_auxtrace, 1025 auxtrace); 1026 1027 return evsel->core.attr.type == aux->pmu_type; 1028 } 1029 
/*
 * Pick the machine (host or default guest) whose maps should be used for
 * trace recorded at exception level @el on this queue.
 */
static struct machine *cs_etm__get_machine(struct cs_etm_queue *etmq,
					   ocsd_ex_level el)
{
	enum cs_etm_pid_fmt pid_fmt = cs_etm__get_pid_fmt(etmq);

	/*
	 * For any virtualisation based on nVHE (e.g. pKVM), or host kernels
	 * running at EL1 assume everything is the host.
	 */
	if (pid_fmt == CS_ETM_PIDFMT_CTXTID)
		return &etmq->etm->session->machines.host;

	/*
	 * Not perfect, but otherwise assume anything in EL1 is the default
	 * guest, and everything else is the host. Distinguishing between guest
	 * and host userspaces isn't currently supported either. Neither is
	 * multiple guest support. All this does is reduce the likeliness of
	 * decode errors where we look into the host kernel maps when it should
	 * have been the guest maps.
	 */
	switch (el) {
	case ocsd_EL1:
		return machines__find_guest(&etmq->etm->session->machines,
					    DEFAULT_GUEST_KERNEL_ID);
	case ocsd_EL3:
	case ocsd_EL2:
	case ocsd_EL0:
	case ocsd_EL_unknown:
	default:
		return &etmq->etm->session->machines.host;
	}
}

/*
 * Classify @address as a PERF_RECORD_MISC_* cpumode value: kernel vs user is
 * decided against the selected machine's kernel start address, host vs guest
 * by which machine cs_etm__get_machine() returned for @el.
 */
static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address,
			   ocsd_ex_level el)
{
	struct machine *machine = cs_etm__get_machine(etmq, el);

	if (address >= machine__kernel_start(machine)) {
		if (machine__is_host(machine))
			return PERF_RECORD_MISC_KERNEL;
		else
			return PERF_RECORD_MISC_GUEST_KERNEL;
	} else {
		if (machine__is_host(machine))
			return PERF_RECORD_MISC_USER;
		else {
			/*
			 * Can't really happen at the moment because
			 * cs_etm__get_machine() will always return
			 * machines.host for any non EL1 trace.
			 */
			return PERF_RECORD_MISC_GUEST_USER;
		}
	}
}

/*
 * Memory access callback: copy up to @size bytes of the program image at
 * @address into @buffer, looking the address up in the maps of the thread
 * currently tracked for @trace_chan_id.
 *
 * Returns the number of bytes read, or 0 when the queue, traceID queue, map
 * or DSO cannot be found or the DSO data cannot be read.
 */
static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
			      u64 address, size_t size, u8 *buffer,
			      const ocsd_mem_space_acc_t mem_space)
{
	u8 cpumode;
	u64 offset;
	int len;
	struct addr_location al;
	struct dso *dso;
	struct cs_etm_traceid_queue *tidq;
	int ret = 0;

	if (!etmq)
		return 0;

	/* From here on every exit goes through 'out' to release @al. */
	addr_location__init(&al);
	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
	if (!tidq)
		goto out;

	/*
	 * We've already tracked EL along side the PID in cs_etm__set_thread()
	 * so double check that it matches what OpenCSD thinks as well. It
	 * doesn't distinguish between EL0 and EL1 for this mem access callback
	 * so we had to do the extra tracking. Skip validation if it's any of
	 * the 'any' values.
	 */
	if (!(mem_space == OCSD_MEM_SPACE_ANY ||
	      mem_space == OCSD_MEM_SPACE_N || mem_space == OCSD_MEM_SPACE_S)) {
		if (mem_space & OCSD_MEM_SPACE_EL1N) {
			/* Includes both non secure EL1 and EL0 */
			assert(tidq->el == ocsd_EL1 || tidq->el == ocsd_EL0);
		} else if (mem_space & OCSD_MEM_SPACE_EL2)
			assert(tidq->el == ocsd_EL2);
		else if (mem_space & OCSD_MEM_SPACE_EL3)
			assert(tidq->el == ocsd_EL3);
	}

	cpumode = cs_etm__cpu_mode(etmq, address, tidq->el);

	if (!thread__find_map(tidq->thread, cpumode, address, &al))
		goto out;

	dso = map__dso(al.map);
	if (!dso)
		goto out;

	/* Don't retry a DSO whose data already failed for itrace. */
	if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR &&
	    dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE))
		goto out;

	offset = map__map_ip(al.map, address);

	map__load(al.map);

	len = dso__data_read_offset(dso, maps__machine(thread__maps(tidq->thread)),
				    offset, buffer, size);

	if (len <= 0) {
		ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n"
				 " Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n");
		/* Report each DSO only once. */
		if (!dso__auxtrace_warned(dso)) {
			pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n",
				address,
				dso__long_name(dso) ? dso__long_name(dso) : "Unknown");
			dso__set_auxtrace_warned(dso);
		}
		goto out;
	}
	ret = len;
out:
	addr_location__exit(&al);
	return ret;
}

static struct cs_etm_queue *cs_etm__alloc_queue(void)
{
	struct cs_etm_queue *etmq = zalloc(sizeof(*etmq));
	if (!etmq)
		return NULL;

	etmq->traceid_queues_list = intlist__new(NULL);
	if (!etmq->traceid_queues_list)
		goto out_free;

	/*
	 * Create an RB tree for traceID-metadata tuple. Since the conversion
	 * has to be made for each packet that gets decoded, optimizing access
	 * in anything other than a sequential array is worth doing.
	 */
	etmq->traceid_list = etmq->own_traceid_list = intlist__new(NULL);
	if (!etmq->traceid_list)
		goto out_free;

	return etmq;

out_free:
	intlist__delete(etmq->traceid_queues_list);
	free(etmq);

	return NULL;
}

/*
 * Lazily attach a cs_etm_queue to @queue. Does nothing if the queue already
 * has private data; otherwise allocates one and fills in its defaults.
 *
 * Returns 0 on success or -ENOMEM if the allocation fails.
 */
static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
			       struct auxtrace_queue *queue,
			       unsigned int queue_nr)
{
	struct cs_etm_queue *etmq = queue->priv;

	if (etmq)
		return 0;

	etmq = cs_etm__alloc_queue();

	if (!etmq)
		return -ENOMEM;

	queue->priv = etmq;
	etmq->etm = etm;
	etmq->queue_nr = queue_nr;
	queue->cpu = queue_nr; /* Placeholder, may be reset to -1 in per-thread mode */
	etmq->offset = 0;
	etmq->sink_id = SINK_UNSET;

	return 0;
}

/*
 * Decode @etmq until the first CoreSight timestamp is found and add that
 * timestamp to the auxtrace min heap.
 *
 * Returns the result of auxtrace_heap__add() once a timestamp was found,
 * otherwise the value that ended the search: 0 when there is no more data,
 * or the error reported while fetching or decoding a block.
 */
static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm,
					    struct cs_etm_queue *etmq,
					    unsigned int queue_nr)
{
	int ret = 0;
	unsigned int cs_queue_nr;
	u8 trace_chan_id;
	u64 cs_timestamp;

	/*
	 * We are under a CPU-wide trace scenario. As such we need to know
	 * when the code that generated the traces started to execute so that
	 * it can be correlated with execution on other CPUs. So we get a
	 * handle on the beginning of traces and decode until we find a
	 * timestamp. The timestamp is then added to the auxtrace min heap
	 * in order to know what nibble (of all the etmqs) to decode first.
	 */
	while (1) {
		/*
		 * Fetch an aux_buffer from this etmq. Bail if no more
		 * blocks or an error has been encountered.
		 */
		ret = cs_etm__get_data_block(etmq);
		if (ret <= 0)
			goto out;

		/*
		 * Run decoder on the trace block. The decoder will stop when
		 * encountering a CS timestamp, a full packet queue or the end of
		 * trace for that block.
		 */
		ret = cs_etm__decode_data_block(etmq);
		if (ret)
			goto out;

		/*
		 * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
		 * the timestamp calculation for us.
		 */
		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);

		/* We found a timestamp, no need to continue. */
		if (cs_timestamp)
			break;

		/*
		 * We didn't find a timestamp so empty all the traceid packet
		 * queues before looking for another timestamp packet, either
		 * in the current data block or a new one. Packets that were
		 * just decoded are useless since no timestamp has been
		 * associated with them. As such simply discard them.
		 */
		cs_etm__clear_all_packet_queues(etmq);
	}

	/*
	 * We have a timestamp. Add it to the min heap to reflect when
	 * instructions conveyed by the range packets of this traceID queue
	 * started to execute. Once the same has been done for all the traceID
	 * queues of each etmq, rendering and decoding can start in
	 * chronological order.
	 *
	 * Note that packets decoded above are still in the traceID's packet
	 * queue and will be processed in cs_etm__process_timestamped_queues().
	 */
	cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
	ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
out:
	return ret;
}

static inline
void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
				 struct cs_etm_traceid_queue *tidq)
{
	struct branch_stack *bs_src = tidq->last_branch_rb;
	struct branch_stack *bs_dst = tidq->last_branch;
	size_t nr = 0;

	/*
	 * Set the number of records before early exit: ->nr is used to
	 * determine how many branches to copy from ->entries.
	 */
	bs_dst->nr = bs_src->nr;

	/*
	 * Early exit when there is nothing to copy.
1301 */ 1302 if (!bs_src->nr) 1303 return; 1304 1305 /* 1306 * As bs_src->entries is a circular buffer, we need to copy from it in 1307 * two steps. First, copy the branches from the most recently inserted 1308 * branch ->last_branch_pos until the end of bs_src->entries buffer. 1309 */ 1310 nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos; 1311 memcpy(&bs_dst->entries[0], 1312 &bs_src->entries[tidq->last_branch_pos], 1313 sizeof(struct branch_entry) * nr); 1314 1315 /* 1316 * If we wrapped around at least once, the branches from the beginning 1317 * of the bs_src->entries buffer and until the ->last_branch_pos element 1318 * are older valid branches: copy them over. The total number of 1319 * branches copied over will be equal to the number of branches asked by 1320 * the user in last_branch_sz. 1321 */ 1322 if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) { 1323 memcpy(&bs_dst->entries[nr], 1324 &bs_src->entries[0], 1325 sizeof(struct branch_entry) * tidq->last_branch_pos); 1326 } 1327 } 1328 1329 static inline 1330 void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq) 1331 { 1332 tidq->last_branch_pos = 0; 1333 tidq->last_branch_rb->nr = 0; 1334 } 1335 1336 static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq, 1337 u8 trace_chan_id, u64 addr) 1338 { 1339 u8 instrBytes[2]; 1340 1341 cs_etm__mem_access(etmq, trace_chan_id, addr, ARRAY_SIZE(instrBytes), 1342 instrBytes, 0); 1343 /* 1344 * T32 instruction size is indicated by bits[15:11] of the first 1345 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111 1346 * denote a 32-bit instruction. 1347 */ 1348 return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2; 1349 } 1350 1351 static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet) 1352 { 1353 /* 1354 * Return 0 for packets that have no addresses so that CS_ETM_INVAL_ADDR doesn't 1355 * appear in samples. 
1356 */ 1357 if (packet->sample_type == CS_ETM_DISCONTINUITY || 1358 packet->sample_type == CS_ETM_EXCEPTION) 1359 return 0; 1360 1361 return packet->start_addr; 1362 } 1363 1364 static inline 1365 u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet) 1366 { 1367 /* Returns 0 for the CS_ETM_DISCONTINUITY packet */ 1368 if (packet->sample_type == CS_ETM_DISCONTINUITY) 1369 return 0; 1370 1371 return packet->end_addr - packet->last_instr_size; 1372 } 1373 1374 static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq, 1375 u64 trace_chan_id, 1376 const struct cs_etm_packet *packet, 1377 u64 offset) 1378 { 1379 if (packet->isa == CS_ETM_ISA_T32) { 1380 u64 addr = packet->start_addr; 1381 1382 while (offset) { 1383 addr += cs_etm__t32_instr_size(etmq, 1384 trace_chan_id, addr); 1385 offset--; 1386 } 1387 return addr; 1388 } 1389 1390 /* Assume a 4 byte instruction size (A32/A64) */ 1391 return packet->start_addr + offset * 4; 1392 } 1393 1394 static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq, 1395 struct cs_etm_traceid_queue *tidq) 1396 { 1397 struct branch_stack *bs = tidq->last_branch_rb; 1398 struct branch_entry *be; 1399 1400 /* 1401 * The branches are recorded in a circular buffer in reverse 1402 * chronological order: we start recording from the last element of the 1403 * buffer down. After writing the first element of the stack, move the 1404 * insert position back to the end of the buffer. 1405 */ 1406 if (!tidq->last_branch_pos) 1407 tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz; 1408 1409 tidq->last_branch_pos -= 1; 1410 1411 be = &bs->entries[tidq->last_branch_pos]; 1412 be->from = cs_etm__last_executed_instr(tidq->prev_packet); 1413 be->to = cs_etm__first_executed_instr(tidq->packet); 1414 /* No support for mispredict */ 1415 be->flags.mispred = 0; 1416 be->flags.predicted = 1; 1417 1418 /* 1419 * Increment bs->nr until reaching the number of last branches asked by 1420 * the user on the command line. 
	 */
	if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
		bs->nr += 1;
}

/*
 * Serialise @sample into @event using sample format @type, setting the
 * event header size first. Returns the result of the synthesis call.
 */
static int cs_etm__inject_event(union perf_event *event,
			       struct perf_sample *sample, u64 type)
{
	event->header.size = perf_event__sample_event_size(sample, type, 0);
	return perf_event__synthesize_sample(event, type, 0, sample);
}


/*
 * Advance @etmq to the next auxtrace buffer of its queue and make its data
 * the current decode buffer, dropping the data of the previous buffer.
 *
 * Returns the length of the new buffer, 0 when the queue has no more
 * buffers, or -ENOMEM when the buffer data cannot be loaded.
 */
static int
cs_etm__get_trace(struct cs_etm_queue *etmq)
{
	struct auxtrace_buffer *aux_buffer = etmq->buffer;
	struct auxtrace_buffer *old_buffer = aux_buffer;
	struct auxtrace_queue *queue;

	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];

	aux_buffer = auxtrace_buffer__next(queue, aux_buffer);

	/* If no more data, drop the previous auxtrace_buffer and return */
	if (!aux_buffer) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		etmq->buf_len = 0;
		return 0;
	}

	etmq->buffer = aux_buffer;

	/* If the aux_buffer doesn't have data associated, try to load it */
	if (!aux_buffer->data) {
		/* get the file desc associated with the perf data file */
		int fd = perf_data__fd(etmq->etm->session->data);

		aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd);
		if (!aux_buffer->data)
			return -ENOMEM;
	}

	/* If valid, drop the previous buffer */
	if (old_buffer)
		auxtrace_buffer__drop_data(old_buffer);

	etmq->buf_used = 0;
	etmq->buf_len = aux_buffer->size;
	etmq->buf = aux_buffer->data;

	return etmq->buf_len;
}

/*
 * Record the thread and exception level currently traced on @tidq.
 * A @tid of -1 keeps the thread already set; if no thread is known after
 * that, the machine's idle thread is used.
 */
static void cs_etm__set_thread(struct cs_etm_queue *etmq,
			       struct cs_etm_traceid_queue *tidq, pid_t tid,
			       ocsd_ex_level el)
{
	struct machine *machine = cs_etm__get_machine(etmq, el);

	if (tid != -1) {
		/* Release the previously tracked thread before looking up the new one. */
		thread__zput(tidq->thread);
		tidq->thread = machine__find_thread(machine, -1, tid);
	}

	/* Couldn't find a known thread */
	if (!tidq->thread)
		tidq->thread = machine__idle_thread(machine);

	tidq->el = el;
}

/*
 * Set the thread and exception level for the traceID queue of
 * @trace_chan_id. Returns 0, or -EINVAL if no traceID queue is available.
 */
int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid,
			    u8 trace_chan_id, ocsd_ex_level el)
{
	struct cs_etm_traceid_queue *tidq;

	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
	if (!tidq)
		return -EINVAL;

	cs_etm__set_thread(etmq, tidq, tid, el);
	return 0;
}

/* Report whether the session owning @etmq is decoded without timestamps. */
bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq)
{
	return !!etmq->etm->timeless_decoding;
}

static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
			      u64 trace_chan_id,
			      const struct cs_etm_packet *packet,
			      struct perf_sample *sample)
{
	/*
	 * It's pointless to read instructions for the CS_ETM_DISCONTINUITY
	 * packet, so directly bail out with 'insn_len' = 0.
	 */
	if (packet->sample_type == CS_ETM_DISCONTINUITY) {
		sample->insn_len = 0;
		return;
	}

	/*
	 * T32 instruction size might be 32-bit or 16-bit, decide by calling
	 * cs_etm__t32_instr_size().
	 */
	if (packet->isa == CS_ETM_ISA_T32)
		sample->insn_len = cs_etm__t32_instr_size(etmq, trace_chan_id,
							  sample->ip);
	/* Otherwise, A64 and A32 instruction size are always 32-bit.
*/ 1534 else 1535 sample->insn_len = 4; 1536 1537 cs_etm__mem_access(etmq, trace_chan_id, sample->ip, sample->insn_len, 1538 (void *)sample->insn, 0); 1539 } 1540 1541 u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp) 1542 { 1543 struct cs_etm_auxtrace *etm = etmq->etm; 1544 1545 if (etm->has_virtual_ts) 1546 return tsc_to_perf_time(cs_timestamp, &etm->tc); 1547 else 1548 return cs_timestamp; 1549 } 1550 1551 static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq, 1552 struct cs_etm_traceid_queue *tidq) 1553 { 1554 struct cs_etm_auxtrace *etm = etmq->etm; 1555 struct cs_etm_packet_queue *packet_queue = &tidq->packet_queue; 1556 1557 if (!etm->timeless_decoding && etm->has_virtual_ts) 1558 return packet_queue->cs_timestamp; 1559 else 1560 return etm->latest_kernel_timestamp; 1561 } 1562 1563 static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, 1564 struct cs_etm_traceid_queue *tidq, 1565 u64 addr, u64 period) 1566 { 1567 int ret = 0; 1568 struct cs_etm_auxtrace *etm = etmq->etm; 1569 union perf_event *event = tidq->event_buf; 1570 struct perf_sample sample; 1571 1572 perf_sample__init(&sample, /*all=*/true); 1573 event->sample.header.type = PERF_RECORD_SAMPLE; 1574 event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->el); 1575 event->sample.header.size = sizeof(struct perf_event_header); 1576 1577 /* Set time field based on etm auxtrace config. 
*/ 1578 sample.time = cs_etm__resolve_sample_time(etmq, tidq); 1579 1580 sample.ip = addr; 1581 sample.pid = thread__pid(tidq->thread); 1582 sample.tid = thread__tid(tidq->thread); 1583 sample.id = etmq->etm->instructions_id; 1584 sample.stream_id = etmq->etm->instructions_id; 1585 sample.period = period; 1586 sample.cpu = tidq->packet->cpu; 1587 sample.flags = tidq->prev_packet->flags; 1588 sample.cpumode = event->sample.header.misc; 1589 1590 cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample); 1591 1592 if (etm->synth_opts.last_branch) 1593 sample.branch_stack = tidq->last_branch; 1594 1595 if (etm->synth_opts.inject) { 1596 ret = cs_etm__inject_event(event, &sample, 1597 etm->instructions_sample_type); 1598 if (ret) 1599 return ret; 1600 } 1601 1602 ret = perf_session__deliver_synth_event(etm->session, event, &sample); 1603 1604 if (ret) 1605 pr_err( 1606 "CS ETM Trace: failed to deliver instruction event, error %d\n", 1607 ret); 1608 1609 perf_sample__exit(&sample); 1610 return ret; 1611 } 1612 1613 /* 1614 * The cs etm packet encodes an instruction range between a branch target 1615 * and the next taken branch. Generate sample accordingly. 1616 */ 1617 static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, 1618 struct cs_etm_traceid_queue *tidq) 1619 { 1620 int ret = 0; 1621 struct cs_etm_auxtrace *etm = etmq->etm; 1622 struct perf_sample sample = {.ip = 0,}; 1623 union perf_event *event = tidq->event_buf; 1624 struct dummy_branch_stack { 1625 u64 nr; 1626 u64 hw_idx; 1627 struct branch_entry entries; 1628 } dummy_bs; 1629 u64 ip; 1630 1631 ip = cs_etm__last_executed_instr(tidq->prev_packet); 1632 1633 event->sample.header.type = PERF_RECORD_SAMPLE; 1634 event->sample.header.misc = cs_etm__cpu_mode(etmq, ip, 1635 tidq->prev_packet_el); 1636 event->sample.header.size = sizeof(struct perf_event_header); 1637 1638 /* Set time field based on etm auxtrace config. 
*/ 1639 sample.time = cs_etm__resolve_sample_time(etmq, tidq); 1640 1641 sample.ip = ip; 1642 sample.pid = thread__pid(tidq->prev_packet_thread); 1643 sample.tid = thread__tid(tidq->prev_packet_thread); 1644 sample.addr = cs_etm__first_executed_instr(tidq->packet); 1645 sample.id = etmq->etm->branches_id; 1646 sample.stream_id = etmq->etm->branches_id; 1647 sample.period = 1; 1648 sample.cpu = tidq->packet->cpu; 1649 sample.flags = tidq->prev_packet->flags; 1650 sample.cpumode = event->sample.header.misc; 1651 1652 cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->prev_packet, 1653 &sample); 1654 1655 /* 1656 * perf report cannot handle events without a branch stack 1657 */ 1658 if (etm->synth_opts.last_branch) { 1659 dummy_bs = (struct dummy_branch_stack){ 1660 .nr = 1, 1661 .hw_idx = -1ULL, 1662 .entries = { 1663 .from = sample.ip, 1664 .to = sample.addr, 1665 }, 1666 }; 1667 sample.branch_stack = (struct branch_stack *)&dummy_bs; 1668 } 1669 1670 if (etm->synth_opts.inject) { 1671 ret = cs_etm__inject_event(event, &sample, 1672 etm->branches_sample_type); 1673 if (ret) 1674 return ret; 1675 } 1676 1677 ret = perf_session__deliver_synth_event(etm->session, event, &sample); 1678 1679 if (ret) 1680 pr_err( 1681 "CS ETM Trace: failed to deliver instruction event, error %d\n", 1682 ret); 1683 1684 return ret; 1685 } 1686 1687 static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, 1688 struct perf_session *session) 1689 { 1690 struct evlist *evlist = session->evlist; 1691 struct evsel *evsel; 1692 struct perf_event_attr attr; 1693 bool found = false; 1694 u64 id; 1695 int err; 1696 1697 evlist__for_each_entry(evlist, evsel) { 1698 if (evsel->core.attr.type == etm->pmu_type) { 1699 found = true; 1700 break; 1701 } 1702 } 1703 1704 if (!found) { 1705 pr_debug("No selected events with CoreSight Trace data\n"); 1706 return 0; 1707 } 1708 1709 memset(&attr, 0, sizeof(struct perf_event_attr)); 1710 attr.size = sizeof(struct perf_event_attr); 1711 attr.type = 
		PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD;
	if (etm->timeless_decoding)
		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	else
		attr.sample_type |= PERF_SAMPLE_TIME;

	/* Inherit the exclusion and format settings of the trace event. */
	attr.exclude_user = evsel->core.attr.exclude_user;
	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
	attr.exclude_hv = evsel->core.attr.exclude_hv;
	attr.exclude_host = evsel->core.attr.exclude_host;
	attr.exclude_guest = evsel->core.attr.exclude_guest;
	attr.sample_id_all = evsel->core.attr.sample_id_all;
	attr.read_format = evsel->core.attr.read_format;

	/* create new id val to be a fixed offset from evsel id */
	id = evsel->core.id[0] + 1000000000;

	if (!id)
		id = 1;

	if (etm->synth_opts.branches) {
		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
		attr.sample_period = 1;
		attr.sample_type |= PERF_SAMPLE_ADDR;
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		etm->branches_sample_type = attr.sample_type;
		etm->branches_id = id;
		id += 1;
		/* PERF_SAMPLE_ADDR only applies to the branch event above. */
		attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
	}

	if (etm->synth_opts.last_branch) {
		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
		/*
		 * We don't use the hardware index, but the sample generation
		 * code uses the new format branch_stack with this field,
		 * so the event attributes must indicate that it's present.
		 */
		attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
	}

	if (etm->synth_opts.instructions) {
		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
		attr.sample_period = etm->synth_opts.period;
		etm->instructions_sample_period = attr.sample_period;
		err = perf_session__deliver_synth_attr_event(session, &attr, id);
		if (err)
			return err;
		etm->instructions_sample_type = attr.sample_type;
		etm->instructions_id = id;
		id += 1;
	}

	return 0;
}

/*
 * Process the range packet currently held in tidq->packet: update the
 * last-branch ring buffer, emit the periodic instruction samples and the
 * branch sample that the synthesis options ask for, then swap packet and
 * prev_packet.
 *
 * Returns 0 on success or the first error from sample synthesis.
 */
static int cs_etm__sample(struct cs_etm_queue *etmq,
			  struct cs_etm_traceid_queue *tidq)
{
	struct cs_etm_auxtrace *etm = etmq->etm;
	int ret;
	u8 trace_chan_id = tidq->trace_chan_id;
	u64 instrs_prev;

	/* Get instructions remainder from previous packet */
	instrs_prev = tidq->period_instructions;

	tidq->period_instructions += tidq->packet->instr_count;

	/*
	 * Record a branch when the last instruction in
	 * PREV_PACKET is a branch.
	 */
	if (etm->synth_opts.last_branch &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE &&
	    tidq->prev_packet->last_instr_taken_branch)
		cs_etm__update_last_branch_rb(etmq, tidq);

	if (etm->synth_opts.instructions &&
	    tidq->period_instructions >= etm->instructions_sample_period) {
		/*
		 * Emit instruction sample periodically
		 * TODO: allow period to be defined in cycles and clock time
		 */

		/*
		 * Below diagram demonstrates the instruction samples
		 * generation flows:
		 *
		 *    Instrs     Instrs       Instrs       Instrs
		 *   Sample(n)  Sample(n+1)  Sample(n+2)  Sample(n+3)
		 *    |            |            |            |
		 *    V            V            V            V
		 *   --------------------------------------------------
		 *            ^                                  ^
		 *            |                                  |
		 *         Period                             Period
		 *    instructions(Pi)                   instructions(Pi')
		 *
		 *            |                                  |
		 *            \---------------- -----------------/
		 *                             V
		 *                 tidq->packet->instr_count
		 *
		 * Instrs Sample(n...) are the synthesised samples occurring
		 * every etm->instructions_sample_period instructions - as
		 * defined on the perf command line.  Sample(n) is being the
		 * last sample before the current etm packet, n+1 to n+3
		 * samples are generated from the current etm packet.
		 *
		 * tidq->packet->instr_count represents the number of
		 * instructions in the current etm packet.
		 *
		 * Period instructions (Pi) contains the number of
		 * instructions executed after the sample point(n) from the
		 * previous etm packet.  This will always be less than
		 * etm->instructions_sample_period.
		 *
		 * When generate new samples, it combines with two parts
		 * instructions, one is the tail of the old packet and another
		 * is the head of the new coming packet, to generate
		 * sample(n+1); sample(n+2) and sample(n+3) consume the
		 * instructions with sample period.  After sample(n+3), the rest
		 * instructions will be used by later packet and it is assigned
		 * to tidq->period_instructions for next round calculation.
		 */

		/*
		 * Get the initial offset into the current packet instructions;
		 * entry conditions ensure that instrs_prev is less than
		 * etm->instructions_sample_period.
		 */
		u64 offset = etm->instructions_sample_period - instrs_prev;
		u64 addr;

		/* Prepare last branches for instruction sample */
		if (etm->synth_opts.last_branch)
			cs_etm__copy_last_branch_rb(etmq, tidq);

		while (tidq->period_instructions >=
				etm->instructions_sample_period) {
			/*
			 * Calculate the address of the sampled instruction (-1
			 * as sample is reported as though instruction has just
			 * been executed, but PC has not advanced to next
			 * instruction)
			 */
			addr = cs_etm__instr_addr(etmq, trace_chan_id,
						  tidq->packet, offset - 1);
			ret = cs_etm__synth_instruction_sample(
				etmq, tidq, addr,
				etm->instructions_sample_period);
			if (ret)
				return ret;

			offset += etm->instructions_sample_period;
			tidq->period_instructions -=
				etm->instructions_sample_period;
		}
	}

	if (etm->synth_opts.branches) {
		bool generate_sample = false;

		/* Generate sample for tracing on packet */
		if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY)
			generate_sample = true;

		/* Generate sample for branch taken packet */
		if (tidq->prev_packet->sample_type == CS_ETM_RANGE &&
		    tidq->prev_packet->last_instr_taken_branch)
			generate_sample = true;

		if (generate_sample) {
			ret = cs_etm__synth_branch_sample(etmq, tidq);
			if (ret)
				return ret;
		}
	}

	cs_etm__packet_swap(etm, tidq);

	return 0;
}

static int cs_etm__exception(struct cs_etm_traceid_queue *tidq)
{
	/*
	 * When the exception packet is inserted, whether the last instruction
	 * in previous range packet is taken branch or not, we need to force
	 * to set 'prev_packet->last_instr_taken_branch' to true.  This ensures
	 * to generate branch sample for the instruction range before the
	 * exception is trapped to kernel or before the exception returning.
	 *
	 * The exception packet includes the dummy address values, so don't
	 * swap PACKET with PREV_PACKET.  This keeps PREV_PACKET to be useful
	 * for generating instruction and branch samples.
	 */
	if (tidq->prev_packet->sample_type == CS_ETM_RANGE)
		tidq->prev_packet->last_instr_taken_branch = true;

	return 0;
}

/*
 * Flush @tidq: emit a final instruction sample carrying the branches left
 * in the ring buffer and a final branch sample for the previous range
 * packet (each only if the matching synthesis option is set), then swap
 * the packets and reset the last-branch ring buffer.
 *
 * Returns 0 on success or the first error from sample synthesis; on error
 * the packets are not swapped.
 */
static int cs_etm__flush(struct cs_etm_queue *etmq,
			 struct cs_etm_traceid_queue *tidq)
{
	int err = 0;
	struct cs_etm_auxtrace *etm = etmq->etm;

	/* Handle start tracing packet */
	if (tidq->prev_packet->sample_type == CS_ETM_EMPTY)
		goto swap_packet;

	if (etmq->etm->synth_opts.last_branch &&
	    etmq->etm->synth_opts.instructions &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
		u64 addr;

		/* Prepare last branches for instruction sample */
		cs_etm__copy_last_branch_rb(etmq, tidq);

		/*
		 * Generate a last branch event for the branches left in the
		 * circular buffer at the end of the trace.
		 *
		 * Use the address of the end of the last reported execution
		 * range
		 */
		addr = cs_etm__last_executed_instr(tidq->prev_packet);

		err = cs_etm__synth_instruction_sample(
			etmq, tidq, addr,
			tidq->period_instructions);
		if (err)
			return err;

		tidq->period_instructions = 0;

	}

	if (etm->synth_opts.branches &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
		err = cs_etm__synth_branch_sample(etmq, tidq);
		if (err)
			return err;
	}

swap_packet:
	cs_etm__packet_swap(etm, tidq);

	/* Reset last branches after flush the trace */
	if (etm->synth_opts.last_branch)
		cs_etm__reset_last_branch_rb(tidq);

	return err;
}

static int cs_etm__end_block(struct cs_etm_queue *etmq,
			     struct cs_etm_traceid_queue *tidq)
{
	int err;

	/*
	 * It has no new packet coming and 'etmq->packet' contains the stale
	 * packet which was set at the previous time with packets swapping;
	 * so skip to generate branch sample to avoid stale packet.
	 *
	 * For this case only flush branch stack and generate a last branch
	 * event for the branches left in the circular buffer at the end of
	 * the trace.
	 */
	if (etmq->etm->synth_opts.last_branch &&
	    etmq->etm->synth_opts.instructions &&
	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
		u64 addr;

		/* Prepare last branches for instruction sample */
		cs_etm__copy_last_branch_rb(etmq, tidq);

		/*
		 * Use the address of the end of the last reported execution
		 * range.
		 */
		addr = cs_etm__last_executed_instr(tidq->prev_packet);

		err = cs_etm__synth_instruction_sample(
			etmq, tidq, addr,
			tidq->period_instructions);
		if (err)
			return err;

		tidq->period_instructions = 0;
	}

	return 0;
}
/*
 * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue
 *			   if need be.
 * Returns:	< 0	if error
 *		= 0	if no more auxtrace_buffer to read
 *		> 0	if the current buffer isn't empty yet
 */
static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
{
	int ret;

	if (!etmq->buf_len) {
		ret = cs_etm__get_trace(etmq);
		if (ret <= 0)
			return ret;
		/*
		 * We cannot assume consecutive blocks in the data file
		 * are contiguous, reset the decoder to force re-sync.
		 */
		ret = cs_etm_decoder__reset(etmq->decoder);
		if (ret)
			return ret;
	}

	return etmq->buf_len;
}

/*
 * Check whether the instruction ending at @end_addr is an SVC, decoding it
 * according to @packet's instruction set. The instruction bytes are fetched
 * through cs_etm__mem_access(); when that read fails the zero-initialised
 * value is tested, which matches none of the SVC patterns.
 */
static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id,
				 struct cs_etm_packet *packet,
				 u64 end_addr)
{
	/* Initialise to keep compiler happy */
	u16 instr16 = 0;
	u32 instr32 = 0;
	u64 addr;

	switch (packet->isa) {
	case CS_ETM_ISA_T32:
		/*
		 * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247:
		 *
		 *  b'15         b'8
		 * +-----------------+--------+
		 * | 1 1 0 1 1 1 1 1 |  imm8  |
		 * +-----------------+--------+
		 *
		 * According to the specification, it only defines SVC for T32
		 * with 16 bits instruction and has no definition for 32bits;
		 * so below only read 2 bytes as instruction size for T32.
		 */
		addr = end_addr - 2;
		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr16),
				   (u8 *)&instr16, 0);
		if ((instr16 & 0xFF00) == 0xDF00)
			return true;

		break;
	case CS_ETM_ISA_A32:
		/*
		 * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247:
		 *
		 *  b'31 b'28 b'27 b'24
		 * +---------+---------+-------------------------+
		 * |  !1111  | 1 1 1 1 |        imm24            |
		 * +---------+---------+-------------------------+
		 */
		addr = end_addr - 4;
		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
				   (u8 *)&instr32, 0);
		if ((instr32 & 0x0F000000) == 0x0F000000 &&
		    (instr32 & 0xF0000000) != 0xF0000000)
			return true;

		break;
	case CS_ETM_ISA_A64:
		/*
		 * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294:
		 *
		 *  b'31               b'21           b'4     b'0
		 * +-----------------------+---------+-----------+
		 * | 1 1 0 1 0 1 0 0 0 0 0 |  imm16  | 0 0 0 0 1 |
		 * +-----------------------+---------+-----------+
		 */
		addr = end_addr - 4;
		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
				   (u8 *)&instr32, 0);
		if ((instr32 & 0xFFE0001F) == 0xd4000001)
			return true;

		break;
	case CS_ETM_ISA_UNKNOWN:
	default:
		break;
	}

	return false;
}

/*
 * Tell whether the exception packet in tidq->packet was caused by a system
 * call, for the trace protocol identified by @magic.
 */
static bool cs_etm__is_syscall(struct cs_etm_queue *etmq,
			       struct cs_etm_traceid_queue *tidq, u64 magic)
{
	u8 trace_chan_id = tidq->trace_chan_id;
	struct cs_etm_packet *packet = tidq->packet;
	struct cs_etm_packet *prev_packet = tidq->prev_packet;

	if (magic == __perf_cs_etmv3_magic)
		if (packet->exception_number == CS_ETMV3_EXC_SVC)
			return true;

	/*
	 * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and
	 * HVC cases; need to check if it's SVC instruction based on
	 * packet address.
	 */
	if (magic == __perf_cs_etmv4_magic) {
		if (packet->exception_number == CS_ETMV4_EXC_CALL &&
		    cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
					 prev_packet->end_addr))
			return true;
	}

	return false;
}

/*
 * Tell whether the exception number in tidq->packet is one of the values
 * this file treats as asynchronous for the trace protocol identified by
 * @magic.
 */
static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq,
				       u64 magic)
{
	struct cs_etm_packet *packet = tidq->packet;

	if (magic == __perf_cs_etmv3_magic)
		if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT ||
		    packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT ||
		    packet->exception_number == CS_ETMV3_EXC_PE_RESET ||
		    packet->exception_number == CS_ETMV3_EXC_IRQ ||
		    packet->exception_number == CS_ETMV3_EXC_FIQ)
			return true;

	if (magic == __perf_cs_etmv4_magic)
		if (packet->exception_number == CS_ETMV4_EXC_RESET ||
		    packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT ||
		    packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR ||
		    packet->exception_number == CS_ETMV4_EXC_INST_DEBUG ||
		    packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG ||
		    packet->exception_number == CS_ETMV4_EXC_IRQ ||
		    packet->exception_number == CS_ETMV4_EXC_FIQ)
			return true;

	return false;
}

static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq,
				      struct cs_etm_traceid_queue *tidq,
				      u64 magic)
{
	u8 trace_chan_id = tidq->trace_chan_id;
	struct cs_etm_packet *packet = tidq->packet;
	struct cs_etm_packet *prev_packet = tidq->prev_packet;

	if (magic == __perf_cs_etmv3_magic)
		if (packet->exception_number == CS_ETMV3_EXC_SMC ||
		    packet->exception_number == CS_ETMV3_EXC_HYP ||
		    packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE ||
		    packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR ||
		    packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT ||
		    packet->exception_number == CS_ETMV3_EXC_DATA_FAULT ||
		    packet->exception_number
== CS_ETMV3_EXC_GENERIC) 2179 return true; 2180 2181 if (magic == __perf_cs_etmv4_magic) { 2182 if (packet->exception_number == CS_ETMV4_EXC_TRAP || 2183 packet->exception_number == CS_ETMV4_EXC_ALIGNMENT || 2184 packet->exception_number == CS_ETMV4_EXC_INST_FAULT || 2185 packet->exception_number == CS_ETMV4_EXC_DATA_FAULT) 2186 return true; 2187 2188 /* 2189 * For CS_ETMV4_EXC_CALL, except SVC other instructions 2190 * (SMC, HVC) are taken as sync exceptions. 2191 */ 2192 if (packet->exception_number == CS_ETMV4_EXC_CALL && 2193 !cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet, 2194 prev_packet->end_addr)) 2195 return true; 2196 2197 /* 2198 * ETMv4 has 5 bits for exception number; if the numbers 2199 * are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ] 2200 * they are implementation defined exceptions. 2201 * 2202 * For this case, simply take it as sync exception. 2203 */ 2204 if (packet->exception_number > CS_ETMV4_EXC_FIQ && 2205 packet->exception_number <= CS_ETMV4_EXC_END) 2206 return true; 2207 } 2208 2209 return false; 2210 } 2211 2212 static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq, 2213 struct cs_etm_traceid_queue *tidq) 2214 { 2215 struct cs_etm_packet *packet = tidq->packet; 2216 struct cs_etm_packet *prev_packet = tidq->prev_packet; 2217 u8 trace_chan_id = tidq->trace_chan_id; 2218 u64 magic; 2219 int ret; 2220 2221 switch (packet->sample_type) { 2222 case CS_ETM_RANGE: 2223 /* 2224 * Immediate branch instruction without neither link nor 2225 * return flag, it's normal branch instruction within 2226 * the function. 2227 */ 2228 if (packet->last_instr_type == OCSD_INSTR_BR && 2229 packet->last_instr_subtype == OCSD_S_INSTR_NONE) { 2230 packet->flags = PERF_IP_FLAG_BRANCH; 2231 2232 if (packet->last_instr_cond) 2233 packet->flags |= PERF_IP_FLAG_CONDITIONAL; 2234 } 2235 2236 /* 2237 * Immediate branch instruction with link (e.g. BL), this is 2238 * branch instruction for function call. 
2239 */ 2240 if (packet->last_instr_type == OCSD_INSTR_BR && 2241 packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK) 2242 packet->flags = PERF_IP_FLAG_BRANCH | 2243 PERF_IP_FLAG_CALL; 2244 2245 /* 2246 * Indirect branch instruction with link (e.g. BLR), this is 2247 * branch instruction for function call. 2248 */ 2249 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && 2250 packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK) 2251 packet->flags = PERF_IP_FLAG_BRANCH | 2252 PERF_IP_FLAG_CALL; 2253 2254 /* 2255 * Indirect branch instruction with subtype of 2256 * OCSD_S_INSTR_V7_IMPLIED_RET, this is explicit hint for 2257 * function return for A32/T32. 2258 */ 2259 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && 2260 packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET) 2261 packet->flags = PERF_IP_FLAG_BRANCH | 2262 PERF_IP_FLAG_RETURN; 2263 2264 /* 2265 * Indirect branch instruction without link (e.g. BR), usually 2266 * this is used for function return, especially for functions 2267 * within dynamic link lib. 2268 */ 2269 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && 2270 packet->last_instr_subtype == OCSD_S_INSTR_NONE) 2271 packet->flags = PERF_IP_FLAG_BRANCH | 2272 PERF_IP_FLAG_RETURN; 2273 2274 /* Return instruction for function return. */ 2275 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && 2276 packet->last_instr_subtype == OCSD_S_INSTR_V8_RET) 2277 packet->flags = PERF_IP_FLAG_BRANCH | 2278 PERF_IP_FLAG_RETURN; 2279 2280 /* 2281 * Decoder might insert a discontinuity in the middle of 2282 * instruction packets, fixup prev_packet with flag 2283 * PERF_IP_FLAG_TRACE_BEGIN to indicate restarting trace. 
2284 */ 2285 if (prev_packet->sample_type == CS_ETM_DISCONTINUITY) 2286 prev_packet->flags |= PERF_IP_FLAG_BRANCH | 2287 PERF_IP_FLAG_TRACE_BEGIN; 2288 2289 /* 2290 * If the previous packet is an exception return packet 2291 * and the return address just follows SVC instruction, 2292 * it needs to calibrate the previous packet sample flags 2293 * as PERF_IP_FLAG_SYSCALLRET. 2294 */ 2295 if (prev_packet->flags == (PERF_IP_FLAG_BRANCH | 2296 PERF_IP_FLAG_RETURN | 2297 PERF_IP_FLAG_INTERRUPT) && 2298 cs_etm__is_svc_instr(etmq, trace_chan_id, 2299 packet, packet->start_addr)) 2300 prev_packet->flags = PERF_IP_FLAG_BRANCH | 2301 PERF_IP_FLAG_RETURN | 2302 PERF_IP_FLAG_SYSCALLRET; 2303 break; 2304 case CS_ETM_DISCONTINUITY: 2305 /* 2306 * The trace is discontinuous, if the previous packet is 2307 * instruction packet, set flag PERF_IP_FLAG_TRACE_END 2308 * for previous packet. 2309 */ 2310 if (prev_packet->sample_type == CS_ETM_RANGE) 2311 prev_packet->flags |= PERF_IP_FLAG_BRANCH | 2312 PERF_IP_FLAG_TRACE_END; 2313 break; 2314 case CS_ETM_EXCEPTION: 2315 ret = cs_etm__get_magic(etmq, packet->trace_chan_id, &magic); 2316 if (ret) 2317 return ret; 2318 2319 /* The exception is for system call. */ 2320 if (cs_etm__is_syscall(etmq, tidq, magic)) 2321 packet->flags = PERF_IP_FLAG_BRANCH | 2322 PERF_IP_FLAG_CALL | 2323 PERF_IP_FLAG_SYSCALLRET; 2324 /* 2325 * The exceptions are triggered by external signals from bus, 2326 * interrupt controller, debug module, PE reset or halt. 2327 */ 2328 else if (cs_etm__is_async_exception(tidq, magic)) 2329 packet->flags = PERF_IP_FLAG_BRANCH | 2330 PERF_IP_FLAG_CALL | 2331 PERF_IP_FLAG_ASYNC | 2332 PERF_IP_FLAG_INTERRUPT; 2333 /* 2334 * Otherwise, exception is caused by trap, instruction & 2335 * data fault, or alignment errors. 
2336 */ 2337 else if (cs_etm__is_sync_exception(etmq, tidq, magic)) 2338 packet->flags = PERF_IP_FLAG_BRANCH | 2339 PERF_IP_FLAG_CALL | 2340 PERF_IP_FLAG_INTERRUPT; 2341 2342 /* 2343 * When the exception packet is inserted, since exception 2344 * packet is not used standalone for generating samples 2345 * and it's affiliation to the previous instruction range 2346 * packet; so set previous range packet flags to tell perf 2347 * it is an exception taken branch. 2348 */ 2349 if (prev_packet->sample_type == CS_ETM_RANGE) 2350 prev_packet->flags = packet->flags; 2351 break; 2352 case CS_ETM_EXCEPTION_RET: 2353 /* 2354 * When the exception return packet is inserted, since 2355 * exception return packet is not used standalone for 2356 * generating samples and it's affiliation to the previous 2357 * instruction range packet; so set previous range packet 2358 * flags to tell perf it is an exception return branch. 2359 * 2360 * The exception return can be for either system call or 2361 * other exception types; unfortunately the packet doesn't 2362 * contain exception type related info so we cannot decide 2363 * the exception type purely based on exception return packet. 2364 * If we record the exception number from exception packet and 2365 * reuse it for exception return packet, this is not reliable 2366 * due the trace can be discontinuity or the interrupt can 2367 * be nested, thus the recorded exception number cannot be 2368 * used for exception return packet for these two cases. 2369 * 2370 * For exception return packet, we only need to distinguish the 2371 * packet is for system call or for other types. Thus the 2372 * decision can be deferred when receive the next packet which 2373 * contains the return address, based on the return address we 2374 * can read out the previous instruction and check if it's a 2375 * system call instruction and then calibrate the sample flag 2376 * as needed. 
2377 */ 2378 if (prev_packet->sample_type == CS_ETM_RANGE) 2379 prev_packet->flags = PERF_IP_FLAG_BRANCH | 2380 PERF_IP_FLAG_RETURN | 2381 PERF_IP_FLAG_INTERRUPT; 2382 break; 2383 case CS_ETM_EMPTY: 2384 default: 2385 break; 2386 } 2387 2388 return 0; 2389 } 2390 2391 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq) 2392 { 2393 int ret = 0; 2394 size_t processed = 0; 2395 2396 /* 2397 * Packets are decoded and added to the decoder's packet queue 2398 * until the decoder packet processing callback has requested that 2399 * processing stops or there is nothing left in the buffer. Normal 2400 * operations that stop processing are a timestamp packet or a full 2401 * decoder buffer queue. 2402 */ 2403 ret = cs_etm_decoder__process_data_block(etmq->decoder, 2404 etmq->offset, 2405 &etmq->buf[etmq->buf_used], 2406 etmq->buf_len, 2407 &processed); 2408 if (ret) 2409 goto out; 2410 2411 etmq->offset += processed; 2412 etmq->buf_used += processed; 2413 etmq->buf_len -= processed; 2414 2415 out: 2416 return ret; 2417 } 2418 2419 static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq, 2420 struct cs_etm_traceid_queue *tidq) 2421 { 2422 int ret; 2423 struct cs_etm_packet_queue *packet_queue; 2424 2425 packet_queue = &tidq->packet_queue; 2426 2427 /* Process each packet in this chunk */ 2428 while (1) { 2429 ret = cs_etm_decoder__get_packet(packet_queue, 2430 tidq->packet); 2431 if (ret <= 0) 2432 /* 2433 * Stop processing this chunk on 2434 * end of data or error 2435 */ 2436 break; 2437 2438 /* 2439 * Since packet addresses are swapped in packet 2440 * handling within below switch() statements, 2441 * thus setting sample flags must be called 2442 * prior to switch() statement to use address 2443 * information before packets swapping. 
2444 */ 2445 ret = cs_etm__set_sample_flags(etmq, tidq); 2446 if (ret < 0) 2447 break; 2448 2449 switch (tidq->packet->sample_type) { 2450 case CS_ETM_RANGE: 2451 /* 2452 * If the packet contains an instruction 2453 * range, generate instruction sequence 2454 * events. 2455 */ 2456 cs_etm__sample(etmq, tidq); 2457 break; 2458 case CS_ETM_EXCEPTION: 2459 case CS_ETM_EXCEPTION_RET: 2460 /* 2461 * If the exception packet is coming, 2462 * make sure the previous instruction 2463 * range packet to be handled properly. 2464 */ 2465 cs_etm__exception(tidq); 2466 break; 2467 case CS_ETM_DISCONTINUITY: 2468 /* 2469 * Discontinuity in trace, flush 2470 * previous branch stack 2471 */ 2472 cs_etm__flush(etmq, tidq); 2473 break; 2474 case CS_ETM_EMPTY: 2475 /* 2476 * Should not receive empty packet, 2477 * report error. 2478 */ 2479 pr_err("CS ETM Trace: empty packet\n"); 2480 return -EINVAL; 2481 default: 2482 break; 2483 } 2484 } 2485 2486 return ret; 2487 } 2488 2489 static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq) 2490 { 2491 int idx; 2492 struct int_node *inode; 2493 struct cs_etm_traceid_queue *tidq; 2494 struct intlist *traceid_queues_list = etmq->traceid_queues_list; 2495 2496 intlist__for_each_entry(inode, traceid_queues_list) { 2497 idx = (int)(intptr_t)inode->priv; 2498 tidq = etmq->traceid_queues[idx]; 2499 2500 /* Ignore return value */ 2501 cs_etm__process_traceid_queue(etmq, tidq); 2502 } 2503 } 2504 2505 static int cs_etm__run_per_thread_timeless_decoder(struct cs_etm_queue *etmq) 2506 { 2507 int err = 0; 2508 struct cs_etm_traceid_queue *tidq; 2509 2510 tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID); 2511 if (!tidq) 2512 return -EINVAL; 2513 2514 /* Go through each buffer in the queue and decode them one by one */ 2515 while (1) { 2516 err = cs_etm__get_data_block(etmq); 2517 if (err <= 0) 2518 return err; 2519 2520 /* Run trace decoder until buffer consumed or end of trace */ 2521 do { 2522 err = 
cs_etm__decode_data_block(etmq); 2523 if (err) 2524 return err; 2525 2526 /* 2527 * Process each packet in this chunk, nothing to do if 2528 * an error occurs other than hoping the next one will 2529 * be better. 2530 */ 2531 err = cs_etm__process_traceid_queue(etmq, tidq); 2532 2533 } while (etmq->buf_len); 2534 2535 if (err == 0) 2536 /* Flush any remaining branch stack entries */ 2537 err = cs_etm__end_block(etmq, tidq); 2538 } 2539 2540 return err; 2541 } 2542 2543 static int cs_etm__run_per_cpu_timeless_decoder(struct cs_etm_queue *etmq) 2544 { 2545 int idx, err = 0; 2546 struct cs_etm_traceid_queue *tidq; 2547 struct int_node *inode; 2548 2549 /* Go through each buffer in the queue and decode them one by one */ 2550 while (1) { 2551 err = cs_etm__get_data_block(etmq); 2552 if (err <= 0) 2553 return err; 2554 2555 /* Run trace decoder until buffer consumed or end of trace */ 2556 do { 2557 err = cs_etm__decode_data_block(etmq); 2558 if (err) 2559 return err; 2560 2561 /* 2562 * cs_etm__run_per_thread_timeless_decoder() runs on a 2563 * single traceID queue because each TID has a separate 2564 * buffer. But here in per-cpu mode we need to iterate 2565 * over each channel instead. 
2566 */ 2567 intlist__for_each_entry(inode, 2568 etmq->traceid_queues_list) { 2569 idx = (int)(intptr_t)inode->priv; 2570 tidq = etmq->traceid_queues[idx]; 2571 cs_etm__process_traceid_queue(etmq, tidq); 2572 } 2573 } while (etmq->buf_len); 2574 2575 intlist__for_each_entry(inode, etmq->traceid_queues_list) { 2576 idx = (int)(intptr_t)inode->priv; 2577 tidq = etmq->traceid_queues[idx]; 2578 /* Flush any remaining branch stack entries */ 2579 err = cs_etm__end_block(etmq, tidq); 2580 if (err) 2581 return err; 2582 } 2583 } 2584 2585 return err; 2586 } 2587 2588 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, 2589 pid_t tid) 2590 { 2591 unsigned int i; 2592 struct auxtrace_queues *queues = &etm->queues; 2593 2594 for (i = 0; i < queues->nr_queues; i++) { 2595 struct auxtrace_queue *queue = &etm->queues.queue_array[i]; 2596 struct cs_etm_queue *etmq = queue->priv; 2597 struct cs_etm_traceid_queue *tidq; 2598 2599 if (!etmq) 2600 continue; 2601 2602 if (etm->per_thread_decoding) { 2603 tidq = cs_etm__etmq_get_traceid_queue( 2604 etmq, CS_ETM_PER_THREAD_TRACEID); 2605 2606 if (!tidq) 2607 continue; 2608 2609 if (tid == -1 || thread__tid(tidq->thread) == tid) 2610 cs_etm__run_per_thread_timeless_decoder(etmq); 2611 } else 2612 cs_etm__run_per_cpu_timeless_decoder(etmq); 2613 } 2614 2615 return 0; 2616 } 2617 2618 static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm) 2619 { 2620 int ret = 0; 2621 unsigned int cs_queue_nr, queue_nr, i; 2622 u8 trace_chan_id; 2623 u64 cs_timestamp; 2624 struct auxtrace_queue *queue; 2625 struct cs_etm_queue *etmq; 2626 struct cs_etm_traceid_queue *tidq; 2627 2628 /* 2629 * Pre-populate the heap with one entry from each queue so that we can 2630 * start processing in time order across all queues. 
2631 */ 2632 for (i = 0; i < etm->queues.nr_queues; i++) { 2633 etmq = etm->queues.queue_array[i].priv; 2634 if (!etmq) 2635 continue; 2636 2637 ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i); 2638 if (ret) 2639 return ret; 2640 } 2641 2642 while (1) { 2643 if (!etm->heap.heap_cnt) 2644 break; 2645 2646 /* Take the entry at the top of the min heap */ 2647 cs_queue_nr = etm->heap.heap_array[0].queue_nr; 2648 queue_nr = TO_QUEUE_NR(cs_queue_nr); 2649 trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr); 2650 queue = &etm->queues.queue_array[queue_nr]; 2651 etmq = queue->priv; 2652 2653 /* 2654 * Remove the top entry from the heap since we are about 2655 * to process it. 2656 */ 2657 auxtrace_heap__pop(&etm->heap); 2658 2659 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); 2660 if (!tidq) { 2661 /* 2662 * No traceID queue has been allocated for this traceID, 2663 * which means something somewhere went very wrong. No 2664 * other choice than simply exit. 2665 */ 2666 ret = -EINVAL; 2667 goto out; 2668 } 2669 2670 /* 2671 * Packets associated with this timestamp are already in 2672 * the etmq's traceID queue, so process them. 2673 */ 2674 ret = cs_etm__process_traceid_queue(etmq, tidq); 2675 if (ret < 0) 2676 goto out; 2677 2678 /* 2679 * Packets for this timestamp have been processed, time to 2680 * move on to the next timestamp, fetching a new auxtrace_buffer 2681 * if need be. 2682 */ 2683 refetch: 2684 ret = cs_etm__get_data_block(etmq); 2685 if (ret < 0) 2686 goto out; 2687 2688 /* 2689 * No more auxtrace_buffers to process in this etmq, simply 2690 * move on to another entry in the auxtrace_heap. 
2691 */ 2692 if (!ret) 2693 continue; 2694 2695 ret = cs_etm__decode_data_block(etmq); 2696 if (ret) 2697 goto out; 2698 2699 cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id); 2700 2701 if (!cs_timestamp) { 2702 /* 2703 * Function cs_etm__decode_data_block() returns when 2704 * there is no more traces to decode in the current 2705 * auxtrace_buffer OR when a timestamp has been 2706 * encountered on any of the traceID queues. Since we 2707 * did not get a timestamp, there is no more traces to 2708 * process in this auxtrace_buffer. As such empty and 2709 * flush all traceID queues. 2710 */ 2711 cs_etm__clear_all_traceid_queues(etmq); 2712 2713 /* Fetch another auxtrace_buffer for this etmq */ 2714 goto refetch; 2715 } 2716 2717 /* 2718 * Add to the min heap the timestamp for packets that have 2719 * just been decoded. They will be processed and synthesized 2720 * during the next call to cs_etm__process_traceid_queue() for 2721 * this queue/traceID. 2722 */ 2723 cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id); 2724 ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp); 2725 } 2726 2727 for (i = 0; i < etm->queues.nr_queues; i++) { 2728 struct int_node *inode; 2729 2730 etmq = etm->queues.queue_array[i].priv; 2731 if (!etmq) 2732 continue; 2733 2734 intlist__for_each_entry(inode, etmq->traceid_queues_list) { 2735 int idx = (int)(intptr_t)inode->priv; 2736 2737 /* Flush any remaining branch stack entries */ 2738 tidq = etmq->traceid_queues[idx]; 2739 ret = cs_etm__end_block(etmq, tidq); 2740 if (ret) 2741 return ret; 2742 } 2743 } 2744 out: 2745 return ret; 2746 } 2747 2748 static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm, 2749 union perf_event *event) 2750 { 2751 struct thread *th; 2752 2753 if (etm->timeless_decoding) 2754 return 0; 2755 2756 /* 2757 * Add the tid/pid to the log so that we can get a match when we get a 2758 * contextID from the decoder. 
Only track for the host: only kernel 2759 * trace is supported for guests which wouldn't need pids so this should 2760 * be fine. 2761 */ 2762 th = machine__findnew_thread(&etm->session->machines.host, 2763 event->itrace_start.pid, 2764 event->itrace_start.tid); 2765 if (!th) 2766 return -ENOMEM; 2767 2768 thread__put(th); 2769 2770 return 0; 2771 } 2772 2773 static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm, 2774 union perf_event *event) 2775 { 2776 struct thread *th; 2777 bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; 2778 2779 /* 2780 * Context switch in per-thread mode are irrelevant since perf 2781 * will start/stop tracing as the process is scheduled. 2782 */ 2783 if (etm->timeless_decoding) 2784 return 0; 2785 2786 /* 2787 * SWITCH_IN events carry the next process to be switched out while 2788 * SWITCH_OUT events carry the process to be switched in. As such 2789 * we don't care about IN events. 2790 */ 2791 if (!out) 2792 return 0; 2793 2794 /* 2795 * Add the tid/pid to the log so that we can get a match when we get a 2796 * contextID from the decoder. Only track for the host: only kernel 2797 * trace is supported for guests which wouldn't need pids so this should 2798 * be fine. 
2799 */ 2800 th = machine__findnew_thread(&etm->session->machines.host, 2801 event->context_switch.next_prev_pid, 2802 event->context_switch.next_prev_tid); 2803 if (!th) 2804 return -ENOMEM; 2805 2806 thread__put(th); 2807 2808 return 0; 2809 } 2810 2811 static int cs_etm__process_event(struct perf_session *session, 2812 union perf_event *event, 2813 struct perf_sample *sample, 2814 const struct perf_tool *tool) 2815 { 2816 struct cs_etm_auxtrace *etm = container_of(session->auxtrace, 2817 struct cs_etm_auxtrace, 2818 auxtrace); 2819 2820 if (dump_trace) 2821 return 0; 2822 2823 if (!tool->ordered_events) { 2824 pr_err("CoreSight ETM Trace requires ordered events\n"); 2825 return -EINVAL; 2826 } 2827 2828 switch (event->header.type) { 2829 case PERF_RECORD_EXIT: 2830 /* 2831 * Don't need to wait for cs_etm__flush_events() in per-thread mode to 2832 * start the decode because we know there will be no more trace from 2833 * this thread. All this does is emit samples earlier than waiting for 2834 * the flush in other modes, but with timestamps it makes sense to wait 2835 * for flush so that events from different threads are interleaved 2836 * properly. 2837 */ 2838 if (etm->per_thread_decoding && etm->timeless_decoding) 2839 return cs_etm__process_timeless_queues(etm, 2840 event->fork.tid); 2841 break; 2842 2843 case PERF_RECORD_ITRACE_START: 2844 return cs_etm__process_itrace_start(etm, event); 2845 2846 case PERF_RECORD_SWITCH_CPU_WIDE: 2847 return cs_etm__process_switch_cpu_wide(etm, event); 2848 2849 case PERF_RECORD_AUX: 2850 /* 2851 * Record the latest kernel timestamp available in the header 2852 * for samples so that synthesised samples occur from this point 2853 * onwards. 
2854 */ 2855 if (sample->time && (sample->time != (u64)-1)) 2856 etm->latest_kernel_timestamp = sample->time; 2857 break; 2858 2859 default: 2860 break; 2861 } 2862 2863 return 0; 2864 } 2865 2866 static void dump_queued_data(struct cs_etm_auxtrace *etm, 2867 struct perf_record_auxtrace *event) 2868 { 2869 struct auxtrace_buffer *buf; 2870 unsigned int i; 2871 /* 2872 * Find all buffers with same reference in the queues and dump them. 2873 * This is because the queues can contain multiple entries of the same 2874 * buffer that were split on aux records. 2875 */ 2876 for (i = 0; i < etm->queues.nr_queues; ++i) 2877 list_for_each_entry(buf, &etm->queues.queue_array[i].head, list) 2878 if (buf->reference == event->reference) 2879 cs_etm__dump_event(etm->queues.queue_array[i].priv, buf); 2880 } 2881 2882 static int cs_etm__process_auxtrace_event(struct perf_session *session, 2883 union perf_event *event, 2884 const struct perf_tool *tool __maybe_unused) 2885 { 2886 struct cs_etm_auxtrace *etm = container_of(session->auxtrace, 2887 struct cs_etm_auxtrace, 2888 auxtrace); 2889 if (!etm->data_queued) { 2890 struct auxtrace_buffer *buffer; 2891 off_t data_offset; 2892 int fd = perf_data__fd(session->data); 2893 bool is_pipe = perf_data__is_pipe(session->data); 2894 int err; 2895 int idx = event->auxtrace.idx; 2896 2897 if (is_pipe) 2898 data_offset = 0; 2899 else { 2900 data_offset = lseek(fd, 0, SEEK_CUR); 2901 if (data_offset == -1) 2902 return -errno; 2903 } 2904 2905 err = auxtrace_queues__add_event(&etm->queues, session, 2906 event, data_offset, &buffer); 2907 if (err) 2908 return err; 2909 2910 if (dump_trace) 2911 if (auxtrace_buffer__get_data(buffer, fd)) { 2912 cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer); 2913 auxtrace_buffer__put_data(buffer); 2914 } 2915 } else if (dump_trace) 2916 dump_queued_data(etm, &event->auxtrace); 2917 2918 return 0; 2919 } 2920 2921 static int cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace *etm) 2922 { 2923 
struct evsel *evsel; 2924 struct evlist *evlist = etm->session->evlist; 2925 2926 /* Override timeless mode with user input from --itrace=Z */ 2927 if (etm->synth_opts.timeless_decoding) { 2928 etm->timeless_decoding = true; 2929 return 0; 2930 } 2931 2932 /* 2933 * Find the cs_etm evsel and look at what its timestamp setting was 2934 */ 2935 evlist__for_each_entry(evlist, evsel) 2936 if (cs_etm__evsel_is_auxtrace(etm->session, evsel)) { 2937 etm->timeless_decoding = 2938 !(evsel->core.attr.config & BIT(ETM_OPT_TS)); 2939 return 0; 2940 } 2941 2942 pr_err("CS ETM: Couldn't find ETM evsel\n"); 2943 return -EINVAL; 2944 } 2945 2946 /* 2947 * Read a single cpu parameter block from the auxtrace_info priv block. 2948 * 2949 * For version 1 there is a per cpu nr_params entry. If we are handling 2950 * version 1 file, then there may be less, the same, or more params 2951 * indicated by this value than the compile time number we understand. 2952 * 2953 * For a version 0 info block, there are a fixed number, and we need to 2954 * fill out the nr_param value in the metadata we create. 
2955 */ 2956 static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset, 2957 int out_blk_size, int nr_params_v0) 2958 { 2959 u64 *metadata = NULL; 2960 int hdr_version; 2961 int nr_in_params, nr_out_params, nr_cmn_params; 2962 int i, k; 2963 2964 metadata = zalloc(sizeof(*metadata) * out_blk_size); 2965 if (!metadata) 2966 return NULL; 2967 2968 /* read block current index & version */ 2969 i = *buff_in_offset; 2970 hdr_version = buff_in[CS_HEADER_VERSION]; 2971 2972 if (!hdr_version) { 2973 /* read version 0 info block into a version 1 metadata block */ 2974 nr_in_params = nr_params_v0; 2975 metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC]; 2976 metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU]; 2977 metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params; 2978 /* remaining block params at offset +1 from source */ 2979 for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++) 2980 metadata[k + 1] = buff_in[i + k]; 2981 /* version 0 has 2 common params */ 2982 nr_cmn_params = 2; 2983 } else { 2984 /* read version 1 info block - input and output nr_params may differ */ 2985 /* version 1 has 3 common params */ 2986 nr_cmn_params = 3; 2987 nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS]; 2988 2989 /* if input has more params than output - skip excess */ 2990 nr_out_params = nr_in_params + nr_cmn_params; 2991 if (nr_out_params > out_blk_size) 2992 nr_out_params = out_blk_size; 2993 2994 for (k = CS_ETM_MAGIC; k < nr_out_params; k++) 2995 metadata[k] = buff_in[i + k]; 2996 2997 /* record the actual nr params we copied */ 2998 metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params; 2999 } 3000 3001 /* adjust in offset by number of in params used */ 3002 i += nr_in_params + nr_cmn_params; 3003 *buff_in_offset = i; 3004 return metadata; 3005 } 3006 3007 /** 3008 * Puts a fragment of an auxtrace buffer into the auxtrace queues based 3009 * on the bounds of aux_event, if it matches with the buffer that's at 3010 * file_offset. 
3011 * 3012 * Normally, whole auxtrace buffers would be added to the queue. But we 3013 * want to reset the decoder for every PERF_RECORD_AUX event, and the decoder 3014 * is reset across each buffer, so splitting the buffers up in advance has 3015 * the same effect. 3016 */ 3017 static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz, 3018 struct perf_record_aux *aux_event, struct perf_sample *sample) 3019 { 3020 int err; 3021 char buf[PERF_SAMPLE_MAX_SIZE]; 3022 union perf_event *auxtrace_event_union; 3023 struct perf_record_auxtrace *auxtrace_event; 3024 union perf_event auxtrace_fragment; 3025 __u64 aux_offset, aux_size; 3026 enum cs_etm_format format; 3027 3028 struct cs_etm_auxtrace *etm = container_of(session->auxtrace, 3029 struct cs_etm_auxtrace, 3030 auxtrace); 3031 3032 /* 3033 * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got 3034 * from looping through the auxtrace index. 3035 */ 3036 err = perf_session__peek_event(session, file_offset, buf, 3037 PERF_SAMPLE_MAX_SIZE, &auxtrace_event_union, NULL); 3038 if (err) 3039 return err; 3040 auxtrace_event = &auxtrace_event_union->auxtrace; 3041 if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE) 3042 return -EINVAL; 3043 3044 if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) || 3045 auxtrace_event->header.size != sz) { 3046 return -EINVAL; 3047 } 3048 3049 /* 3050 * In per-thread mode, auxtrace CPU is set to -1, but TID will be set instead. See 3051 * auxtrace_mmap_params__set_idx(). However, the sample AUX event will contain a 3052 * CPU as we set this always for the AUX_OUTPUT_HW_ID event. 3053 * So now compare only TIDs if auxtrace CPU is -1, and CPUs if auxtrace CPU is not -1. 3054 * Return 'not found' if mismatch. 
3055 */ 3056 if (auxtrace_event->cpu == (__u32) -1) { 3057 etm->per_thread_decoding = true; 3058 if (auxtrace_event->tid != sample->tid) 3059 return 1; 3060 } else if (auxtrace_event->cpu != sample->cpu) { 3061 if (etm->per_thread_decoding) { 3062 /* 3063 * Found a per-cpu buffer after a per-thread one was 3064 * already found 3065 */ 3066 pr_err("CS ETM: Inconsistent per-thread/per-cpu mode.\n"); 3067 return -EINVAL; 3068 } 3069 return 1; 3070 } 3071 3072 if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) { 3073 /* 3074 * Clamp size in snapshot mode. The buffer size is clamped in 3075 * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect 3076 * the buffer size. 3077 */ 3078 aux_size = min(aux_event->aux_size, auxtrace_event->size); 3079 3080 /* 3081 * In this mode, the head also points to the end of the buffer so aux_offset 3082 * needs to have the size subtracted so it points to the beginning as in normal mode 3083 */ 3084 aux_offset = aux_event->aux_offset - aux_size; 3085 } else { 3086 aux_size = aux_event->aux_size; 3087 aux_offset = aux_event->aux_offset; 3088 } 3089 3090 if (aux_offset >= auxtrace_event->offset && 3091 aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) { 3092 struct cs_etm_queue *etmq = etm->queues.queue_array[auxtrace_event->idx].priv; 3093 3094 /* 3095 * If this AUX event was inside this buffer somewhere, create a new auxtrace event 3096 * based on the sizes of the aux event, and queue that fragment. 
3097 */ 3098 auxtrace_fragment.auxtrace = *auxtrace_event; 3099 auxtrace_fragment.auxtrace.size = aux_size; 3100 auxtrace_fragment.auxtrace.offset = aux_offset; 3101 file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size; 3102 3103 pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64 3104 " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu); 3105 err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment, 3106 file_offset, NULL); 3107 if (err) 3108 return err; 3109 3110 format = (aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW) ? 3111 UNFORMATTED : FORMATTED; 3112 if (etmq->format != UNSET && format != etmq->format) { 3113 pr_err("CS_ETM: mixed formatted and unformatted trace not supported\n"); 3114 return -EINVAL; 3115 } 3116 etmq->format = format; 3117 return 0; 3118 } 3119 3120 /* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */ 3121 return 1; 3122 } 3123 3124 static int cs_etm__process_aux_hw_id_cb(struct perf_session *session, union perf_event *event, 3125 u64 offset __maybe_unused, void *data __maybe_unused) 3126 { 3127 /* look to handle PERF_RECORD_AUX_OUTPUT_HW_ID early to ensure decoders can be set up */ 3128 if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) { 3129 (*(int *)data)++; /* increment found count */ 3130 return cs_etm__process_aux_output_hw_id(session, event); 3131 } 3132 return 0; 3133 } 3134 3135 static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event, 3136 u64 offset __maybe_unused, void *data __maybe_unused) 3137 { 3138 struct perf_sample sample; 3139 int ret; 3140 struct auxtrace_index_entry *ent; 3141 struct auxtrace_index *auxtrace_index; 3142 struct evsel *evsel; 3143 size_t i; 3144 3145 /* Don't care about any other events, we're only queuing buffers for AUX events */ 3146 if (event->header.type != PERF_RECORD_AUX) 3147 return 0; 3148 3149 if (event->header.size < sizeof(struct 
perf_record_aux)) 3150 return -EINVAL; 3151 3152 /* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */ 3153 if (!event->aux.aux_size) 3154 return 0; 3155 3156 /* 3157 * Parse the sample, we need the sample_id_all data that comes after the event so that the 3158 * CPU or PID can be matched to an AUXTRACE buffer's CPU or PID. 3159 */ 3160 evsel = evlist__event2evsel(session->evlist, event); 3161 if (!evsel) 3162 return -EINVAL; 3163 perf_sample__init(&sample, /*all=*/false); 3164 ret = evsel__parse_sample(evsel, event, &sample); 3165 if (ret) 3166 goto out; 3167 3168 /* 3169 * Loop through the auxtrace index to find the buffer that matches up with this aux event. 3170 */ 3171 list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) { 3172 for (i = 0; i < auxtrace_index->nr; i++) { 3173 ent = &auxtrace_index->entries[i]; 3174 ret = cs_etm__queue_aux_fragment(session, ent->file_offset, 3175 ent->sz, &event->aux, &sample); 3176 /* 3177 * Stop search on error or successful values. Continue search on 3178 * 1 ('not found') 3179 */ 3180 if (ret != 1) 3181 goto out; 3182 } 3183 } 3184 3185 /* 3186 * Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but 3187 * don't exit with an error because it will still be possible to decode other aux records. 
3188 */ 3189 pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64 3190 " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu); 3191 ret = 0; 3192 out: 3193 perf_sample__exit(&sample); 3194 return ret; 3195 } 3196 3197 static int cs_etm__queue_aux_records(struct perf_session *session) 3198 { 3199 struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index, 3200 struct auxtrace_index, list); 3201 if (index && index->nr > 0) 3202 return perf_session__peek_events(session, session->header.data_offset, 3203 session->header.data_size, 3204 cs_etm__queue_aux_records_cb, NULL); 3205 3206 /* 3207 * We would get here if there are no entries in the index (either no auxtrace 3208 * buffers or no index at all). Fail silently as there is the possibility of 3209 * queueing them in cs_etm__process_auxtrace_event() if etm->data_queued is still 3210 * false. 3211 * 3212 * In that scenario, buffers will not be split by AUX records. 3213 */ 3214 return 0; 3215 } 3216 3217 #define HAS_PARAM(j, type, param) (metadata[(j)][CS_ETM_NR_TRC_PARAMS] <= \ 3218 (CS_##type##_##param - CS_ETM_COMMON_BLK_MAX_V1)) 3219 3220 /* 3221 * Loop through the ETMs and complain if we find at least one where ts_source != 1 (virtual 3222 * timestamps). 3223 */ 3224 static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu) 3225 { 3226 int j; 3227 3228 for (j = 0; j < num_cpu; j++) { 3229 switch (metadata[j][CS_ETM_MAGIC]) { 3230 case __perf_cs_etmv4_magic: 3231 if (HAS_PARAM(j, ETMV4, TS_SOURCE) || metadata[j][CS_ETMV4_TS_SOURCE] != 1) 3232 return false; 3233 break; 3234 case __perf_cs_ete_magic: 3235 if (HAS_PARAM(j, ETE, TS_SOURCE) || metadata[j][CS_ETE_TS_SOURCE] != 1) 3236 return false; 3237 break; 3238 default: 3239 /* Unknown / unsupported magic number. 
*/ 3240 return false; 3241 } 3242 } 3243 return true; 3244 } 3245 3246 /* map trace ids to correct metadata block, from information in metadata */ 3247 static int cs_etm__map_trace_ids_metadata(struct cs_etm_auxtrace *etm, int num_cpu, 3248 u64 **metadata) 3249 { 3250 u64 cs_etm_magic; 3251 u8 trace_chan_id; 3252 int i, err; 3253 3254 for (i = 0; i < num_cpu; i++) { 3255 cs_etm_magic = metadata[i][CS_ETM_MAGIC]; 3256 switch (cs_etm_magic) { 3257 case __perf_cs_etmv3_magic: 3258 metadata[i][CS_ETM_ETMTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK; 3259 trace_chan_id = (u8)(metadata[i][CS_ETM_ETMTRACEIDR]); 3260 break; 3261 case __perf_cs_etmv4_magic: 3262 case __perf_cs_ete_magic: 3263 metadata[i][CS_ETMV4_TRCTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK; 3264 trace_chan_id = (u8)(metadata[i][CS_ETMV4_TRCTRACEIDR]); 3265 break; 3266 default: 3267 /* unknown magic number */ 3268 return -EINVAL; 3269 } 3270 err = cs_etm__map_trace_id_v0(etm, trace_chan_id, metadata[i]); 3271 if (err) 3272 return err; 3273 } 3274 return 0; 3275 } 3276 3277 /* 3278 * Use the data gathered by the peeks for HW_ID (trace ID mappings) and AUX 3279 * (formatted or not) packets to create the decoders. 3280 */ 3281 static int cs_etm__create_queue_decoders(struct cs_etm_queue *etmq) 3282 { 3283 struct cs_etm_decoder_params d_params; 3284 struct cs_etm_trace_params *t_params; 3285 int decoders = intlist__nr_entries(etmq->traceid_list); 3286 3287 if (decoders == 0) 3288 return 0; 3289 3290 /* 3291 * Each queue can only contain data from one CPU when unformatted, so only one decoder is 3292 * needed. 
3293 */ 3294 if (etmq->format == UNFORMATTED) 3295 assert(decoders == 1); 3296 3297 /* Use metadata to fill in trace parameters for trace decoder */ 3298 t_params = zalloc(sizeof(*t_params) * decoders); 3299 3300 if (!t_params) 3301 goto out_free; 3302 3303 if (cs_etm__init_trace_params(t_params, etmq)) 3304 goto out_free; 3305 3306 /* Set decoder parameters to decode trace packets */ 3307 if (cs_etm__init_decoder_params(&d_params, etmq, 3308 dump_trace ? CS_ETM_OPERATION_PRINT : 3309 CS_ETM_OPERATION_DECODE)) 3310 goto out_free; 3311 3312 etmq->decoder = cs_etm_decoder__new(decoders, &d_params, 3313 t_params); 3314 3315 if (!etmq->decoder) 3316 goto out_free; 3317 3318 /* 3319 * Register a function to handle all memory accesses required by 3320 * the trace decoder library. 3321 */ 3322 if (cs_etm_decoder__add_mem_access_cb(etmq->decoder, 3323 0x0L, ((u64) -1L), 3324 cs_etm__mem_access)) 3325 goto out_free_decoder; 3326 3327 zfree(&t_params); 3328 return 0; 3329 3330 out_free_decoder: 3331 cs_etm_decoder__free(etmq->decoder); 3332 out_free: 3333 zfree(&t_params); 3334 return -EINVAL; 3335 } 3336 3337 static int cs_etm__create_decoders(struct cs_etm_auxtrace *etm) 3338 { 3339 struct auxtrace_queues *queues = &etm->queues; 3340 3341 for (unsigned int i = 0; i < queues->nr_queues; i++) { 3342 bool empty = list_empty(&queues->queue_array[i].head); 3343 struct cs_etm_queue *etmq = queues->queue_array[i].priv; 3344 int ret; 3345 3346 /* 3347 * Don't create decoders for empty queues, mainly because 3348 * etmq->format is unknown for empty queues. 
3349 */ 3350 assert(empty || etmq->format != UNSET); 3351 if (empty) 3352 continue; 3353 3354 ret = cs_etm__create_queue_decoders(etmq); 3355 if (ret) 3356 return ret; 3357 } 3358 return 0; 3359 } 3360 3361 int cs_etm__process_auxtrace_info_full(union perf_event *event, 3362 struct perf_session *session) 3363 { 3364 struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; 3365 struct cs_etm_auxtrace *etm = NULL; 3366 struct perf_record_time_conv *tc = &session->time_conv; 3367 int event_header_size = sizeof(struct perf_event_header); 3368 int total_size = auxtrace_info->header.size; 3369 int priv_size = 0; 3370 int num_cpu, max_cpu = 0; 3371 int err = 0; 3372 int aux_hw_id_found; 3373 int i; 3374 u64 *ptr = NULL; 3375 u64 **metadata = NULL; 3376 3377 /* First the global part */ 3378 ptr = (u64 *) auxtrace_info->priv; 3379 num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff; 3380 metadata = zalloc(sizeof(*metadata) * num_cpu); 3381 if (!metadata) 3382 return -ENOMEM; 3383 3384 /* Start parsing after the common part of the header */ 3385 i = CS_HEADER_VERSION_MAX; 3386 3387 /* 3388 * The metadata is stored in the auxtrace_info section and encodes 3389 * the configuration of the ARM embedded trace macrocell which is 3390 * required by the trace decoder to properly decode the trace due 3391 * to its highly compressed nature. 3392 */ 3393 for (int j = 0; j < num_cpu; j++) { 3394 if (ptr[i] == __perf_cs_etmv3_magic) { 3395 metadata[j] = 3396 cs_etm__create_meta_blk(ptr, &i, 3397 CS_ETM_PRIV_MAX, 3398 CS_ETM_NR_TRC_PARAMS_V0); 3399 } else if (ptr[i] == __perf_cs_etmv4_magic) { 3400 metadata[j] = 3401 cs_etm__create_meta_blk(ptr, &i, 3402 CS_ETMV4_PRIV_MAX, 3403 CS_ETMV4_NR_TRC_PARAMS_V0); 3404 } else if (ptr[i] == __perf_cs_ete_magic) { 3405 metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1); 3406 } else { 3407 ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". 
File could be from a newer version of perf.\n", 3408 ptr[i]); 3409 err = -EINVAL; 3410 goto err_free_metadata; 3411 } 3412 3413 if (!metadata[j]) { 3414 err = -ENOMEM; 3415 goto err_free_metadata; 3416 } 3417 3418 if ((int) metadata[j][CS_ETM_CPU] > max_cpu) 3419 max_cpu = metadata[j][CS_ETM_CPU]; 3420 } 3421 3422 /* 3423 * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and 3424 * CS_ETMV4_PRIV_MAX mark how many double words are in the 3425 * global metadata, and each cpu's metadata respectively. 3426 * The following tests if the correct number of double words was 3427 * present in the auxtrace info section. 3428 */ 3429 priv_size = total_size - event_header_size - INFO_HEADER_SIZE; 3430 if (i * 8 != priv_size) { 3431 err = -EINVAL; 3432 goto err_free_metadata; 3433 } 3434 3435 etm = zalloc(sizeof(*etm)); 3436 3437 if (!etm) { 3438 err = -ENOMEM; 3439 goto err_free_metadata; 3440 } 3441 3442 /* 3443 * As all the ETMs run at the same exception level, the system should 3444 * have the same PID format crossing CPUs. So cache the PID format 3445 * and reuse it for sequential decoding. 
3446 */ 3447 etm->pid_fmt = cs_etm__init_pid_fmt(metadata[0]); 3448 3449 err = auxtrace_queues__init_nr(&etm->queues, max_cpu + 1); 3450 if (err) 3451 goto err_free_etm; 3452 3453 for (unsigned int j = 0; j < etm->queues.nr_queues; ++j) { 3454 err = cs_etm__setup_queue(etm, &etm->queues.queue_array[j], j); 3455 if (err) 3456 goto err_free_queues; 3457 } 3458 3459 if (session->itrace_synth_opts->set) { 3460 etm->synth_opts = *session->itrace_synth_opts; 3461 } else { 3462 itrace_synth_opts__set_default(&etm->synth_opts, 3463 session->itrace_synth_opts->default_no_sample); 3464 etm->synth_opts.callchain = false; 3465 } 3466 3467 etm->session = session; 3468 3469 etm->num_cpu = num_cpu; 3470 etm->pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> 32) & 0xffffffff); 3471 etm->snapshot_mode = (ptr[CS_ETM_SNAPSHOT] != 0); 3472 etm->metadata = metadata; 3473 etm->auxtrace_type = auxtrace_info->type; 3474 3475 if (etm->synth_opts.use_timestamp) 3476 /* 3477 * Prior to Armv8.4, Arm CPUs don't support FEAT_TRF feature, 3478 * therefore the decoder cannot know if the timestamp trace is 3479 * same with the kernel time. 3480 * 3481 * If a user has knowledge for the working platform and can 3482 * specify itrace option 'T' to tell decoder to forcely use the 3483 * traced timestamp as the kernel time. 
3484 */ 3485 etm->has_virtual_ts = true; 3486 else 3487 /* Use virtual timestamps if all ETMs report ts_source = 1 */ 3488 etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu); 3489 3490 if (!etm->has_virtual_ts) 3491 ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n" 3492 "The time field of the samples will not be set accurately.\n" 3493 "For Arm CPUs prior to Armv8.4 or without support FEAT_TRF,\n" 3494 "you can specify the itrace option 'T' for timestamp decoding\n" 3495 "if the Coresight timestamp on the platform is same with the kernel time.\n\n"); 3496 3497 etm->auxtrace.process_event = cs_etm__process_event; 3498 etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event; 3499 etm->auxtrace.flush_events = cs_etm__flush_events; 3500 etm->auxtrace.free_events = cs_etm__free_events; 3501 etm->auxtrace.free = cs_etm__free; 3502 etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace; 3503 session->auxtrace = &etm->auxtrace; 3504 3505 err = cs_etm__setup_timeless_decoding(etm); 3506 if (err) 3507 return err; 3508 3509 etm->tc.time_shift = tc->time_shift; 3510 etm->tc.time_mult = tc->time_mult; 3511 etm->tc.time_zero = tc->time_zero; 3512 if (event_contains(*tc, time_cycles)) { 3513 etm->tc.time_cycles = tc->time_cycles; 3514 etm->tc.time_mask = tc->time_mask; 3515 etm->tc.cap_user_time_zero = tc->cap_user_time_zero; 3516 etm->tc.cap_user_time_short = tc->cap_user_time_short; 3517 } 3518 err = cs_etm__synth_events(etm, session); 3519 if (err) 3520 goto err_free_queues; 3521 3522 err = cs_etm__queue_aux_records(session); 3523 if (err) 3524 goto err_free_queues; 3525 3526 /* 3527 * Map Trace ID values to CPU metadata. 3528 * 3529 * Trace metadata will always contain Trace ID values from the legacy algorithm 3530 * in case it's read by a version of Perf that doesn't know about HW_ID packets 3531 * or the kernel doesn't emit them. 
3532 * 3533 * The updated kernel drivers that use AUX_HW_ID to sent Trace IDs will attempt to use 3534 * the same IDs as the old algorithm as far as is possible, unless there are clashes 3535 * in which case a different value will be used. This means an older perf may still 3536 * be able to record and read files generate on a newer system. 3537 * 3538 * For a perf able to interpret AUX_HW_ID packets we first check for the presence of 3539 * those packets. If they are there then the values will be mapped and plugged into 3540 * the metadata and decoders are only created for each mapping received. 3541 * 3542 * If no AUX_HW_ID packets are present - which means a file recorded on an old kernel 3543 * then we map Trace ID values to CPU directly from the metadata and create decoders 3544 * for all mappings. 3545 */ 3546 3547 /* Scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */ 3548 aux_hw_id_found = 0; 3549 err = perf_session__peek_events(session, session->header.data_offset, 3550 session->header.data_size, 3551 cs_etm__process_aux_hw_id_cb, &aux_hw_id_found); 3552 if (err) 3553 goto err_free_queues; 3554 3555 /* if no HW ID found this is a file with metadata values only, map from metadata */ 3556 if (!aux_hw_id_found) { 3557 err = cs_etm__map_trace_ids_metadata(etm, num_cpu, metadata); 3558 if (err) 3559 goto err_free_queues; 3560 } 3561 3562 err = cs_etm__create_decoders(etm); 3563 if (err) 3564 goto err_free_queues; 3565 3566 etm->data_queued = etm->queues.populated; 3567 return 0; 3568 3569 err_free_queues: 3570 auxtrace_queues__free(&etm->queues); 3571 session->auxtrace = NULL; 3572 err_free_etm: 3573 zfree(&etm); 3574 err_free_metadata: 3575 /* No need to check @metadata[j], free(NULL) is supported */ 3576 for (int j = 0; j < num_cpu; j++) 3577 zfree(&metadata[j]); 3578 zfree(&metadata); 3579 return err; 3580 } 3581