1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright(C) 2015-2018 Linaro Limited. 4 * 5 * Author: Tor Jeremiassen <tor@ti.com> 6 * Author: Mathieu Poirier <mathieu.poirier@linaro.org> 7 */ 8 9 #include <linux/kernel.h> 10 #include <linux/bitfield.h> 11 #include <linux/bitops.h> 12 #include <linux/coresight-pmu.h> 13 #include <linux/err.h> 14 #include <linux/log2.h> 15 #include <linux/types.h> 16 #include <linux/zalloc.h> 17 18 #include <stdlib.h> 19 20 #include "auxtrace.h" 21 #include "color.h" 22 #include "cs-etm.h" 23 #include "cs-etm-decoder/cs-etm-decoder.h" 24 #include "debug.h" 25 #include "dso.h" 26 #include "evlist.h" 27 #include "intlist.h" 28 #include "machine.h" 29 #include "map.h" 30 #include "perf.h" 31 #include "session.h" 32 #include "map_symbol.h" 33 #include "branch.h" 34 #include "symbol.h" 35 #include "tool.h" 36 #include "thread.h" 37 #include "thread-stack.h" 38 #include "tsc.h" 39 #include <tools/libc_compat.h> 40 #include "util/synthetic-events.h" 41 #include "util/util.h" 42 43 struct cs_etm_auxtrace { 44 struct auxtrace auxtrace; 45 struct auxtrace_queues queues; 46 struct auxtrace_heap heap; 47 struct itrace_synth_opts synth_opts; 48 struct perf_session *session; 49 struct perf_tsc_conversion tc; 50 51 /* 52 * Timeless has no timestamps in the trace so overlapping mmap lookups 53 * are less accurate but produces smaller trace data. We use context IDs 54 * in the trace instead of matching timestamps with fork records so 55 * they're not really needed in the general case. Overlapping mmaps 56 * happen in cases like between a fork and an exec. 57 */ 58 bool timeless_decoding; 59 60 /* 61 * Per-thread ignores the trace channel ID and instead assumes that 62 * everything in a buffer comes from the same process regardless of 63 * which CPU it ran on. It also implies no context IDs so the TID is 64 * taken from the auxtrace buffer. 65 */ 66 bool per_thread_decoding; 67 bool snapshot_mode; 68 bool data_queued; 69 bool has_virtual_ts; /* Virtual/Kernel timestamps in the trace. */ 70 71 int num_cpu; 72 u64 latest_kernel_timestamp; 73 u32 auxtrace_type; 74 u64 branches_sample_type; 75 u64 branches_id; 76 u64 instructions_sample_type; 77 u64 instructions_sample_period; 78 u64 instructions_id; 79 u64 **metadata; 80 unsigned int pmu_type; 81 enum cs_etm_pid_fmt pid_fmt; 82 }; 83 84 struct cs_etm_traceid_queue { 85 u8 trace_chan_id; 86 u64 period_instructions; 87 size_t last_branch_pos; 88 union perf_event *event_buf; 89 struct thread *thread; 90 struct thread *prev_packet_thread; 91 ocsd_ex_level prev_packet_el; 92 ocsd_ex_level el; 93 struct branch_stack *last_branch; 94 struct branch_stack *last_branch_rb; 95 struct cs_etm_packet *prev_packet; 96 struct cs_etm_packet *packet; 97 struct cs_etm_packet_queue packet_queue; 98 }; 99 100 enum cs_etm_format { 101 UNSET, 102 FORMATTED, 103 UNFORMATTED 104 }; 105 106 struct cs_etm_queue { 107 struct cs_etm_auxtrace *etm; 108 struct cs_etm_decoder *decoder; 109 struct auxtrace_buffer *buffer; 110 unsigned int queue_nr; 111 u8 pending_timestamp_chan_id; 112 enum cs_etm_format format; 113 u64 offset; 114 const unsigned char *buf; 115 size_t buf_len, buf_used; 116 /* Conversion between traceID and index in traceid_queues array */ 117 struct intlist *traceid_queues_list; 118 struct cs_etm_traceid_queue **traceid_queues; 119 /* Conversion between traceID and metadata pointers */ 120 struct intlist *traceid_list; 121 /* 122 * Same as traceid_list, but traceid_list may be a reference to another 123 * queue's which has a matching sink ID. 124 */ 125 struct intlist *own_traceid_list; 126 u32 sink_id; 127 }; 128 129 static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm); 130 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, 131 pid_t tid); 132 static int cs_etm__get_data_block(struct cs_etm_queue *etmq); 133 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq); 134 static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata); 135 static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu); 136 static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata); 137 138 /* PTMs ETMIDR [11:8] set to b0011 */ 139 #define ETMIDR_PTM_VERSION 0x00000300 140 141 /* 142 * A struct auxtrace_heap_item only has a queue_nr and a timestamp to 143 * work with. One option is to modify to auxtrace_heap_XYZ() API or simply 144 * encode the etm queue number as the upper 16 bit and the channel as 145 * the lower 16 bit. 146 */ 147 #define TO_CS_QUEUE_NR(queue_nr, trace_chan_id) \ 148 (queue_nr << 16 | trace_chan_id) 149 #define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16) 150 #define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff) 151 #define SINK_UNSET ((u32) -1) 152 153 static u32 cs_etm__get_v7_protocol_version(u32 etmidr) 154 { 155 etmidr &= ETMIDR_PTM_VERSION; 156 157 if (etmidr == ETMIDR_PTM_VERSION) 158 return CS_ETM_PROTO_PTM; 159 160 return CS_ETM_PROTO_ETMV3; 161 } 162 163 static int cs_etm__get_magic(struct cs_etm_queue *etmq, u8 trace_chan_id, u64 *magic) 164 { 165 struct int_node *inode; 166 u64 *metadata; 167 168 inode = intlist__find(etmq->traceid_list, trace_chan_id); 169 if (!inode) 170 return -EINVAL; 171 172 metadata = inode->priv; 173 *magic = metadata[CS_ETM_MAGIC]; 174 return 0; 175 } 176 177 int cs_etm__get_cpu(struct cs_etm_queue *etmq, u8 trace_chan_id, int *cpu) 178 { 179 struct int_node *inode; 180 u64 *metadata; 181 182 inode = intlist__find(etmq->traceid_list, trace_chan_id); 183 if (!inode) 184 return -EINVAL; 185 186 metadata = inode->priv; 187 *cpu = (int)metadata[CS_ETM_CPU]; 188 return 0; 189 } 190 191 /* 192 * The returned PID format is presented as an enum: 193 * 194 * CS_ETM_PIDFMT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced. 195 * CS_ETM_PIDFMT_CTXTID2: CONTEXTIDR_EL2 is traced. 196 * CS_ETM_PIDFMT_NONE: No context IDs 197 * 198 * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2 199 * are enabled at the same time when the session runs on an EL2 kernel. 200 * This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be 201 * recorded in the trace data, the tool will selectively use 202 * CONTEXTIDR_EL2 as PID. 203 * 204 * The result is cached in etm->pid_fmt so this function only needs to be called 205 * when processing the aux info. 206 */ 207 static enum cs_etm_pid_fmt cs_etm__init_pid_fmt(u64 *metadata) 208 { 209 u64 val; 210 211 if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) { 212 val = metadata[CS_ETM_ETMCR]; 213 /* CONTEXTIDR is traced */ 214 if (val & BIT(ETM_OPT_CTXTID)) 215 return CS_ETM_PIDFMT_CTXTID; 216 } else { 217 val = metadata[CS_ETMV4_TRCCONFIGR]; 218 /* CONTEXTIDR_EL2 is traced */ 219 if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT))) 220 return CS_ETM_PIDFMT_CTXTID2; 221 /* CONTEXTIDR_EL1 is traced */ 222 else if (val & BIT(ETM4_CFG_BIT_CTXTID)) 223 return CS_ETM_PIDFMT_CTXTID; 224 } 225 226 return CS_ETM_PIDFMT_NONE; 227 } 228 229 enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq) 230 { 231 return etmq->etm->pid_fmt; 232 } 233 234 static int cs_etm__insert_trace_id_node(struct cs_etm_queue *etmq, 235 u8 trace_chan_id, u64 *cpu_metadata) 236 { 237 /* Get an RB node for this CPU */ 238 struct int_node *inode = intlist__findnew(etmq->traceid_list, trace_chan_id); 239 240 /* Something went wrong, no need to continue */ 241 if (!inode) 242 return -ENOMEM; 243 244 /* Disallow re-mapping a different traceID to metadata pair. */ 245 if (inode->priv) { 246 u64 *curr_cpu_data = inode->priv; 247 u8 curr_chan_id; 248 int err; 249 250 if (curr_cpu_data[CS_ETM_CPU] != cpu_metadata[CS_ETM_CPU]) { 251 /* 252 * With > CORESIGHT_TRACE_IDS_MAX ETMs, overlapping IDs 253 * are expected (but not supported) in per-thread mode, 254 * rather than signifying an error. 255 */ 256 if (etmq->etm->per_thread_decoding) 257 pr_err("CS_ETM: overlapping Trace IDs aren't currently supported in per-thread mode\n"); 258 else 259 pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n"); 260 261 return -EINVAL; 262 } 263 264 /* check that the mapped ID matches */ 265 err = cs_etm__metadata_get_trace_id(&curr_chan_id, curr_cpu_data); 266 if (err) 267 return err; 268 269 if (curr_chan_id != trace_chan_id) { 270 pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n"); 271 return -EINVAL; 272 } 273 274 /* Skip re-adding the same mappings if everything matched */ 275 return 0; 276 } 277 278 /* Not one we've seen before, associate the traceID with the metadata pointer */ 279 inode->priv = cpu_metadata; 280 281 return 0; 282 } 283 284 static struct cs_etm_queue *cs_etm__get_queue(struct cs_etm_auxtrace *etm, int cpu) 285 { 286 if (etm->per_thread_decoding) 287 return etm->queues.queue_array[0].priv; 288 else 289 return etm->queues.queue_array[cpu].priv; 290 } 291 292 static int cs_etm__map_trace_id_v0(struct cs_etm_auxtrace *etm, u8 trace_chan_id, 293 u64 *cpu_metadata) 294 { 295 struct cs_etm_queue *etmq; 296 297 /* 298 * If the queue is unformatted then only save one mapping in the 299 * queue associated with that CPU so only one decoder is made. 300 */ 301 etmq = cs_etm__get_queue(etm, cpu_metadata[CS_ETM_CPU]); 302 if (etmq->format == UNFORMATTED) 303 return cs_etm__insert_trace_id_node(etmq, trace_chan_id, 304 cpu_metadata); 305 306 /* 307 * Otherwise, version 0 trace IDs are global so save them into every 308 * queue. 309 */ 310 for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) { 311 int ret; 312 313 etmq = etm->queues.queue_array[i].priv; 314 ret = cs_etm__insert_trace_id_node(etmq, trace_chan_id, 315 cpu_metadata); 316 if (ret) 317 return ret; 318 } 319 320 return 0; 321 } 322 323 static int cs_etm__process_trace_id_v0(struct cs_etm_auxtrace *etm, int cpu, 324 u64 hw_id) 325 { 326 int err; 327 u64 *cpu_data; 328 u8 trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id); 329 330 cpu_data = get_cpu_data(etm, cpu); 331 if (cpu_data == NULL) 332 return -EINVAL; 333 334 err = cs_etm__map_trace_id_v0(etm, trace_chan_id, cpu_data); 335 if (err) 336 return err; 337 338 /* 339 * if we are picking up the association from the packet, need to plug 340 * the correct trace ID into the metadata for setting up decoders later. 341 */ 342 return cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data); 343 } 344 345 static int cs_etm__process_trace_id_v0_1(struct cs_etm_auxtrace *etm, int cpu, 346 u64 hw_id) 347 { 348 struct cs_etm_queue *etmq = cs_etm__get_queue(etm, cpu); 349 int ret; 350 u64 *cpu_data; 351 u32 sink_id = FIELD_GET(CS_AUX_HW_ID_SINK_ID_MASK, hw_id); 352 u8 trace_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id); 353 354 /* 355 * Check sink id hasn't changed in per-cpu mode. In per-thread mode, 356 * let it pass for now until an actual overlapping trace ID is hit. In 357 * most cases IDs won't overlap even if the sink changes. 358 */ 359 if (!etmq->etm->per_thread_decoding && etmq->sink_id != SINK_UNSET && 360 etmq->sink_id != sink_id) { 361 pr_err("CS_ETM: mismatch between sink IDs\n"); 362 return -EINVAL; 363 } 364 365 etmq->sink_id = sink_id; 366 367 /* Find which other queues use this sink and link their ID maps */ 368 for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) { 369 struct cs_etm_queue *other_etmq = etm->queues.queue_array[i].priv; 370 371 /* Different sinks, skip */ 372 if (other_etmq->sink_id != etmq->sink_id) 373 continue; 374 375 /* Already linked, skip */ 376 if (other_etmq->traceid_list == etmq->traceid_list) 377 continue; 378 379 /* At the point of first linking, this one should be empty */ 380 if (!intlist__empty(etmq->traceid_list)) { 381 pr_err("CS_ETM: Can't link populated trace ID lists\n"); 382 return -EINVAL; 383 } 384 385 etmq->own_traceid_list = NULL; 386 intlist__delete(etmq->traceid_list); 387 etmq->traceid_list = other_etmq->traceid_list; 388 break; 389 } 390 391 cpu_data = get_cpu_data(etm, cpu); 392 ret = cs_etm__insert_trace_id_node(etmq, trace_id, cpu_data); 393 if (ret) 394 return ret; 395 396 ret = cs_etm__metadata_set_trace_id(trace_id, cpu_data); 397 if (ret) 398 return ret; 399 400 return 0; 401 } 402 403 static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata) 404 { 405 u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC]; 406 407 switch (cs_etm_magic) { 408 case __perf_cs_etmv3_magic: 409 *trace_chan_id = (u8)(cpu_metadata[CS_ETM_ETMTRACEIDR] & 410 CORESIGHT_TRACE_ID_VAL_MASK); 411 break; 412 case __perf_cs_etmv4_magic: 413 case __perf_cs_ete_magic: 414 *trace_chan_id = (u8)(cpu_metadata[CS_ETMV4_TRCTRACEIDR] & 415 CORESIGHT_TRACE_ID_VAL_MASK); 416 break; 417 default: 418 return -EINVAL; 419 } 420 return 0; 421 } 422 423 /* 424 * update metadata trace ID from the value found in the AUX_HW_INFO packet. 425 */ 426 static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata) 427 { 428 u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC]; 429 430 switch (cs_etm_magic) { 431 case __perf_cs_etmv3_magic: 432 cpu_metadata[CS_ETM_ETMTRACEIDR] = trace_chan_id; 433 break; 434 case __perf_cs_etmv4_magic: 435 case __perf_cs_ete_magic: 436 cpu_metadata[CS_ETMV4_TRCTRACEIDR] = trace_chan_id; 437 break; 438 439 default: 440 return -EINVAL; 441 } 442 return 0; 443 } 444 445 /* 446 * Get a metadata index for a specific cpu from an array. 447 * 448 */ 449 static int get_cpu_data_idx(struct cs_etm_auxtrace *etm, int cpu) 450 { 451 int i; 452 453 for (i = 0; i < etm->num_cpu; i++) { 454 if (etm->metadata[i][CS_ETM_CPU] == (u64)cpu) { 455 return i; 456 } 457 } 458 459 return -1; 460 } 461 462 /* 463 * Get a metadata for a specific cpu from an array. 464 * 465 */ 466 static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu) 467 { 468 int idx = get_cpu_data_idx(etm, cpu); 469 470 return (idx != -1) ? etm->metadata[idx] : NULL; 471 } 472 473 /* 474 * Handle the PERF_RECORD_AUX_OUTPUT_HW_ID event. 475 * 476 * The payload associates the Trace ID and the CPU. 477 * The routine is tolerant of seeing multiple packets with the same association, 478 * but a CPU / Trace ID association changing during a session is an error. 479 */ 480 static int cs_etm__process_aux_output_hw_id(struct perf_session *session, 481 union perf_event *event) 482 { 483 struct cs_etm_auxtrace *etm; 484 struct perf_sample sample; 485 struct evsel *evsel; 486 u64 hw_id; 487 int cpu, version, err; 488 489 /* extract and parse the HW ID */ 490 hw_id = event->aux_output_hw_id.hw_id; 491 version = FIELD_GET(CS_AUX_HW_ID_MAJOR_VERSION_MASK, hw_id); 492 493 /* check that we can handle this version */ 494 if (version > CS_AUX_HW_ID_MAJOR_VERSION) { 495 pr_err("CS ETM Trace: PERF_RECORD_AUX_OUTPUT_HW_ID version %d not supported. Please update Perf.\n", 496 version); 497 return -EINVAL; 498 } 499 500 /* get access to the etm metadata */ 501 etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace); 502 if (!etm || !etm->metadata) 503 return -EINVAL; 504 505 /* parse the sample to get the CPU */ 506 evsel = evlist__event2evsel(session->evlist, event); 507 if (!evsel) 508 return -EINVAL; 509 err = evsel__parse_sample(evsel, event, &sample); 510 if (err) 511 return err; 512 cpu = sample.cpu; 513 if (cpu == -1) { 514 /* no CPU in the sample - possibly recorded with an old version of perf */ 515 pr_err("CS_ETM: no CPU AUX_OUTPUT_HW_ID sample. Use compatible perf to record."); 516 return -EINVAL; 517 } 518 519 if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0) 520 return cs_etm__process_trace_id_v0(etm, cpu, hw_id); 521 522 return cs_etm__process_trace_id_v0_1(etm, cpu, hw_id); 523 } 524 525 void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq, 526 u8 trace_chan_id) 527 { 528 /* 529 * When a timestamp packet is encountered the backend code 530 * is stopped so that the front end has time to process packets 531 * that were accumulated in the traceID queue. Since there can 532 * be more than one channel per cs_etm_queue, we need to specify 533 * what traceID queue needs servicing. 534 */ 535 etmq->pending_timestamp_chan_id = trace_chan_id; 536 } 537 538 static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq, 539 u8 *trace_chan_id) 540 { 541 struct cs_etm_packet_queue *packet_queue; 542 543 if (!etmq->pending_timestamp_chan_id) 544 return 0; 545 546 if (trace_chan_id) 547 *trace_chan_id = etmq->pending_timestamp_chan_id; 548 549 packet_queue = cs_etm__etmq_get_packet_queue(etmq, 550 etmq->pending_timestamp_chan_id); 551 if (!packet_queue) 552 return 0; 553 554 /* Acknowledge pending status */ 555 etmq->pending_timestamp_chan_id = 0; 556 557 /* See function cs_etm_decoder__do_{hard|soft}_timestamp() */ 558 return packet_queue->cs_timestamp; 559 } 560 561 static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue) 562 { 563 int i; 564 565 queue->head = 0; 566 queue->tail = 0; 567 queue->packet_count = 0; 568 for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) { 569 queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN; 570 queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR; 571 queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR; 572 queue->packet_buffer[i].instr_count = 0; 573 queue->packet_buffer[i].last_instr_taken_branch = false; 574 queue->packet_buffer[i].last_instr_size = 0; 575 queue->packet_buffer[i].last_instr_type = 0; 576 queue->packet_buffer[i].last_instr_subtype = 0; 577 queue->packet_buffer[i].last_instr_cond = 0; 578 queue->packet_buffer[i].flags = 0; 579 queue->packet_buffer[i].exception_number = UINT32_MAX; 580 queue->packet_buffer[i].trace_chan_id = UINT8_MAX; 581 queue->packet_buffer[i].cpu = INT_MIN; 582 } 583 } 584 585 static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq) 586 { 587 int idx; 588 struct int_node *inode; 589 struct cs_etm_traceid_queue *tidq; 590 struct intlist *traceid_queues_list = etmq->traceid_queues_list; 591 592 intlist__for_each_entry(inode, traceid_queues_list) { 593 idx = (int)(intptr_t)inode->priv; 594 tidq = etmq->traceid_queues[idx]; 595 cs_etm__clear_packet_queue(&tidq->packet_queue); 596 } 597 } 598 599 static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq, 600 struct cs_etm_traceid_queue *tidq, 601 u8 trace_chan_id) 602 { 603 int rc = -ENOMEM; 604 struct auxtrace_queue *queue; 605 struct cs_etm_auxtrace *etm = etmq->etm; 606 607 cs_etm__clear_packet_queue(&tidq->packet_queue); 608 609 queue = &etmq->etm->queues.queue_array[etmq->queue_nr]; 610 tidq->trace_chan_id = trace_chan_id; 611 tidq->el = tidq->prev_packet_el = ocsd_EL_unknown; 612 tidq->thread = machine__findnew_thread(&etm->session->machines.host, -1, 613 queue->tid); 614 tidq->prev_packet_thread = machine__idle_thread(&etm->session->machines.host); 615 616 tidq->packet = zalloc(sizeof(struct cs_etm_packet)); 617 if (!tidq->packet) 618 goto out; 619 620 tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet)); 621 if (!tidq->prev_packet) 622 goto out_free; 623 624 if (etm->synth_opts.last_branch) { 625 size_t sz = sizeof(struct branch_stack); 626 627 sz += etm->synth_opts.last_branch_sz * 628 sizeof(struct branch_entry); 629 tidq->last_branch = zalloc(sz); 630 if (!tidq->last_branch) 631 goto out_free; 632 tidq->last_branch_rb = zalloc(sz); 633 if (!tidq->last_branch_rb) 634 goto out_free; 635 } 636 637 tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); 638 if (!tidq->event_buf) 639 goto out_free; 640 641 return 0; 642 643 out_free: 644 zfree(&tidq->last_branch_rb); 645 zfree(&tidq->last_branch); 646 zfree(&tidq->prev_packet); 647 zfree(&tidq->packet); 648 out: 649 return rc; 650 } 651 652 static struct cs_etm_traceid_queue 653 *cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id) 654 { 655 int idx; 656 struct int_node *inode; 657 struct intlist *traceid_queues_list; 658 struct cs_etm_traceid_queue *tidq, **traceid_queues; 659 struct cs_etm_auxtrace *etm = etmq->etm; 660 661 if (etm->per_thread_decoding) 662 trace_chan_id = CS_ETM_PER_THREAD_TRACEID; 663 664 traceid_queues_list = etmq->traceid_queues_list; 665 666 /* 667 * Check if the traceid_queue exist for this traceID by looking 668 * in the queue list. 669 */ 670 inode = intlist__find(traceid_queues_list, trace_chan_id); 671 if (inode) { 672 idx = (int)(intptr_t)inode->priv; 673 return etmq->traceid_queues[idx]; 674 } 675 676 /* We couldn't find a traceid_queue for this traceID, allocate one */ 677 tidq = malloc(sizeof(*tidq)); 678 if (!tidq) 679 return NULL; 680 681 memset(tidq, 0, sizeof(*tidq)); 682 683 /* Get a valid index for the new traceid_queue */ 684 idx = intlist__nr_entries(traceid_queues_list); 685 /* Memory for the inode is free'ed in cs_etm_free_traceid_queues () */ 686 inode = intlist__findnew(traceid_queues_list, trace_chan_id); 687 if (!inode) 688 goto out_free; 689 690 /* Associate this traceID with this index */ 691 inode->priv = (void *)(intptr_t)idx; 692 693 if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id)) 694 goto out_free; 695 696 /* Grow the traceid_queues array by one unit */ 697 traceid_queues = etmq->traceid_queues; 698 traceid_queues = reallocarray(traceid_queues, 699 idx + 1, 700 sizeof(*traceid_queues)); 701 702 /* 703 * On failure reallocarray() returns NULL and the original block of 704 * memory is left untouched. 705 */ 706 if (!traceid_queues) 707 goto out_free; 708 709 traceid_queues[idx] = tidq; 710 etmq->traceid_queues = traceid_queues; 711 712 return etmq->traceid_queues[idx]; 713 714 out_free: 715 /* 716 * Function intlist__remove() removes the inode from the list 717 * and delete the memory associated to it. 718 */ 719 intlist__remove(traceid_queues_list, inode); 720 free(tidq); 721 722 return NULL; 723 } 724 725 struct cs_etm_packet_queue 726 *cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id) 727 { 728 struct cs_etm_traceid_queue *tidq; 729 730 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); 731 if (tidq) 732 return &tidq->packet_queue; 733 734 return NULL; 735 } 736 737 static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm, 738 struct cs_etm_traceid_queue *tidq) 739 { 740 struct cs_etm_packet *tmp; 741 742 if (etm->synth_opts.branches || etm->synth_opts.last_branch || 743 etm->synth_opts.instructions) { 744 /* 745 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for 746 * the next incoming packet. 747 * 748 * Threads and exception levels are also tracked for both the 749 * previous and current packets. This is because the previous 750 * packet is used for the 'from' IP for branch samples, so the 751 * thread at that time must also be assigned to that sample. 752 * Across discontinuity packets the thread can change, so by 753 * tracking the thread for the previous packet the branch sample 754 * will have the correct info. 755 */ 756 tmp = tidq->packet; 757 tidq->packet = tidq->prev_packet; 758 tidq->prev_packet = tmp; 759 tidq->prev_packet_el = tidq->el; 760 thread__put(tidq->prev_packet_thread); 761 tidq->prev_packet_thread = thread__get(tidq->thread); 762 } 763 } 764 765 static void cs_etm__packet_dump(const char *pkt_string, void *data) 766 { 767 const char *color = PERF_COLOR_BLUE; 768 int len = strlen(pkt_string); 769 struct cs_etm_queue *etmq = data; 770 char queue_nr[64]; 771 772 if (verbose) 773 snprintf(queue_nr, sizeof(queue_nr), "Qnr:%d; ", etmq->queue_nr); 774 else 775 queue_nr[0] = '\0'; 776 777 if (len && (pkt_string[len-1] == '\n')) 778 color_fprintf(stdout, color, " %s%s", queue_nr, pkt_string); 779 else 780 color_fprintf(stdout, color, " %s%s\n", queue_nr, pkt_string); 781 782 fflush(stdout); 783 } 784 785 static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params, 786 u64 *metadata, u32 etmidr) 787 { 788 t_params->protocol = cs_etm__get_v7_protocol_version(etmidr); 789 t_params->etmv3.reg_ctrl = metadata[CS_ETM_ETMCR]; 790 t_params->etmv3.reg_trc_id = metadata[CS_ETM_ETMTRACEIDR]; 791 } 792 793 static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params, 794 u64 *metadata) 795 { 796 t_params->protocol = CS_ETM_PROTO_ETMV4i; 797 t_params->etmv4.reg_idr0 = metadata[CS_ETMV4_TRCIDR0]; 798 t_params->etmv4.reg_idr1 = metadata[CS_ETMV4_TRCIDR1]; 799 t_params->etmv4.reg_idr2 = metadata[CS_ETMV4_TRCIDR2]; 800 t_params->etmv4.reg_idr8 = metadata[CS_ETMV4_TRCIDR8]; 801 t_params->etmv4.reg_configr = metadata[CS_ETMV4_TRCCONFIGR]; 802 t_params->etmv4.reg_traceidr = metadata[CS_ETMV4_TRCTRACEIDR]; 803 } 804 805 static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params, 806 u64 *metadata) 807 { 808 t_params->protocol = CS_ETM_PROTO_ETE; 809 t_params->ete.reg_idr0 = metadata[CS_ETE_TRCIDR0]; 810 t_params->ete.reg_idr1 = metadata[CS_ETE_TRCIDR1]; 811 t_params->ete.reg_idr2 = metadata[CS_ETE_TRCIDR2]; 812 t_params->ete.reg_idr8 = metadata[CS_ETE_TRCIDR8]; 813 t_params->ete.reg_configr = metadata[CS_ETE_TRCCONFIGR]; 814 t_params->ete.reg_traceidr = metadata[CS_ETE_TRCTRACEIDR]; 815 t_params->ete.reg_devarch = metadata[CS_ETE_TRCDEVARCH]; 816 } 817 818 static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params, 819 struct cs_etm_queue *etmq) 820 { 821 struct int_node *inode; 822 823 intlist__for_each_entry(inode, etmq->traceid_list) { 824 u64 *metadata = inode->priv; 825 u64 architecture = metadata[CS_ETM_MAGIC]; 826 u32 etmidr; 827 828 switch (architecture) { 829 case __perf_cs_etmv3_magic: 830 etmidr = metadata[CS_ETM_ETMIDR]; 831 cs_etm__set_trace_param_etmv3(t_params++, metadata, etmidr); 832 break; 833 case __perf_cs_etmv4_magic: 834 cs_etm__set_trace_param_etmv4(t_params++, metadata); 835 break; 836 case __perf_cs_ete_magic: 837 cs_etm__set_trace_param_ete(t_params++, metadata); 838 break; 839 default: 840 return -EINVAL; 841 } 842 } 843 844 return 0; 845 } 846 847 static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, 848 struct cs_etm_queue *etmq, 849 enum cs_etm_decoder_operation mode) 850 { 851 int ret = -EINVAL; 852 853 if (!(mode < CS_ETM_OPERATION_MAX)) 854 goto out; 855 856 d_params->packet_printer = cs_etm__packet_dump; 857 d_params->operation = mode; 858 d_params->data = etmq; 859 d_params->formatted = etmq->format == FORMATTED; 860 d_params->fsyncs = false; 861 d_params->hsyncs = false; 862 d_params->frame_aligned = true; 863 864 ret = 0; 865 out: 866 return ret; 867 } 868 869 static void cs_etm__dump_event(struct cs_etm_queue *etmq, 870 struct auxtrace_buffer *buffer) 871 { 872 int ret; 873 const char *color = PERF_COLOR_BLUE; 874 size_t buffer_used = 0; 875 876 fprintf(stdout, "\n"); 877 color_fprintf(stdout, color, 878 ". ... CoreSight %s Trace data: size %#zx bytes\n", 879 cs_etm_decoder__get_name(etmq->decoder), buffer->size); 880 881 do { 882 size_t consumed; 883 884 ret = cs_etm_decoder__process_data_block( 885 etmq->decoder, buffer->offset, 886 &((u8 *)buffer->data)[buffer_used], 887 buffer->size - buffer_used, &consumed); 888 if (ret) 889 break; 890 891 buffer_used += consumed; 892 } while (buffer_used < buffer->size); 893 894 cs_etm_decoder__reset(etmq->decoder); 895 } 896 897 static int cs_etm__flush_events(struct perf_session *session, 898 const struct perf_tool *tool) 899 { 900 struct cs_etm_auxtrace *etm = container_of(session->auxtrace, 901 struct cs_etm_auxtrace, 902 auxtrace); 903 if (dump_trace) 904 return 0; 905 906 if (!tool->ordered_events) 907 return -EINVAL; 908 909 if (etm->timeless_decoding) { 910 /* 911 * Pass tid = -1 to process all queues. But likely they will have 912 * already been processed on PERF_RECORD_EXIT anyway. 913 */ 914 return cs_etm__process_timeless_queues(etm, -1); 915 } 916 917 return cs_etm__process_timestamped_queues(etm); 918 } 919 920 static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq) 921 { 922 int idx; 923 uintptr_t priv; 924 struct int_node *inode, *tmp; 925 struct cs_etm_traceid_queue *tidq; 926 struct intlist *traceid_queues_list = etmq->traceid_queues_list; 927 928 intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) { 929 priv = (uintptr_t)inode->priv; 930 idx = priv; 931 932 /* Free this traceid_queue from the array */ 933 tidq = etmq->traceid_queues[idx]; 934 thread__zput(tidq->thread); 935 thread__zput(tidq->prev_packet_thread); 936 zfree(&tidq->event_buf); 937 zfree(&tidq->last_branch); 938 zfree(&tidq->last_branch_rb); 939 zfree(&tidq->prev_packet); 940 zfree(&tidq->packet); 941 zfree(&tidq); 942 943 /* 944 * Function intlist__remove() removes the inode from the list 945 * and delete the memory associated to it. 946 */ 947 intlist__remove(traceid_queues_list, inode); 948 } 949 950 /* Then the RB tree itself */ 951 intlist__delete(traceid_queues_list); 952 etmq->traceid_queues_list = NULL; 953 954 /* finally free the traceid_queues array */ 955 zfree(&etmq->traceid_queues); 956 } 957 958 static void cs_etm__free_queue(void *priv) 959 { 960 struct int_node *inode, *tmp; 961 struct cs_etm_queue *etmq = priv; 962 963 if (!etmq) 964 return; 965 966 cs_etm_decoder__free(etmq->decoder); 967 cs_etm__free_traceid_queues(etmq); 968 969 if (etmq->own_traceid_list) { 970 /* First remove all traceID/metadata nodes for the RB tree */ 971 intlist__for_each_entry_safe(inode, tmp, etmq->own_traceid_list) 972 intlist__remove(etmq->own_traceid_list, inode); 973 974 /* Then the RB tree itself */ 975 intlist__delete(etmq->own_traceid_list); 976 } 977 978 free(etmq); 979 } 980 981 static void cs_etm__free_events(struct perf_session *session) 982 { 983 unsigned int i; 984 struct cs_etm_auxtrace *aux = container_of(session->auxtrace, 985 struct cs_etm_auxtrace, 986 auxtrace); 987 struct auxtrace_queues *queues = &aux->queues; 988 989 for (i = 0; i < queues->nr_queues; i++) { 990 cs_etm__free_queue(queues->queue_array[i].priv); 991 queues->queue_array[i].priv = NULL; 992 } 993 994 auxtrace_queues__free(queues); 995 } 996 997 static void cs_etm__free(struct perf_session *session) 998 { 999 int i; 1000 struct cs_etm_auxtrace *aux = container_of(session->auxtrace, 1001 struct cs_etm_auxtrace, 1002 auxtrace); 1003 cs_etm__free_events(session); 1004 session->auxtrace = NULL; 1005 1006 for (i = 0; i < aux->num_cpu; i++) 1007 zfree(&aux->metadata[i]); 1008 1009 zfree(&aux->metadata); 1010 zfree(&aux); 1011 } 1012 1013 static bool cs_etm__evsel_is_auxtrace(struct perf_session *session, 1014 struct evsel *evsel) 1015 { 1016 struct cs_etm_auxtrace *aux = container_of(session->auxtrace, 1017 struct cs_etm_auxtrace, 1018 auxtrace); 1019 1020 return evsel->core.attr.type == aux->pmu_type; 1021 } 1022 1023 static struct machine *cs_etm__get_machine(struct cs_etm_queue *etmq, 1024 ocsd_ex_level el) 1025 { 1026 enum cs_etm_pid_fmt pid_fmt = cs_etm__get_pid_fmt(etmq); 1027 1028 /* 1029 * For any virtualisation based on nVHE (e.g. pKVM), or host kernels 1030 * running at EL1 assume everything is the host. 1031 */ 1032 if (pid_fmt == CS_ETM_PIDFMT_CTXTID) 1033 return &etmq->etm->session->machines.host; 1034 1035 /* 1036 * Not perfect, but otherwise assume anything in EL1 is the default 1037 * guest, and everything else is the host. Distinguishing between guest 1038 * and host userspaces isn't currently supported either. Neither is 1039 * multiple guest support. All this does is reduce the likeliness of 1040 * decode errors where we look into the host kernel maps when it should 1041 * have been the guest maps. 1042 */ 1043 switch (el) { 1044 case ocsd_EL1: 1045 return machines__find_guest(&etmq->etm->session->machines, 1046 DEFAULT_GUEST_KERNEL_ID); 1047 case ocsd_EL3: 1048 case ocsd_EL2: 1049 case ocsd_EL0: 1050 case ocsd_EL_unknown: 1051 default: 1052 return &etmq->etm->session->machines.host; 1053 } 1054 } 1055 1056 static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address, 1057 ocsd_ex_level el) 1058 { 1059 struct machine *machine = cs_etm__get_machine(etmq, el); 1060 1061 if (address >= machine__kernel_start(machine)) { 1062 if (machine__is_host(machine)) 1063 return PERF_RECORD_MISC_KERNEL; 1064 else 1065 return PERF_RECORD_MISC_GUEST_KERNEL; 1066 } else { 1067 if (machine__is_host(machine)) 1068 return PERF_RECORD_MISC_USER; 1069 else { 1070 /* 1071 * Can't really happen at the moment because 1072 * cs_etm__get_machine() will always return 1073 * machines.host for any non EL1 trace. 1074 */ 1075 return PERF_RECORD_MISC_GUEST_USER; 1076 } 1077 } 1078 } 1079 1080 static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id, 1081 u64 address, size_t size, u8 *buffer, 1082 const ocsd_mem_space_acc_t mem_space) 1083 { 1084 u8 cpumode; 1085 u64 offset; 1086 int len; 1087 struct addr_location al; 1088 struct dso *dso; 1089 struct cs_etm_traceid_queue *tidq; 1090 int ret = 0; 1091 1092 if (!etmq) 1093 return 0; 1094 1095 addr_location__init(&al); 1096 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); 1097 if (!tidq) 1098 goto out; 1099 1100 /* 1101 * We've already tracked EL along side the PID in cs_etm__set_thread() 1102 * so double check that it matches what OpenCSD thinks as well. It 1103 * doesn't distinguish between EL0 and EL1 for this mem access callback 1104 * so we had to do the extra tracking. Skip validation if it's any of 1105 * the 'any' values. 1106 */ 1107 if (!(mem_space == OCSD_MEM_SPACE_ANY || 1108 mem_space == OCSD_MEM_SPACE_N || mem_space == OCSD_MEM_SPACE_S)) { 1109 if (mem_space & OCSD_MEM_SPACE_EL1N) { 1110 /* Includes both non secure EL1 and EL0 */ 1111 assert(tidq->el == ocsd_EL1 || tidq->el == ocsd_EL0); 1112 } else if (mem_space & OCSD_MEM_SPACE_EL2) 1113 assert(tidq->el == ocsd_EL2); 1114 else if (mem_space & OCSD_MEM_SPACE_EL3) 1115 assert(tidq->el == ocsd_EL3); 1116 } 1117 1118 cpumode = cs_etm__cpu_mode(etmq, address, tidq->el); 1119 1120 if (!thread__find_map(tidq->thread, cpumode, address, &al)) 1121 goto out; 1122 1123 dso = map__dso(al.map); 1124 if (!dso) 1125 goto out; 1126 1127 if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR && 1128 dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE)) 1129 goto out; 1130 1131 offset = map__map_ip(al.map, address); 1132 1133 map__load(al.map); 1134 1135 len = dso__data_read_offset(dso, maps__machine(thread__maps(tidq->thread)), 1136 offset, buffer, size); 1137 1138 if (len <= 0) { 1139 ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n" 1140 " Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n"); 1141 if (!dso__auxtrace_warned(dso)) { 1142 pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n", 1143 address, 1144 dso__long_name(dso) ? dso__long_name(dso) : "Unknown"); 1145 dso__set_auxtrace_warned(dso); 1146 } 1147 goto out; 1148 } 1149 ret = len; 1150 out: 1151 addr_location__exit(&al); 1152 return ret; 1153 } 1154 1155 static struct cs_etm_queue *cs_etm__alloc_queue(void) 1156 { 1157 struct cs_etm_queue *etmq = zalloc(sizeof(*etmq)); 1158 if (!etmq) 1159 return NULL; 1160 1161 etmq->traceid_queues_list = intlist__new(NULL); 1162 if (!etmq->traceid_queues_list) 1163 goto out_free; 1164 1165 /* 1166 * Create an RB tree for traceID-metadata tuple. Since the conversion 1167 * has to be made for each packet that gets decoded, optimizing access 1168 * in anything other than a sequential array is worth doing. 1169 */ 1170 etmq->traceid_list = etmq->own_traceid_list = intlist__new(NULL); 1171 if (!etmq->traceid_list) 1172 goto out_free; 1173 1174 return etmq; 1175 1176 out_free: 1177 intlist__delete(etmq->traceid_queues_list); 1178 free(etmq); 1179 1180 return NULL; 1181 } 1182 1183 static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, 1184 struct auxtrace_queue *queue, 1185 unsigned int queue_nr) 1186 { 1187 struct cs_etm_queue *etmq = queue->priv; 1188 1189 if (etmq) 1190 return 0; 1191 1192 etmq = cs_etm__alloc_queue(); 1193 1194 if (!etmq) 1195 return -ENOMEM; 1196 1197 queue->priv = etmq; 1198 etmq->etm = etm; 1199 etmq->queue_nr = queue_nr; 1200 queue->cpu = queue_nr; /* Placeholder, may be reset to -1 in per-thread mode */ 1201 etmq->offset = 0; 1202 etmq->sink_id = SINK_UNSET; 1203 1204 return 0; 1205 } 1206 1207 static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm, 1208 struct cs_etm_queue *etmq, 1209 unsigned int queue_nr) 1210 { 1211 int ret = 0; 1212 unsigned int cs_queue_nr; 1213 u8 trace_chan_id; 1214 u64 cs_timestamp; 1215 1216 /* 1217 * We are under a CPU-wide trace scenario. As such we need to know 1218 * when the code that generated the traces started to execute so that 1219 * it can be correlated with execution on other CPUs. So we get a 1220 * handle on the beginning of traces and decode until we find a 1221 * timestamp. The timestamp is then added to the auxtrace min heap 1222 * in order to know what nibble (of all the etmqs) to decode first. 1223 */ 1224 while (1) { 1225 /* 1226 * Fetch an aux_buffer from this etmq. Bail if no more 1227 * blocks or an error has been encountered. 1228 */ 1229 ret = cs_etm__get_data_block(etmq); 1230 if (ret <= 0) 1231 goto out; 1232 1233 /* 1234 * Run decoder on the trace block. The decoder will stop when 1235 * encountering a CS timestamp, a full packet queue or the end of 1236 * trace for that block. 1237 */ 1238 ret = cs_etm__decode_data_block(etmq); 1239 if (ret) 1240 goto out; 1241 1242 /* 1243 * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all 1244 * the timestamp calculation for us. 1245 */ 1246 cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id); 1247 1248 /* We found a timestamp, no need to continue. */ 1249 if (cs_timestamp) 1250 break; 1251 1252 /* 1253 * We didn't find a timestamp so empty all the traceid packet 1254 * queues before looking for another timestamp packet, either 1255 * in the current data block or a new one. Packets that were 1256 * just decoded are useless since no timestamp has been 1257 * associated with them. As such simply discard them. 1258 */ 1259 cs_etm__clear_all_packet_queues(etmq); 1260 } 1261 1262 /* 1263 * We have a timestamp. Add it to the min heap to reflect when 1264 * instructions conveyed by the range packets of this traceID queue 1265 * started to execute. Once the same has been done for all the traceID 1266 * queues of each etmq, redenring and decoding can start in 1267 * chronological order. 1268 * 1269 * Note that packets decoded above are still in the traceID's packet 1270 * queue and will be processed in cs_etm__process_timestamped_queues(). 1271 */ 1272 cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id); 1273 ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp); 1274 out: 1275 return ret; 1276 } 1277 1278 static inline 1279 void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq, 1280 struct cs_etm_traceid_queue *tidq) 1281 { 1282 struct branch_stack *bs_src = tidq->last_branch_rb; 1283 struct branch_stack *bs_dst = tidq->last_branch; 1284 size_t nr = 0; 1285 1286 /* 1287 * Set the number of records before early exit: ->nr is used to 1288 * determine how many branches to copy from ->entries. 1289 */ 1290 bs_dst->nr = bs_src->nr; 1291 1292 /* 1293 * Early exit when there is nothing to copy. 1294 */ 1295 if (!bs_src->nr) 1296 return; 1297 1298 /* 1299 * As bs_src->entries is a circular buffer, we need to copy from it in 1300 * two steps. First, copy the branches from the most recently inserted 1301 * branch ->last_branch_pos until the end of bs_src->entries buffer. 1302 */ 1303 nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos; 1304 memcpy(&bs_dst->entries[0], 1305 &bs_src->entries[tidq->last_branch_pos], 1306 sizeof(struct branch_entry) * nr); 1307 1308 /* 1309 * If we wrapped around at least once, the branches from the beginning 1310 * of the bs_src->entries buffer and until the ->last_branch_pos element 1311 * are older valid branches: copy them over. The total number of 1312 * branches copied over will be equal to the number of branches asked by 1313 * the user in last_branch_sz. 1314 */ 1315 if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) { 1316 memcpy(&bs_dst->entries[nr], 1317 &bs_src->entries[0], 1318 sizeof(struct branch_entry) * tidq->last_branch_pos); 1319 } 1320 } 1321 1322 static inline 1323 void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq) 1324 { 1325 tidq->last_branch_pos = 0; 1326 tidq->last_branch_rb->nr = 0; 1327 } 1328 1329 static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq, 1330 u8 trace_chan_id, u64 addr) 1331 { 1332 u8 instrBytes[2]; 1333 1334 cs_etm__mem_access(etmq, trace_chan_id, addr, ARRAY_SIZE(instrBytes), 1335 instrBytes, 0); 1336 /* 1337 * T32 instruction size is indicated by bits[15:11] of the first 1338 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111 1339 * denote a 32-bit instruction. 1340 */ 1341 return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2; 1342 } 1343 1344 static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet) 1345 { 1346 /* 1347 * Return 0 for packets that have no addresses so that CS_ETM_INVAL_ADDR doesn't 1348 * appear in samples. 1349 */ 1350 if (packet->sample_type == CS_ETM_DISCONTINUITY || 1351 packet->sample_type == CS_ETM_EXCEPTION) 1352 return 0; 1353 1354 return packet->start_addr; 1355 } 1356 1357 static inline 1358 u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet) 1359 { 1360 /* Returns 0 for the CS_ETM_DISCONTINUITY packet */ 1361 if (packet->sample_type == CS_ETM_DISCONTINUITY) 1362 return 0; 1363 1364 return packet->end_addr - packet->last_instr_size; 1365 } 1366 1367 static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq, 1368 u64 trace_chan_id, 1369 const struct cs_etm_packet *packet, 1370 u64 offset) 1371 { 1372 if (packet->isa == CS_ETM_ISA_T32) { 1373 u64 addr = packet->start_addr; 1374 1375 while (offset) { 1376 addr += cs_etm__t32_instr_size(etmq, 1377 trace_chan_id, addr); 1378 offset--; 1379 } 1380 return addr; 1381 } 1382 1383 /* Assume a 4 byte instruction size (A32/A64) */ 1384 return packet->start_addr + offset * 4; 1385 } 1386 1387 static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq, 1388 struct cs_etm_traceid_queue *tidq) 1389 { 1390 struct branch_stack *bs = tidq->last_branch_rb; 1391 struct branch_entry *be; 1392 1393 /* 1394 * The branches are recorded in a circular buffer in reverse 1395 * chronological order: we start recording from the last element of the 1396 * buffer down. After writing the first element of the stack, move the 1397 * insert position back to the end of the buffer. 1398 */ 1399 if (!tidq->last_branch_pos) 1400 tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz; 1401 1402 tidq->last_branch_pos -= 1; 1403 1404 be = &bs->entries[tidq->last_branch_pos]; 1405 be->from = cs_etm__last_executed_instr(tidq->prev_packet); 1406 be->to = cs_etm__first_executed_instr(tidq->packet); 1407 /* No support for mispredict */ 1408 be->flags.mispred = 0; 1409 be->flags.predicted = 1; 1410 1411 /* 1412 * Increment bs->nr until reaching the number of last branches asked by 1413 * the user on the command line. 1414 */ 1415 if (bs->nr < etmq->etm->synth_opts.last_branch_sz) 1416 bs->nr += 1; 1417 } 1418 1419 static int cs_etm__inject_event(union perf_event *event, 1420 struct perf_sample *sample, u64 type) 1421 { 1422 event->header.size = perf_event__sample_event_size(sample, type, 0); 1423 return perf_event__synthesize_sample(event, type, 0, sample); 1424 } 1425 1426 1427 static int 1428 cs_etm__get_trace(struct cs_etm_queue *etmq) 1429 { 1430 struct auxtrace_buffer *aux_buffer = etmq->buffer; 1431 struct auxtrace_buffer *old_buffer = aux_buffer; 1432 struct auxtrace_queue *queue; 1433 1434 queue = &etmq->etm->queues.queue_array[etmq->queue_nr]; 1435 1436 aux_buffer = auxtrace_buffer__next(queue, aux_buffer); 1437 1438 /* If no more data, drop the previous auxtrace_buffer and return */ 1439 if (!aux_buffer) { 1440 if (old_buffer) 1441 auxtrace_buffer__drop_data(old_buffer); 1442 etmq->buf_len = 0; 1443 return 0; 1444 } 1445 1446 etmq->buffer = aux_buffer; 1447 1448 /* If the aux_buffer doesn't have data associated, try to load it */ 1449 if (!aux_buffer->data) { 1450 /* get the file desc associated with the perf data file */ 1451 int fd = perf_data__fd(etmq->etm->session->data); 1452 1453 aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd); 1454 if (!aux_buffer->data) 1455 return -ENOMEM; 1456 } 1457 1458 /* If valid, drop the previous buffer */ 1459 if (old_buffer) 1460 auxtrace_buffer__drop_data(old_buffer); 1461 1462 etmq->buf_used = 0; 1463 etmq->buf_len = aux_buffer->size; 1464 etmq->buf = aux_buffer->data; 1465 1466 return etmq->buf_len; 1467 } 1468 1469 static void cs_etm__set_thread(struct cs_etm_queue *etmq, 1470 struct cs_etm_traceid_queue *tidq, pid_t tid, 1471 ocsd_ex_level el) 1472 { 1473 struct machine *machine = cs_etm__get_machine(etmq, el); 1474 1475 if (tid != -1) { 1476 thread__zput(tidq->thread); 1477 tidq->thread = machine__find_thread(machine, -1, tid); 1478 } 1479 1480 /* Couldn't find a known thread */ 1481 if (!tidq->thread) 1482 tidq->thread = machine__idle_thread(machine); 1483 1484 tidq->el = el; 1485 } 1486 1487 int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid, 1488 u8 trace_chan_id, ocsd_ex_level el) 1489 { 1490 struct cs_etm_traceid_queue *tidq; 1491 1492 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); 1493 if (!tidq) 1494 return -EINVAL; 1495 1496 cs_etm__set_thread(etmq, tidq, tid, el); 1497 return 0; 1498 } 1499 1500 bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq) 1501 { 1502 return !!etmq->etm->timeless_decoding; 1503 } 1504 1505 static void cs_etm__copy_insn(struct cs_etm_queue *etmq, 1506 u64 trace_chan_id, 1507 const struct cs_etm_packet *packet, 1508 struct perf_sample *sample) 1509 { 1510 /* 1511 * It's pointless to read instructions for the CS_ETM_DISCONTINUITY 1512 * packet, so directly bail out with 'insn_len' = 0. 1513 */ 1514 if (packet->sample_type == CS_ETM_DISCONTINUITY) { 1515 sample->insn_len = 0; 1516 return; 1517 } 1518 1519 /* 1520 * T32 instruction size might be 32-bit or 16-bit, decide by calling 1521 * cs_etm__t32_instr_size(). 1522 */ 1523 if (packet->isa == CS_ETM_ISA_T32) 1524 sample->insn_len = cs_etm__t32_instr_size(etmq, trace_chan_id, 1525 sample->ip); 1526 /* Otherwise, A64 and A32 instruction size are always 32-bit. */ 1527 else 1528 sample->insn_len = 4; 1529 1530 cs_etm__mem_access(etmq, trace_chan_id, sample->ip, sample->insn_len, 1531 (void *)sample->insn, 0); 1532 } 1533 1534 u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp) 1535 { 1536 struct cs_etm_auxtrace *etm = etmq->etm; 1537 1538 if (etm->has_virtual_ts) 1539 return tsc_to_perf_time(cs_timestamp, &etm->tc); 1540 else 1541 return cs_timestamp; 1542 } 1543 1544 static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq, 1545 struct cs_etm_traceid_queue *tidq) 1546 { 1547 struct cs_etm_auxtrace *etm = etmq->etm; 1548 struct cs_etm_packet_queue *packet_queue = &tidq->packet_queue; 1549 1550 if (!etm->timeless_decoding && etm->has_virtual_ts) 1551 return packet_queue->cs_timestamp; 1552 else 1553 return etm->latest_kernel_timestamp; 1554 } 1555 1556 static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, 1557 struct cs_etm_traceid_queue *tidq, 1558 u64 addr, u64 period) 1559 { 1560 int ret = 0; 1561 struct cs_etm_auxtrace *etm = etmq->etm; 1562 union perf_event *event = tidq->event_buf; 1563 struct perf_sample sample = {.ip = 0,}; 1564 1565 event->sample.header.type = PERF_RECORD_SAMPLE; 1566 event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->el); 1567 event->sample.header.size = sizeof(struct perf_event_header); 1568 1569 /* Set time field based on etm auxtrace config. */ 1570 sample.time = cs_etm__resolve_sample_time(etmq, tidq); 1571 1572 sample.ip = addr; 1573 sample.pid = thread__pid(tidq->thread); 1574 sample.tid = thread__tid(tidq->thread); 1575 sample.id = etmq->etm->instructions_id; 1576 sample.stream_id = etmq->etm->instructions_id; 1577 sample.period = period; 1578 sample.cpu = tidq->packet->cpu; 1579 sample.flags = tidq->prev_packet->flags; 1580 sample.cpumode = event->sample.header.misc; 1581 1582 cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample); 1583 1584 if (etm->synth_opts.last_branch) 1585 sample.branch_stack = tidq->last_branch; 1586 1587 if (etm->synth_opts.inject) { 1588 ret = cs_etm__inject_event(event, &sample, 1589 etm->instructions_sample_type); 1590 if (ret) 1591 return ret; 1592 } 1593 1594 ret = perf_session__deliver_synth_event(etm->session, event, &sample); 1595 1596 if (ret) 1597 pr_err( 1598 "CS ETM Trace: failed to deliver instruction event, error %d\n", 1599 ret); 1600 1601 return ret; 1602 } 1603 1604 /* 1605 * The cs etm packet encodes an instruction range between a branch target 1606 * and the next taken branch. Generate sample accordingly. 1607 */ 1608 static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, 1609 struct cs_etm_traceid_queue *tidq) 1610 { 1611 int ret = 0; 1612 struct cs_etm_auxtrace *etm = etmq->etm; 1613 struct perf_sample sample = {.ip = 0,}; 1614 union perf_event *event = tidq->event_buf; 1615 struct dummy_branch_stack { 1616 u64 nr; 1617 u64 hw_idx; 1618 struct branch_entry entries; 1619 } dummy_bs; 1620 u64 ip; 1621 1622 ip = cs_etm__last_executed_instr(tidq->prev_packet); 1623 1624 event->sample.header.type = PERF_RECORD_SAMPLE; 1625 event->sample.header.misc = cs_etm__cpu_mode(etmq, ip, 1626 tidq->prev_packet_el); 1627 event->sample.header.size = sizeof(struct perf_event_header); 1628 1629 /* Set time field based on etm auxtrace config. */ 1630 sample.time = cs_etm__resolve_sample_time(etmq, tidq); 1631 1632 sample.ip = ip; 1633 sample.pid = thread__pid(tidq->prev_packet_thread); 1634 sample.tid = thread__tid(tidq->prev_packet_thread); 1635 sample.addr = cs_etm__first_executed_instr(tidq->packet); 1636 sample.id = etmq->etm->branches_id; 1637 sample.stream_id = etmq->etm->branches_id; 1638 sample.period = 1; 1639 sample.cpu = tidq->packet->cpu; 1640 sample.flags = tidq->prev_packet->flags; 1641 sample.cpumode = event->sample.header.misc; 1642 1643 cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->prev_packet, 1644 &sample); 1645 1646 /* 1647 * perf report cannot handle events without a branch stack 1648 */ 1649 if (etm->synth_opts.last_branch) { 1650 dummy_bs = (struct dummy_branch_stack){ 1651 .nr = 1, 1652 .hw_idx = -1ULL, 1653 .entries = { 1654 .from = sample.ip, 1655 .to = sample.addr, 1656 }, 1657 }; 1658 sample.branch_stack = (struct branch_stack *)&dummy_bs; 1659 } 1660 1661 if (etm->synth_opts.inject) { 1662 ret = cs_etm__inject_event(event, &sample, 1663 etm->branches_sample_type); 1664 if (ret) 1665 return ret; 1666 } 1667 1668 ret = perf_session__deliver_synth_event(etm->session, event, &sample); 1669 1670 if (ret) 1671 pr_err( 1672 "CS ETM Trace: failed to deliver instruction event, error %d\n", 1673 ret); 1674 1675 return ret; 1676 } 1677 1678 static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, 1679 struct perf_session *session) 1680 { 1681 struct evlist *evlist = session->evlist; 1682 struct evsel *evsel; 1683 struct perf_event_attr attr; 1684 bool found = false; 1685 u64 id; 1686 int err; 1687 1688 evlist__for_each_entry(evlist, evsel) { 1689 if (evsel->core.attr.type == etm->pmu_type) { 1690 found = true; 1691 break; 1692 } 1693 } 1694 1695 if (!found) { 1696 pr_debug("No selected events with CoreSight Trace data\n"); 1697 return 0; 1698 } 1699 1700 memset(&attr, 0, sizeof(struct perf_event_attr)); 1701 attr.size = sizeof(struct perf_event_attr); 1702 attr.type = PERF_TYPE_HARDWARE; 1703 attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK; 1704 attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | 1705 PERF_SAMPLE_PERIOD; 1706 if (etm->timeless_decoding) 1707 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; 1708 else 1709 attr.sample_type |= PERF_SAMPLE_TIME; 1710 1711 attr.exclude_user = evsel->core.attr.exclude_user; 1712 attr.exclude_kernel = evsel->core.attr.exclude_kernel; 1713 attr.exclude_hv = evsel->core.attr.exclude_hv; 1714 attr.exclude_host = evsel->core.attr.exclude_host; 1715 attr.exclude_guest = evsel->core.attr.exclude_guest; 1716 attr.sample_id_all = evsel->core.attr.sample_id_all; 1717 attr.read_format = evsel->core.attr.read_format; 1718 1719 /* create new id val to be a fixed offset from evsel id */ 1720 id = evsel->core.id[0] + 1000000000; 1721 1722 if (!id) 1723 id = 1; 1724 1725 if (etm->synth_opts.branches) { 1726 attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; 1727 attr.sample_period = 1; 1728 attr.sample_type |= PERF_SAMPLE_ADDR; 1729 err = perf_session__deliver_synth_attr_event(session, &attr, id); 1730 if (err) 1731 return err; 1732 etm->branches_sample_type = attr.sample_type; 1733 etm->branches_id = id; 1734 id += 1; 1735 attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR; 1736 } 1737 1738 if (etm->synth_opts.last_branch) { 1739 attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; 1740 /* 1741 * We don't use the hardware index, but the sample generation 1742 * code uses the new format branch_stack with this field, 1743 * so the event attributes must indicate that it's present. 1744 */ 1745 attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX; 1746 } 1747 1748 if (etm->synth_opts.instructions) { 1749 attr.config = PERF_COUNT_HW_INSTRUCTIONS; 1750 attr.sample_period = etm->synth_opts.period; 1751 etm->instructions_sample_period = attr.sample_period; 1752 err = perf_session__deliver_synth_attr_event(session, &attr, id); 1753 if (err) 1754 return err; 1755 etm->instructions_sample_type = attr.sample_type; 1756 etm->instructions_id = id; 1757 id += 1; 1758 } 1759 1760 return 0; 1761 } 1762 1763 static int cs_etm__sample(struct cs_etm_queue *etmq, 1764 struct cs_etm_traceid_queue *tidq) 1765 { 1766 struct cs_etm_auxtrace *etm = etmq->etm; 1767 int ret; 1768 u8 trace_chan_id = tidq->trace_chan_id; 1769 u64 instrs_prev; 1770 1771 /* Get instructions remainder from previous packet */ 1772 instrs_prev = tidq->period_instructions; 1773 1774 tidq->period_instructions += tidq->packet->instr_count; 1775 1776 /* 1777 * Record a branch when the last instruction in 1778 * PREV_PACKET is a branch. 1779 */ 1780 if (etm->synth_opts.last_branch && 1781 tidq->prev_packet->sample_type == CS_ETM_RANGE && 1782 tidq->prev_packet->last_instr_taken_branch) 1783 cs_etm__update_last_branch_rb(etmq, tidq); 1784 1785 if (etm->synth_opts.instructions && 1786 tidq->period_instructions >= etm->instructions_sample_period) { 1787 /* 1788 * Emit instruction sample periodically 1789 * TODO: allow period to be defined in cycles and clock time 1790 */ 1791 1792 /* 1793 * Below diagram demonstrates the instruction samples 1794 * generation flows: 1795 * 1796 * Instrs Instrs Instrs Instrs 1797 * Sample(n) Sample(n+1) Sample(n+2) Sample(n+3) 1798 * | | | | 1799 * V V V V 1800 * -------------------------------------------------- 1801 * ^ ^ 1802 * | | 1803 * Period Period 1804 * instructions(Pi) instructions(Pi') 1805 * 1806 * | | 1807 * \---------------- -----------------/ 1808 * V 1809 * tidq->packet->instr_count 1810 * 1811 * Instrs Sample(n...) are the synthesised samples occurring 1812 * every etm->instructions_sample_period instructions - as 1813 * defined on the perf command line. Sample(n) is being the 1814 * last sample before the current etm packet, n+1 to n+3 1815 * samples are generated from the current etm packet. 1816 * 1817 * tidq->packet->instr_count represents the number of 1818 * instructions in the current etm packet. 1819 * 1820 * Period instructions (Pi) contains the number of 1821 * instructions executed after the sample point(n) from the 1822 * previous etm packet. This will always be less than 1823 * etm->instructions_sample_period. 1824 * 1825 * When generate new samples, it combines with two parts 1826 * instructions, one is the tail of the old packet and another 1827 * is the head of the new coming packet, to generate 1828 * sample(n+1); sample(n+2) and sample(n+3) consume the 1829 * instructions with sample period. After sample(n+3), the rest 1830 * instructions will be used by later packet and it is assigned 1831 * to tidq->period_instructions for next round calculation. 1832 */ 1833 1834 /* 1835 * Get the initial offset into the current packet instructions; 1836 * entry conditions ensure that instrs_prev is less than 1837 * etm->instructions_sample_period. 1838 */ 1839 u64 offset = etm->instructions_sample_period - instrs_prev; 1840 u64 addr; 1841 1842 /* Prepare last branches for instruction sample */ 1843 if (etm->synth_opts.last_branch) 1844 cs_etm__copy_last_branch_rb(etmq, tidq); 1845 1846 while (tidq->period_instructions >= 1847 etm->instructions_sample_period) { 1848 /* 1849 * Calculate the address of the sampled instruction (-1 1850 * as sample is reported as though instruction has just 1851 * been executed, but PC has not advanced to next 1852 * instruction) 1853 */ 1854 addr = cs_etm__instr_addr(etmq, trace_chan_id, 1855 tidq->packet, offset - 1); 1856 ret = cs_etm__synth_instruction_sample( 1857 etmq, tidq, addr, 1858 etm->instructions_sample_period); 1859 if (ret) 1860 return ret; 1861 1862 offset += etm->instructions_sample_period; 1863 tidq->period_instructions -= 1864 etm->instructions_sample_period; 1865 } 1866 } 1867 1868 if (etm->synth_opts.branches) { 1869 bool generate_sample = false; 1870 1871 /* Generate sample for tracing on packet */ 1872 if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY) 1873 generate_sample = true; 1874 1875 /* Generate sample for branch taken packet */ 1876 if (tidq->prev_packet->sample_type == CS_ETM_RANGE && 1877 tidq->prev_packet->last_instr_taken_branch) 1878 generate_sample = true; 1879 1880 if (generate_sample) { 1881 ret = cs_etm__synth_branch_sample(etmq, tidq); 1882 if (ret) 1883 return ret; 1884 } 1885 } 1886 1887 cs_etm__packet_swap(etm, tidq); 1888 1889 return 0; 1890 } 1891 1892 static int cs_etm__exception(struct cs_etm_traceid_queue *tidq) 1893 { 1894 /* 1895 * When the exception packet is inserted, whether the last instruction 1896 * in previous range packet is taken branch or not, we need to force 1897 * to set 'prev_packet->last_instr_taken_branch' to true. This ensures 1898 * to generate branch sample for the instruction range before the 1899 * exception is trapped to kernel or before the exception returning. 1900 * 1901 * The exception packet includes the dummy address values, so don't 1902 * swap PACKET with PREV_PACKET. This keeps PREV_PACKET to be useful 1903 * for generating instruction and branch samples. 1904 */ 1905 if (tidq->prev_packet->sample_type == CS_ETM_RANGE) 1906 tidq->prev_packet->last_instr_taken_branch = true; 1907 1908 return 0; 1909 } 1910 1911 static int cs_etm__flush(struct cs_etm_queue *etmq, 1912 struct cs_etm_traceid_queue *tidq) 1913 { 1914 int err = 0; 1915 struct cs_etm_auxtrace *etm = etmq->etm; 1916 1917 /* Handle start tracing packet */ 1918 if (tidq->prev_packet->sample_type == CS_ETM_EMPTY) 1919 goto swap_packet; 1920 1921 if (etmq->etm->synth_opts.last_branch && 1922 etmq->etm->synth_opts.instructions && 1923 tidq->prev_packet->sample_type == CS_ETM_RANGE) { 1924 u64 addr; 1925 1926 /* Prepare last branches for instruction sample */ 1927 cs_etm__copy_last_branch_rb(etmq, tidq); 1928 1929 /* 1930 * Generate a last branch event for the branches left in the 1931 * circular buffer at the end of the trace. 1932 * 1933 * Use the address of the end of the last reported execution 1934 * range 1935 */ 1936 addr = cs_etm__last_executed_instr(tidq->prev_packet); 1937 1938 err = cs_etm__synth_instruction_sample( 1939 etmq, tidq, addr, 1940 tidq->period_instructions); 1941 if (err) 1942 return err; 1943 1944 tidq->period_instructions = 0; 1945 1946 } 1947 1948 if (etm->synth_opts.branches && 1949 tidq->prev_packet->sample_type == CS_ETM_RANGE) { 1950 err = cs_etm__synth_branch_sample(etmq, tidq); 1951 if (err) 1952 return err; 1953 } 1954 1955 swap_packet: 1956 cs_etm__packet_swap(etm, tidq); 1957 1958 /* Reset last branches after flush the trace */ 1959 if (etm->synth_opts.last_branch) 1960 cs_etm__reset_last_branch_rb(tidq); 1961 1962 return err; 1963 } 1964 1965 static int cs_etm__end_block(struct cs_etm_queue *etmq, 1966 struct cs_etm_traceid_queue *tidq) 1967 { 1968 int err; 1969 1970 /* 1971 * It has no new packet coming and 'etmq->packet' contains the stale 1972 * packet which was set at the previous time with packets swapping; 1973 * so skip to generate branch sample to avoid stale packet. 1974 * 1975 * For this case only flush branch stack and generate a last branch 1976 * event for the branches left in the circular buffer at the end of 1977 * the trace. 1978 */ 1979 if (etmq->etm->synth_opts.last_branch && 1980 etmq->etm->synth_opts.instructions && 1981 tidq->prev_packet->sample_type == CS_ETM_RANGE) { 1982 u64 addr; 1983 1984 /* Prepare last branches for instruction sample */ 1985 cs_etm__copy_last_branch_rb(etmq, tidq); 1986 1987 /* 1988 * Use the address of the end of the last reported execution 1989 * range. 1990 */ 1991 addr = cs_etm__last_executed_instr(tidq->prev_packet); 1992 1993 err = cs_etm__synth_instruction_sample( 1994 etmq, tidq, addr, 1995 tidq->period_instructions); 1996 if (err) 1997 return err; 1998 1999 tidq->period_instructions = 0; 2000 } 2001 2002 return 0; 2003 } 2004 /* 2005 * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue 2006 * if need be. 2007 * Returns: < 0 if error 2008 * = 0 if no more auxtrace_buffer to read 2009 * > 0 if the current buffer isn't empty yet 2010 */ 2011 static int cs_etm__get_data_block(struct cs_etm_queue *etmq) 2012 { 2013 int ret; 2014 2015 if (!etmq->buf_len) { 2016 ret = cs_etm__get_trace(etmq); 2017 if (ret <= 0) 2018 return ret; 2019 /* 2020 * We cannot assume consecutive blocks in the data file 2021 * are contiguous, reset the decoder to force re-sync. 2022 */ 2023 ret = cs_etm_decoder__reset(etmq->decoder); 2024 if (ret) 2025 return ret; 2026 } 2027 2028 return etmq->buf_len; 2029 } 2030 2031 static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id, 2032 struct cs_etm_packet *packet, 2033 u64 end_addr) 2034 { 2035 /* Initialise to keep compiler happy */ 2036 u16 instr16 = 0; 2037 u32 instr32 = 0; 2038 u64 addr; 2039 2040 switch (packet->isa) { 2041 case CS_ETM_ISA_T32: 2042 /* 2043 * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247: 2044 * 2045 * b'15 b'8 2046 * +-----------------+--------+ 2047 * | 1 1 0 1 1 1 1 1 | imm8 | 2048 * +-----------------+--------+ 2049 * 2050 * According to the specification, it only defines SVC for T32 2051 * with 16 bits instruction and has no definition for 32bits; 2052 * so below only read 2 bytes as instruction size for T32. 2053 */ 2054 addr = end_addr - 2; 2055 cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr16), 2056 (u8 *)&instr16, 0); 2057 if ((instr16 & 0xFF00) == 0xDF00) 2058 return true; 2059 2060 break; 2061 case CS_ETM_ISA_A32: 2062 /* 2063 * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247: 2064 * 2065 * b'31 b'28 b'27 b'24 2066 * +---------+---------+-------------------------+ 2067 * | !1111 | 1 1 1 1 | imm24 | 2068 * +---------+---------+-------------------------+ 2069 */ 2070 addr = end_addr - 4; 2071 cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32), 2072 (u8 *)&instr32, 0); 2073 if ((instr32 & 0x0F000000) == 0x0F000000 && 2074 (instr32 & 0xF0000000) != 0xF0000000) 2075 return true; 2076 2077 break; 2078 case CS_ETM_ISA_A64: 2079 /* 2080 * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294: 2081 * 2082 * b'31 b'21 b'4 b'0 2083 * +-----------------------+---------+-----------+ 2084 * | 1 1 0 1 0 1 0 0 0 0 0 | imm16 | 0 0 0 0 1 | 2085 * +-----------------------+---------+-----------+ 2086 */ 2087 addr = end_addr - 4; 2088 cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32), 2089 (u8 *)&instr32, 0); 2090 if ((instr32 & 0xFFE0001F) == 0xd4000001) 2091 return true; 2092 2093 break; 2094 case CS_ETM_ISA_UNKNOWN: 2095 default: 2096 break; 2097 } 2098 2099 return false; 2100 } 2101 2102 static bool cs_etm__is_syscall(struct cs_etm_queue *etmq, 2103 struct cs_etm_traceid_queue *tidq, u64 magic) 2104 { 2105 u8 trace_chan_id = tidq->trace_chan_id; 2106 struct cs_etm_packet *packet = tidq->packet; 2107 struct cs_etm_packet *prev_packet = tidq->prev_packet; 2108 2109 if (magic == __perf_cs_etmv3_magic) 2110 if (packet->exception_number == CS_ETMV3_EXC_SVC) 2111 return true; 2112 2113 /* 2114 * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and 2115 * HVC cases; need to check if it's SVC instruction based on 2116 * packet address. 2117 */ 2118 if (magic == __perf_cs_etmv4_magic) { 2119 if (packet->exception_number == CS_ETMV4_EXC_CALL && 2120 cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet, 2121 prev_packet->end_addr)) 2122 return true; 2123 } 2124 2125 return false; 2126 } 2127 2128 static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq, 2129 u64 magic) 2130 { 2131 struct cs_etm_packet *packet = tidq->packet; 2132 2133 if (magic == __perf_cs_etmv3_magic) 2134 if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT || 2135 packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT || 2136 packet->exception_number == CS_ETMV3_EXC_PE_RESET || 2137 packet->exception_number == CS_ETMV3_EXC_IRQ || 2138 packet->exception_number == CS_ETMV3_EXC_FIQ) 2139 return true; 2140 2141 if (magic == __perf_cs_etmv4_magic) 2142 if (packet->exception_number == CS_ETMV4_EXC_RESET || 2143 packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT || 2144 packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR || 2145 packet->exception_number == CS_ETMV4_EXC_INST_DEBUG || 2146 packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG || 2147 packet->exception_number == CS_ETMV4_EXC_IRQ || 2148 packet->exception_number == CS_ETMV4_EXC_FIQ) 2149 return true; 2150 2151 return false; 2152 } 2153 2154 static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, 2155 struct cs_etm_traceid_queue *tidq, 2156 u64 magic) 2157 { 2158 u8 trace_chan_id = tidq->trace_chan_id; 2159 struct cs_etm_packet *packet = tidq->packet; 2160 struct cs_etm_packet *prev_packet = tidq->prev_packet; 2161 2162 if (magic == __perf_cs_etmv3_magic) 2163 if (packet->exception_number == CS_ETMV3_EXC_SMC || 2164 packet->exception_number == CS_ETMV3_EXC_HYP || 2165 packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE || 2166 packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR || 2167 packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT || 2168 packet->exception_number == CS_ETMV3_EXC_DATA_FAULT || 2169 packet->exception_number == CS_ETMV3_EXC_GENERIC) 2170 return true; 2171 2172 if (magic == __perf_cs_etmv4_magic) { 2173 if (packet->exception_number == CS_ETMV4_EXC_TRAP || 2174 packet->exception_number == CS_ETMV4_EXC_ALIGNMENT || 2175 packet->exception_number == CS_ETMV4_EXC_INST_FAULT || 2176 packet->exception_number == CS_ETMV4_EXC_DATA_FAULT) 2177 return true; 2178 2179 /* 2180 * For CS_ETMV4_EXC_CALL, except SVC other instructions 2181 * (SMC, HVC) are taken as sync exceptions. 2182 */ 2183 if (packet->exception_number == CS_ETMV4_EXC_CALL && 2184 !cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet, 2185 prev_packet->end_addr)) 2186 return true; 2187 2188 /* 2189 * ETMv4 has 5 bits for exception number; if the numbers 2190 * are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ] 2191 * they are implementation defined exceptions. 2192 * 2193 * For this case, simply take it as sync exception. 2194 */ 2195 if (packet->exception_number > CS_ETMV4_EXC_FIQ && 2196 packet->exception_number <= CS_ETMV4_EXC_END) 2197 return true; 2198 } 2199 2200 return false; 2201 } 2202 2203 static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq, 2204 struct cs_etm_traceid_queue *tidq) 2205 { 2206 struct cs_etm_packet *packet = tidq->packet; 2207 struct cs_etm_packet *prev_packet = tidq->prev_packet; 2208 u8 trace_chan_id = tidq->trace_chan_id; 2209 u64 magic; 2210 int ret; 2211 2212 switch (packet->sample_type) { 2213 case CS_ETM_RANGE: 2214 /* 2215 * Immediate branch instruction without neither link nor 2216 * return flag, it's normal branch instruction within 2217 * the function. 2218 */ 2219 if (packet->last_instr_type == OCSD_INSTR_BR && 2220 packet->last_instr_subtype == OCSD_S_INSTR_NONE) { 2221 packet->flags = PERF_IP_FLAG_BRANCH; 2222 2223 if (packet->last_instr_cond) 2224 packet->flags |= PERF_IP_FLAG_CONDITIONAL; 2225 } 2226 2227 /* 2228 * Immediate branch instruction with link (e.g. BL), this is 2229 * branch instruction for function call. 2230 */ 2231 if (packet->last_instr_type == OCSD_INSTR_BR && 2232 packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK) 2233 packet->flags = PERF_IP_FLAG_BRANCH | 2234 PERF_IP_FLAG_CALL; 2235 2236 /* 2237 * Indirect branch instruction with link (e.g. BLR), this is 2238 * branch instruction for function call. 2239 */ 2240 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && 2241 packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK) 2242 packet->flags = PERF_IP_FLAG_BRANCH | 2243 PERF_IP_FLAG_CALL; 2244 2245 /* 2246 * Indirect branch instruction with subtype of 2247 * OCSD_S_INSTR_V7_IMPLIED_RET, this is explicit hint for 2248 * function return for A32/T32. 2249 */ 2250 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && 2251 packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET) 2252 packet->flags = PERF_IP_FLAG_BRANCH | 2253 PERF_IP_FLAG_RETURN; 2254 2255 /* 2256 * Indirect branch instruction without link (e.g. BR), usually 2257 * this is used for function return, especially for functions 2258 * within dynamic link lib. 2259 */ 2260 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && 2261 packet->last_instr_subtype == OCSD_S_INSTR_NONE) 2262 packet->flags = PERF_IP_FLAG_BRANCH | 2263 PERF_IP_FLAG_RETURN; 2264 2265 /* Return instruction for function return. */ 2266 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && 2267 packet->last_instr_subtype == OCSD_S_INSTR_V8_RET) 2268 packet->flags = PERF_IP_FLAG_BRANCH | 2269 PERF_IP_FLAG_RETURN; 2270 2271 /* 2272 * Decoder might insert a discontinuity in the middle of 2273 * instruction packets, fixup prev_packet with flag 2274 * PERF_IP_FLAG_TRACE_BEGIN to indicate restarting trace. 2275 */ 2276 if (prev_packet->sample_type == CS_ETM_DISCONTINUITY) 2277 prev_packet->flags |= PERF_IP_FLAG_BRANCH | 2278 PERF_IP_FLAG_TRACE_BEGIN; 2279 2280 /* 2281 * If the previous packet is an exception return packet 2282 * and the return address just follows SVC instruction, 2283 * it needs to calibrate the previous packet sample flags 2284 * as PERF_IP_FLAG_SYSCALLRET. 2285 */ 2286 if (prev_packet->flags == (PERF_IP_FLAG_BRANCH | 2287 PERF_IP_FLAG_RETURN | 2288 PERF_IP_FLAG_INTERRUPT) && 2289 cs_etm__is_svc_instr(etmq, trace_chan_id, 2290 packet, packet->start_addr)) 2291 prev_packet->flags = PERF_IP_FLAG_BRANCH | 2292 PERF_IP_FLAG_RETURN | 2293 PERF_IP_FLAG_SYSCALLRET; 2294 break; 2295 case CS_ETM_DISCONTINUITY: 2296 /* 2297 * The trace is discontinuous, if the previous packet is 2298 * instruction packet, set flag PERF_IP_FLAG_TRACE_END 2299 * for previous packet. 2300 */ 2301 if (prev_packet->sample_type == CS_ETM_RANGE) 2302 prev_packet->flags |= PERF_IP_FLAG_BRANCH | 2303 PERF_IP_FLAG_TRACE_END; 2304 break; 2305 case CS_ETM_EXCEPTION: 2306 ret = cs_etm__get_magic(etmq, packet->trace_chan_id, &magic); 2307 if (ret) 2308 return ret; 2309 2310 /* The exception is for system call. */ 2311 if (cs_etm__is_syscall(etmq, tidq, magic)) 2312 packet->flags = PERF_IP_FLAG_BRANCH | 2313 PERF_IP_FLAG_CALL | 2314 PERF_IP_FLAG_SYSCALLRET; 2315 /* 2316 * The exceptions are triggered by external signals from bus, 2317 * interrupt controller, debug module, PE reset or halt. 2318 */ 2319 else if (cs_etm__is_async_exception(tidq, magic)) 2320 packet->flags = PERF_IP_FLAG_BRANCH | 2321 PERF_IP_FLAG_CALL | 2322 PERF_IP_FLAG_ASYNC | 2323 PERF_IP_FLAG_INTERRUPT; 2324 /* 2325 * Otherwise, exception is caused by trap, instruction & 2326 * data fault, or alignment errors. 2327 */ 2328 else if (cs_etm__is_sync_exception(etmq, tidq, magic)) 2329 packet->flags = PERF_IP_FLAG_BRANCH | 2330 PERF_IP_FLAG_CALL | 2331 PERF_IP_FLAG_INTERRUPT; 2332 2333 /* 2334 * When the exception packet is inserted, since exception 2335 * packet is not used standalone for generating samples 2336 * and it's affiliation to the previous instruction range 2337 * packet; so set previous range packet flags to tell perf 2338 * it is an exception taken branch. 2339 */ 2340 if (prev_packet->sample_type == CS_ETM_RANGE) 2341 prev_packet->flags = packet->flags; 2342 break; 2343 case CS_ETM_EXCEPTION_RET: 2344 /* 2345 * When the exception return packet is inserted, since 2346 * exception return packet is not used standalone for 2347 * generating samples and it's affiliation to the previous 2348 * instruction range packet; so set previous range packet 2349 * flags to tell perf it is an exception return branch. 2350 * 2351 * The exception return can be for either system call or 2352 * other exception types; unfortunately the packet doesn't 2353 * contain exception type related info so we cannot decide 2354 * the exception type purely based on exception return packet. 2355 * If we record the exception number from exception packet and 2356 * reuse it for exception return packet, this is not reliable 2357 * due the trace can be discontinuity or the interrupt can 2358 * be nested, thus the recorded exception number cannot be 2359 * used for exception return packet for these two cases. 2360 * 2361 * For exception return packet, we only need to distinguish the 2362 * packet is for system call or for other types. Thus the 2363 * decision can be deferred when receive the next packet which 2364 * contains the return address, based on the return address we 2365 * can read out the previous instruction and check if it's a 2366 * system call instruction and then calibrate the sample flag 2367 * as needed. 2368 */ 2369 if (prev_packet->sample_type == CS_ETM_RANGE) 2370 prev_packet->flags = PERF_IP_FLAG_BRANCH | 2371 PERF_IP_FLAG_RETURN | 2372 PERF_IP_FLAG_INTERRUPT; 2373 break; 2374 case CS_ETM_EMPTY: 2375 default: 2376 break; 2377 } 2378 2379 return 0; 2380 } 2381 2382 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq) 2383 { 2384 int ret = 0; 2385 size_t processed = 0; 2386 2387 /* 2388 * Packets are decoded and added to the decoder's packet queue 2389 * until the decoder packet processing callback has requested that 2390 * processing stops or there is nothing left in the buffer. Normal 2391 * operations that stop processing are a timestamp packet or a full 2392 * decoder buffer queue. 2393 */ 2394 ret = cs_etm_decoder__process_data_block(etmq->decoder, 2395 etmq->offset, 2396 &etmq->buf[etmq->buf_used], 2397 etmq->buf_len, 2398 &processed); 2399 if (ret) 2400 goto out; 2401 2402 etmq->offset += processed; 2403 etmq->buf_used += processed; 2404 etmq->buf_len -= processed; 2405 2406 out: 2407 return ret; 2408 } 2409 2410 static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq, 2411 struct cs_etm_traceid_queue *tidq) 2412 { 2413 int ret; 2414 struct cs_etm_packet_queue *packet_queue; 2415 2416 packet_queue = &tidq->packet_queue; 2417 2418 /* Process each packet in this chunk */ 2419 while (1) { 2420 ret = cs_etm_decoder__get_packet(packet_queue, 2421 tidq->packet); 2422 if (ret <= 0) 2423 /* 2424 * Stop processing this chunk on 2425 * end of data or error 2426 */ 2427 break; 2428 2429 /* 2430 * Since packet addresses are swapped in packet 2431 * handling within below switch() statements, 2432 * thus setting sample flags must be called 2433 * prior to switch() statement to use address 2434 * information before packets swapping. 2435 */ 2436 ret = cs_etm__set_sample_flags(etmq, tidq); 2437 if (ret < 0) 2438 break; 2439 2440 switch (tidq->packet->sample_type) { 2441 case CS_ETM_RANGE: 2442 /* 2443 * If the packet contains an instruction 2444 * range, generate instruction sequence 2445 * events. 2446 */ 2447 cs_etm__sample(etmq, tidq); 2448 break; 2449 case CS_ETM_EXCEPTION: 2450 case CS_ETM_EXCEPTION_RET: 2451 /* 2452 * If the exception packet is coming, 2453 * make sure the previous instruction 2454 * range packet to be handled properly. 2455 */ 2456 cs_etm__exception(tidq); 2457 break; 2458 case CS_ETM_DISCONTINUITY: 2459 /* 2460 * Discontinuity in trace, flush 2461 * previous branch stack 2462 */ 2463 cs_etm__flush(etmq, tidq); 2464 break; 2465 case CS_ETM_EMPTY: 2466 /* 2467 * Should not receive empty packet, 2468 * report error. 2469 */ 2470 pr_err("CS ETM Trace: empty packet\n"); 2471 return -EINVAL; 2472 default: 2473 break; 2474 } 2475 } 2476 2477 return ret; 2478 } 2479 2480 static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq) 2481 { 2482 int idx; 2483 struct int_node *inode; 2484 struct cs_etm_traceid_queue *tidq; 2485 struct intlist *traceid_queues_list = etmq->traceid_queues_list; 2486 2487 intlist__for_each_entry(inode, traceid_queues_list) { 2488 idx = (int)(intptr_t)inode->priv; 2489 tidq = etmq->traceid_queues[idx]; 2490 2491 /* Ignore return value */ 2492 cs_etm__process_traceid_queue(etmq, tidq); 2493 2494 /* 2495 * Generate an instruction sample with the remaining 2496 * branchstack entries. 2497 */ 2498 cs_etm__flush(etmq, tidq); 2499 } 2500 } 2501 2502 static int cs_etm__run_per_thread_timeless_decoder(struct cs_etm_queue *etmq) 2503 { 2504 int err = 0; 2505 struct cs_etm_traceid_queue *tidq; 2506 2507 tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID); 2508 if (!tidq) 2509 return -EINVAL; 2510 2511 /* Go through each buffer in the queue and decode them one by one */ 2512 while (1) { 2513 err = cs_etm__get_data_block(etmq); 2514 if (err <= 0) 2515 return err; 2516 2517 /* Run trace decoder until buffer consumed or end of trace */ 2518 do { 2519 err = cs_etm__decode_data_block(etmq); 2520 if (err) 2521 return err; 2522 2523 /* 2524 * Process each packet in this chunk, nothing to do if 2525 * an error occurs other than hoping the next one will 2526 * be better. 2527 */ 2528 err = cs_etm__process_traceid_queue(etmq, tidq); 2529 2530 } while (etmq->buf_len); 2531 2532 if (err == 0) 2533 /* Flush any remaining branch stack entries */ 2534 err = cs_etm__end_block(etmq, tidq); 2535 } 2536 2537 return err; 2538 } 2539 2540 static int cs_etm__run_per_cpu_timeless_decoder(struct cs_etm_queue *etmq) 2541 { 2542 int idx, err = 0; 2543 struct cs_etm_traceid_queue *tidq; 2544 struct int_node *inode; 2545 2546 /* Go through each buffer in the queue and decode them one by one */ 2547 while (1) { 2548 err = cs_etm__get_data_block(etmq); 2549 if (err <= 0) 2550 return err; 2551 2552 /* Run trace decoder until buffer consumed or end of trace */ 2553 do { 2554 err = cs_etm__decode_data_block(etmq); 2555 if (err) 2556 return err; 2557 2558 /* 2559 * cs_etm__run_per_thread_timeless_decoder() runs on a 2560 * single traceID queue because each TID has a separate 2561 * buffer. But here in per-cpu mode we need to iterate 2562 * over each channel instead. 2563 */ 2564 intlist__for_each_entry(inode, 2565 etmq->traceid_queues_list) { 2566 idx = (int)(intptr_t)inode->priv; 2567 tidq = etmq->traceid_queues[idx]; 2568 cs_etm__process_traceid_queue(etmq, tidq); 2569 } 2570 } while (etmq->buf_len); 2571 2572 intlist__for_each_entry(inode, etmq->traceid_queues_list) { 2573 idx = (int)(intptr_t)inode->priv; 2574 tidq = etmq->traceid_queues[idx]; 2575 /* Flush any remaining branch stack entries */ 2576 err = cs_etm__end_block(etmq, tidq); 2577 if (err) 2578 return err; 2579 } 2580 } 2581 2582 return err; 2583 } 2584 2585 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, 2586 pid_t tid) 2587 { 2588 unsigned int i; 2589 struct auxtrace_queues *queues = &etm->queues; 2590 2591 for (i = 0; i < queues->nr_queues; i++) { 2592 struct auxtrace_queue *queue = &etm->queues.queue_array[i]; 2593 struct cs_etm_queue *etmq = queue->priv; 2594 struct cs_etm_traceid_queue *tidq; 2595 2596 if (!etmq) 2597 continue; 2598 2599 if (etm->per_thread_decoding) { 2600 tidq = cs_etm__etmq_get_traceid_queue( 2601 etmq, CS_ETM_PER_THREAD_TRACEID); 2602 2603 if (!tidq) 2604 continue; 2605 2606 if (tid == -1 || thread__tid(tidq->thread) == tid) 2607 cs_etm__run_per_thread_timeless_decoder(etmq); 2608 } else 2609 cs_etm__run_per_cpu_timeless_decoder(etmq); 2610 } 2611 2612 return 0; 2613 } 2614 2615 static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm) 2616 { 2617 int ret = 0; 2618 unsigned int cs_queue_nr, queue_nr, i; 2619 u8 trace_chan_id; 2620 u64 cs_timestamp; 2621 struct auxtrace_queue *queue; 2622 struct cs_etm_queue *etmq; 2623 struct cs_etm_traceid_queue *tidq; 2624 2625 /* 2626 * Pre-populate the heap with one entry from each queue so that we can 2627 * start processing in time order across all queues. 2628 */ 2629 for (i = 0; i < etm->queues.nr_queues; i++) { 2630 etmq = etm->queues.queue_array[i].priv; 2631 if (!etmq) 2632 continue; 2633 2634 ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i); 2635 if (ret) 2636 return ret; 2637 } 2638 2639 while (1) { 2640 if (!etm->heap.heap_cnt) 2641 goto out; 2642 2643 /* Take the entry at the top of the min heap */ 2644 cs_queue_nr = etm->heap.heap_array[0].queue_nr; 2645 queue_nr = TO_QUEUE_NR(cs_queue_nr); 2646 trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr); 2647 queue = &etm->queues.queue_array[queue_nr]; 2648 etmq = queue->priv; 2649 2650 /* 2651 * Remove the top entry from the heap since we are about 2652 * to process it. 2653 */ 2654 auxtrace_heap__pop(&etm->heap); 2655 2656 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); 2657 if (!tidq) { 2658 /* 2659 * No traceID queue has been allocated for this traceID, 2660 * which means something somewhere went very wrong. No 2661 * other choice than simply exit. 2662 */ 2663 ret = -EINVAL; 2664 goto out; 2665 } 2666 2667 /* 2668 * Packets associated with this timestamp are already in 2669 * the etmq's traceID queue, so process them. 2670 */ 2671 ret = cs_etm__process_traceid_queue(etmq, tidq); 2672 if (ret < 0) 2673 goto out; 2674 2675 /* 2676 * Packets for this timestamp have been processed, time to 2677 * move on to the next timestamp, fetching a new auxtrace_buffer 2678 * if need be. 2679 */ 2680 refetch: 2681 ret = cs_etm__get_data_block(etmq); 2682 if (ret < 0) 2683 goto out; 2684 2685 /* 2686 * No more auxtrace_buffers to process in this etmq, simply 2687 * move on to another entry in the auxtrace_heap. 2688 */ 2689 if (!ret) 2690 continue; 2691 2692 ret = cs_etm__decode_data_block(etmq); 2693 if (ret) 2694 goto out; 2695 2696 cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id); 2697 2698 if (!cs_timestamp) { 2699 /* 2700 * Function cs_etm__decode_data_block() returns when 2701 * there is no more traces to decode in the current 2702 * auxtrace_buffer OR when a timestamp has been 2703 * encountered on any of the traceID queues. Since we 2704 * did not get a timestamp, there is no more traces to 2705 * process in this auxtrace_buffer. As such empty and 2706 * flush all traceID queues. 2707 */ 2708 cs_etm__clear_all_traceid_queues(etmq); 2709 2710 /* Fetch another auxtrace_buffer for this etmq */ 2711 goto refetch; 2712 } 2713 2714 /* 2715 * Add to the min heap the timestamp for packets that have 2716 * just been decoded. They will be processed and synthesized 2717 * during the next call to cs_etm__process_traceid_queue() for 2718 * this queue/traceID. 2719 */ 2720 cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id); 2721 ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp); 2722 } 2723 2724 out: 2725 return ret; 2726 } 2727 2728 static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm, 2729 union perf_event *event) 2730 { 2731 struct thread *th; 2732 2733 if (etm->timeless_decoding) 2734 return 0; 2735 2736 /* 2737 * Add the tid/pid to the log so that we can get a match when we get a 2738 * contextID from the decoder. Only track for the host: only kernel 2739 * trace is supported for guests which wouldn't need pids so this should 2740 * be fine. 2741 */ 2742 th = machine__findnew_thread(&etm->session->machines.host, 2743 event->itrace_start.pid, 2744 event->itrace_start.tid); 2745 if (!th) 2746 return -ENOMEM; 2747 2748 thread__put(th); 2749 2750 return 0; 2751 } 2752 2753 static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm, 2754 union perf_event *event) 2755 { 2756 struct thread *th; 2757 bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; 2758 2759 /* 2760 * Context switch in per-thread mode are irrelevant since perf 2761 * will start/stop tracing as the process is scheduled. 2762 */ 2763 if (etm->timeless_decoding) 2764 return 0; 2765 2766 /* 2767 * SWITCH_IN events carry the next process to be switched out while 2768 * SWITCH_OUT events carry the process to be switched in. As such 2769 * we don't care about IN events. 2770 */ 2771 if (!out) 2772 return 0; 2773 2774 /* 2775 * Add the tid/pid to the log so that we can get a match when we get a 2776 * contextID from the decoder. Only track for the host: only kernel 2777 * trace is supported for guests which wouldn't need pids so this should 2778 * be fine. 2779 */ 2780 th = machine__findnew_thread(&etm->session->machines.host, 2781 event->context_switch.next_prev_pid, 2782 event->context_switch.next_prev_tid); 2783 if (!th) 2784 return -ENOMEM; 2785 2786 thread__put(th); 2787 2788 return 0; 2789 } 2790 2791 static int cs_etm__process_event(struct perf_session *session, 2792 union perf_event *event, 2793 struct perf_sample *sample, 2794 const struct perf_tool *tool) 2795 { 2796 struct cs_etm_auxtrace *etm = container_of(session->auxtrace, 2797 struct cs_etm_auxtrace, 2798 auxtrace); 2799 2800 if (dump_trace) 2801 return 0; 2802 2803 if (!tool->ordered_events) { 2804 pr_err("CoreSight ETM Trace requires ordered events\n"); 2805 return -EINVAL; 2806 } 2807 2808 switch (event->header.type) { 2809 case PERF_RECORD_EXIT: 2810 /* 2811 * Don't need to wait for cs_etm__flush_events() in per-thread mode to 2812 * start the decode because we know there will be no more trace from 2813 * this thread. All this does is emit samples earlier than waiting for 2814 * the flush in other modes, but with timestamps it makes sense to wait 2815 * for flush so that events from different threads are interleaved 2816 * properly. 2817 */ 2818 if (etm->per_thread_decoding && etm->timeless_decoding) 2819 return cs_etm__process_timeless_queues(etm, 2820 event->fork.tid); 2821 break; 2822 2823 case PERF_RECORD_ITRACE_START: 2824 return cs_etm__process_itrace_start(etm, event); 2825 2826 case PERF_RECORD_SWITCH_CPU_WIDE: 2827 return cs_etm__process_switch_cpu_wide(etm, event); 2828 2829 case PERF_RECORD_AUX: 2830 /* 2831 * Record the latest kernel timestamp available in the header 2832 * for samples so that synthesised samples occur from this point 2833 * onwards. 2834 */ 2835 if (sample->time && (sample->time != (u64)-1)) 2836 etm->latest_kernel_timestamp = sample->time; 2837 break; 2838 2839 default: 2840 break; 2841 } 2842 2843 return 0; 2844 } 2845 2846 static void dump_queued_data(struct cs_etm_auxtrace *etm, 2847 struct perf_record_auxtrace *event) 2848 { 2849 struct auxtrace_buffer *buf; 2850 unsigned int i; 2851 /* 2852 * Find all buffers with same reference in the queues and dump them. 2853 * This is because the queues can contain multiple entries of the same 2854 * buffer that were split on aux records. 2855 */ 2856 for (i = 0; i < etm->queues.nr_queues; ++i) 2857 list_for_each_entry(buf, &etm->queues.queue_array[i].head, list) 2858 if (buf->reference == event->reference) 2859 cs_etm__dump_event(etm->queues.queue_array[i].priv, buf); 2860 } 2861 2862 static int cs_etm__process_auxtrace_event(struct perf_session *session, 2863 union perf_event *event, 2864 const struct perf_tool *tool __maybe_unused) 2865 { 2866 struct cs_etm_auxtrace *etm = container_of(session->auxtrace, 2867 struct cs_etm_auxtrace, 2868 auxtrace); 2869 if (!etm->data_queued) { 2870 struct auxtrace_buffer *buffer; 2871 off_t data_offset; 2872 int fd = perf_data__fd(session->data); 2873 bool is_pipe = perf_data__is_pipe(session->data); 2874 int err; 2875 int idx = event->auxtrace.idx; 2876 2877 if (is_pipe) 2878 data_offset = 0; 2879 else { 2880 data_offset = lseek(fd, 0, SEEK_CUR); 2881 if (data_offset == -1) 2882 return -errno; 2883 } 2884 2885 err = auxtrace_queues__add_event(&etm->queues, session, 2886 event, data_offset, &buffer); 2887 if (err) 2888 return err; 2889 2890 if (dump_trace) 2891 if (auxtrace_buffer__get_data(buffer, fd)) { 2892 cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer); 2893 auxtrace_buffer__put_data(buffer); 2894 } 2895 } else if (dump_trace) 2896 dump_queued_data(etm, &event->auxtrace); 2897 2898 return 0; 2899 } 2900 2901 static int cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace *etm) 2902 { 2903 struct evsel *evsel; 2904 struct evlist *evlist = etm->session->evlist; 2905 2906 /* Override timeless mode with user input from --itrace=Z */ 2907 if (etm->synth_opts.timeless_decoding) { 2908 etm->timeless_decoding = true; 2909 return 0; 2910 } 2911 2912 /* 2913 * Find the cs_etm evsel and look at what its timestamp setting was 2914 */ 2915 evlist__for_each_entry(evlist, evsel) 2916 if (cs_etm__evsel_is_auxtrace(etm->session, evsel)) { 2917 etm->timeless_decoding = 2918 !(evsel->core.attr.config & BIT(ETM_OPT_TS)); 2919 return 0; 2920 } 2921 2922 pr_err("CS ETM: Couldn't find ETM evsel\n"); 2923 return -EINVAL; 2924 } 2925 2926 /* 2927 * Read a single cpu parameter block from the auxtrace_info priv block. 2928 * 2929 * For version 1 there is a per cpu nr_params entry. If we are handling 2930 * version 1 file, then there may be less, the same, or more params 2931 * indicated by this value than the compile time number we understand. 2932 * 2933 * For a version 0 info block, there are a fixed number, and we need to 2934 * fill out the nr_param value in the metadata we create. 2935 */ 2936 static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset, 2937 int out_blk_size, int nr_params_v0) 2938 { 2939 u64 *metadata = NULL; 2940 int hdr_version; 2941 int nr_in_params, nr_out_params, nr_cmn_params; 2942 int i, k; 2943 2944 metadata = zalloc(sizeof(*metadata) * out_blk_size); 2945 if (!metadata) 2946 return NULL; 2947 2948 /* read block current index & version */ 2949 i = *buff_in_offset; 2950 hdr_version = buff_in[CS_HEADER_VERSION]; 2951 2952 if (!hdr_version) { 2953 /* read version 0 info block into a version 1 metadata block */ 2954 nr_in_params = nr_params_v0; 2955 metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC]; 2956 metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU]; 2957 metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params; 2958 /* remaining block params at offset +1 from source */ 2959 for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++) 2960 metadata[k + 1] = buff_in[i + k]; 2961 /* version 0 has 2 common params */ 2962 nr_cmn_params = 2; 2963 } else { 2964 /* read version 1 info block - input and output nr_params may differ */ 2965 /* version 1 has 3 common params */ 2966 nr_cmn_params = 3; 2967 nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS]; 2968 2969 /* if input has more params than output - skip excess */ 2970 nr_out_params = nr_in_params + nr_cmn_params; 2971 if (nr_out_params > out_blk_size) 2972 nr_out_params = out_blk_size; 2973 2974 for (k = CS_ETM_MAGIC; k < nr_out_params; k++) 2975 metadata[k] = buff_in[i + k]; 2976 2977 /* record the actual nr params we copied */ 2978 metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params; 2979 } 2980 2981 /* adjust in offset by number of in params used */ 2982 i += nr_in_params + nr_cmn_params; 2983 *buff_in_offset = i; 2984 return metadata; 2985 } 2986 2987 /** 2988 * Puts a fragment of an auxtrace buffer into the auxtrace queues based 2989 * on the bounds of aux_event, if it matches with the buffer that's at 2990 * file_offset. 2991 * 2992 * Normally, whole auxtrace buffers would be added to the queue. But we 2993 * want to reset the decoder for every PERF_RECORD_AUX event, and the decoder 2994 * is reset across each buffer, so splitting the buffers up in advance has 2995 * the same effect. 2996 */ 2997 static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz, 2998 struct perf_record_aux *aux_event, struct perf_sample *sample) 2999 { 3000 int err; 3001 char buf[PERF_SAMPLE_MAX_SIZE]; 3002 union perf_event *auxtrace_event_union; 3003 struct perf_record_auxtrace *auxtrace_event; 3004 union perf_event auxtrace_fragment; 3005 __u64 aux_offset, aux_size; 3006 enum cs_etm_format format; 3007 3008 struct cs_etm_auxtrace *etm = container_of(session->auxtrace, 3009 struct cs_etm_auxtrace, 3010 auxtrace); 3011 3012 /* 3013 * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got 3014 * from looping through the auxtrace index. 3015 */ 3016 err = perf_session__peek_event(session, file_offset, buf, 3017 PERF_SAMPLE_MAX_SIZE, &auxtrace_event_union, NULL); 3018 if (err) 3019 return err; 3020 auxtrace_event = &auxtrace_event_union->auxtrace; 3021 if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE) 3022 return -EINVAL; 3023 3024 if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) || 3025 auxtrace_event->header.size != sz) { 3026 return -EINVAL; 3027 } 3028 3029 /* 3030 * In per-thread mode, auxtrace CPU is set to -1, but TID will be set instead. See 3031 * auxtrace_mmap_params__set_idx(). However, the sample AUX event will contain a 3032 * CPU as we set this always for the AUX_OUTPUT_HW_ID event. 3033 * So now compare only TIDs if auxtrace CPU is -1, and CPUs if auxtrace CPU is not -1. 3034 * Return 'not found' if mismatch. 3035 */ 3036 if (auxtrace_event->cpu == (__u32) -1) { 3037 etm->per_thread_decoding = true; 3038 if (auxtrace_event->tid != sample->tid) 3039 return 1; 3040 } else if (auxtrace_event->cpu != sample->cpu) { 3041 if (etm->per_thread_decoding) { 3042 /* 3043 * Found a per-cpu buffer after a per-thread one was 3044 * already found 3045 */ 3046 pr_err("CS ETM: Inconsistent per-thread/per-cpu mode.\n"); 3047 return -EINVAL; 3048 } 3049 return 1; 3050 } 3051 3052 if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) { 3053 /* 3054 * Clamp size in snapshot mode. The buffer size is clamped in 3055 * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect 3056 * the buffer size. 3057 */ 3058 aux_size = min(aux_event->aux_size, auxtrace_event->size); 3059 3060 /* 3061 * In this mode, the head also points to the end of the buffer so aux_offset 3062 * needs to have the size subtracted so it points to the beginning as in normal mode 3063 */ 3064 aux_offset = aux_event->aux_offset - aux_size; 3065 } else { 3066 aux_size = aux_event->aux_size; 3067 aux_offset = aux_event->aux_offset; 3068 } 3069 3070 if (aux_offset >= auxtrace_event->offset && 3071 aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) { 3072 struct cs_etm_queue *etmq = etm->queues.queue_array[auxtrace_event->idx].priv; 3073 3074 /* 3075 * If this AUX event was inside this buffer somewhere, create a new auxtrace event 3076 * based on the sizes of the aux event, and queue that fragment. 3077 */ 3078 auxtrace_fragment.auxtrace = *auxtrace_event; 3079 auxtrace_fragment.auxtrace.size = aux_size; 3080 auxtrace_fragment.auxtrace.offset = aux_offset; 3081 file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size; 3082 3083 pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64 3084 " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu); 3085 err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment, 3086 file_offset, NULL); 3087 if (err) 3088 return err; 3089 3090 format = (aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW) ? 3091 UNFORMATTED : FORMATTED; 3092 if (etmq->format != UNSET && format != etmq->format) { 3093 pr_err("CS_ETM: mixed formatted and unformatted trace not supported\n"); 3094 return -EINVAL; 3095 } 3096 etmq->format = format; 3097 return 0; 3098 } 3099 3100 /* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */ 3101 return 1; 3102 } 3103 3104 static int cs_etm__process_aux_hw_id_cb(struct perf_session *session, union perf_event *event, 3105 u64 offset __maybe_unused, void *data __maybe_unused) 3106 { 3107 /* look to handle PERF_RECORD_AUX_OUTPUT_HW_ID early to ensure decoders can be set up */ 3108 if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) { 3109 (*(int *)data)++; /* increment found count */ 3110 return cs_etm__process_aux_output_hw_id(session, event); 3111 } 3112 return 0; 3113 } 3114 3115 static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event, 3116 u64 offset __maybe_unused, void *data __maybe_unused) 3117 { 3118 struct perf_sample sample; 3119 int ret; 3120 struct auxtrace_index_entry *ent; 3121 struct auxtrace_index *auxtrace_index; 3122 struct evsel *evsel; 3123 size_t i; 3124 3125 /* Don't care about any other events, we're only queuing buffers for AUX events */ 3126 if (event->header.type != PERF_RECORD_AUX) 3127 return 0; 3128 3129 if (event->header.size < sizeof(struct perf_record_aux)) 3130 return -EINVAL; 3131 3132 /* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */ 3133 if (!event->aux.aux_size) 3134 return 0; 3135 3136 /* 3137 * Parse the sample, we need the sample_id_all data that comes after the event so that the 3138 * CPU or PID can be matched to an AUXTRACE buffer's CPU or PID. 3139 */ 3140 evsel = evlist__event2evsel(session->evlist, event); 3141 if (!evsel) 3142 return -EINVAL; 3143 ret = evsel__parse_sample(evsel, event, &sample); 3144 if (ret) 3145 return ret; 3146 3147 /* 3148 * Loop through the auxtrace index to find the buffer that matches up with this aux event. 3149 */ 3150 list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) { 3151 for (i = 0; i < auxtrace_index->nr; i++) { 3152 ent = &auxtrace_index->entries[i]; 3153 ret = cs_etm__queue_aux_fragment(session, ent->file_offset, 3154 ent->sz, &event->aux, &sample); 3155 /* 3156 * Stop search on error or successful values. Continue search on 3157 * 1 ('not found') 3158 */ 3159 if (ret != 1) 3160 return ret; 3161 } 3162 } 3163 3164 /* 3165 * Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but 3166 * don't exit with an error because it will still be possible to decode other aux records. 3167 */ 3168 pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64 3169 " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu); 3170 return 0; 3171 } 3172 3173 static int cs_etm__queue_aux_records(struct perf_session *session) 3174 { 3175 struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index, 3176 struct auxtrace_index, list); 3177 if (index && index->nr > 0) 3178 return perf_session__peek_events(session, session->header.data_offset, 3179 session->header.data_size, 3180 cs_etm__queue_aux_records_cb, NULL); 3181 3182 /* 3183 * We would get here if there are no entries in the index (either no auxtrace 3184 * buffers or no index at all). Fail silently as there is the possibility of 3185 * queueing them in cs_etm__process_auxtrace_event() if etm->data_queued is still 3186 * false. 3187 * 3188 * In that scenario, buffers will not be split by AUX records. 3189 */ 3190 return 0; 3191 } 3192 3193 #define HAS_PARAM(j, type, param) (metadata[(j)][CS_ETM_NR_TRC_PARAMS] <= \ 3194 (CS_##type##_##param - CS_ETM_COMMON_BLK_MAX_V1)) 3195 3196 /* 3197 * Loop through the ETMs and complain if we find at least one where ts_source != 1 (virtual 3198 * timestamps). 3199 */ 3200 static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu) 3201 { 3202 int j; 3203 3204 for (j = 0; j < num_cpu; j++) { 3205 switch (metadata[j][CS_ETM_MAGIC]) { 3206 case __perf_cs_etmv4_magic: 3207 if (HAS_PARAM(j, ETMV4, TS_SOURCE) || metadata[j][CS_ETMV4_TS_SOURCE] != 1) 3208 return false; 3209 break; 3210 case __perf_cs_ete_magic: 3211 if (HAS_PARAM(j, ETE, TS_SOURCE) || metadata[j][CS_ETE_TS_SOURCE] != 1) 3212 return false; 3213 break; 3214 default: 3215 /* Unknown / unsupported magic number. */ 3216 return false; 3217 } 3218 } 3219 return true; 3220 } 3221 3222 /* map trace ids to correct metadata block, from information in metadata */ 3223 static int cs_etm__map_trace_ids_metadata(struct cs_etm_auxtrace *etm, int num_cpu, 3224 u64 **metadata) 3225 { 3226 u64 cs_etm_magic; 3227 u8 trace_chan_id; 3228 int i, err; 3229 3230 for (i = 0; i < num_cpu; i++) { 3231 cs_etm_magic = metadata[i][CS_ETM_MAGIC]; 3232 switch (cs_etm_magic) { 3233 case __perf_cs_etmv3_magic: 3234 metadata[i][CS_ETM_ETMTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK; 3235 trace_chan_id = (u8)(metadata[i][CS_ETM_ETMTRACEIDR]); 3236 break; 3237 case __perf_cs_etmv4_magic: 3238 case __perf_cs_ete_magic: 3239 metadata[i][CS_ETMV4_TRCTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK; 3240 trace_chan_id = (u8)(metadata[i][CS_ETMV4_TRCTRACEIDR]); 3241 break; 3242 default: 3243 /* unknown magic number */ 3244 return -EINVAL; 3245 } 3246 err = cs_etm__map_trace_id_v0(etm, trace_chan_id, metadata[i]); 3247 if (err) 3248 return err; 3249 } 3250 return 0; 3251 } 3252 3253 /* 3254 * Use the data gathered by the peeks for HW_ID (trace ID mappings) and AUX 3255 * (formatted or not) packets to create the decoders. 3256 */ 3257 static int cs_etm__create_queue_decoders(struct cs_etm_queue *etmq) 3258 { 3259 struct cs_etm_decoder_params d_params; 3260 struct cs_etm_trace_params *t_params; 3261 int decoders = intlist__nr_entries(etmq->traceid_list); 3262 3263 if (decoders == 0) 3264 return 0; 3265 3266 /* 3267 * Each queue can only contain data from one CPU when unformatted, so only one decoder is 3268 * needed. 3269 */ 3270 if (etmq->format == UNFORMATTED) 3271 assert(decoders == 1); 3272 3273 /* Use metadata to fill in trace parameters for trace decoder */ 3274 t_params = zalloc(sizeof(*t_params) * decoders); 3275 3276 if (!t_params) 3277 goto out_free; 3278 3279 if (cs_etm__init_trace_params(t_params, etmq)) 3280 goto out_free; 3281 3282 /* Set decoder parameters to decode trace packets */ 3283 if (cs_etm__init_decoder_params(&d_params, etmq, 3284 dump_trace ? CS_ETM_OPERATION_PRINT : 3285 CS_ETM_OPERATION_DECODE)) 3286 goto out_free; 3287 3288 etmq->decoder = cs_etm_decoder__new(decoders, &d_params, 3289 t_params); 3290 3291 if (!etmq->decoder) 3292 goto out_free; 3293 3294 /* 3295 * Register a function to handle all memory accesses required by 3296 * the trace decoder library. 3297 */ 3298 if (cs_etm_decoder__add_mem_access_cb(etmq->decoder, 3299 0x0L, ((u64) -1L), 3300 cs_etm__mem_access)) 3301 goto out_free_decoder; 3302 3303 zfree(&t_params); 3304 return 0; 3305 3306 out_free_decoder: 3307 cs_etm_decoder__free(etmq->decoder); 3308 out_free: 3309 zfree(&t_params); 3310 return -EINVAL; 3311 } 3312 3313 static int cs_etm__create_decoders(struct cs_etm_auxtrace *etm) 3314 { 3315 struct auxtrace_queues *queues = &etm->queues; 3316 3317 for (unsigned int i = 0; i < queues->nr_queues; i++) { 3318 bool empty = list_empty(&queues->queue_array[i].head); 3319 struct cs_etm_queue *etmq = queues->queue_array[i].priv; 3320 int ret; 3321 3322 /* 3323 * Don't create decoders for empty queues, mainly because 3324 * etmq->format is unknown for empty queues. 3325 */ 3326 assert(empty == (etmq->format == UNSET)); 3327 if (empty) 3328 continue; 3329 3330 ret = cs_etm__create_queue_decoders(etmq); 3331 if (ret) 3332 return ret; 3333 } 3334 return 0; 3335 } 3336 3337 int cs_etm__process_auxtrace_info_full(union perf_event *event, 3338 struct perf_session *session) 3339 { 3340 struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; 3341 struct cs_etm_auxtrace *etm = NULL; 3342 struct perf_record_time_conv *tc = &session->time_conv; 3343 int event_header_size = sizeof(struct perf_event_header); 3344 int total_size = auxtrace_info->header.size; 3345 int priv_size = 0; 3346 int num_cpu, max_cpu = 0; 3347 int err = 0; 3348 int aux_hw_id_found; 3349 int i; 3350 u64 *ptr = NULL; 3351 u64 **metadata = NULL; 3352 3353 /* First the global part */ 3354 ptr = (u64 *) auxtrace_info->priv; 3355 num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff; 3356 metadata = zalloc(sizeof(*metadata) * num_cpu); 3357 if (!metadata) 3358 return -ENOMEM; 3359 3360 /* Start parsing after the common part of the header */ 3361 i = CS_HEADER_VERSION_MAX; 3362 3363 /* 3364 * The metadata is stored in the auxtrace_info section and encodes 3365 * the configuration of the ARM embedded trace macrocell which is 3366 * required by the trace decoder to properly decode the trace due 3367 * to its highly compressed nature. 3368 */ 3369 for (int j = 0; j < num_cpu; j++) { 3370 if (ptr[i] == __perf_cs_etmv3_magic) { 3371 metadata[j] = 3372 cs_etm__create_meta_blk(ptr, &i, 3373 CS_ETM_PRIV_MAX, 3374 CS_ETM_NR_TRC_PARAMS_V0); 3375 } else if (ptr[i] == __perf_cs_etmv4_magic) { 3376 metadata[j] = 3377 cs_etm__create_meta_blk(ptr, &i, 3378 CS_ETMV4_PRIV_MAX, 3379 CS_ETMV4_NR_TRC_PARAMS_V0); 3380 } else if (ptr[i] == __perf_cs_ete_magic) { 3381 metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1); 3382 } else { 3383 ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". File could be from a newer version of perf.\n", 3384 ptr[i]); 3385 err = -EINVAL; 3386 goto err_free_metadata; 3387 } 3388 3389 if (!metadata[j]) { 3390 err = -ENOMEM; 3391 goto err_free_metadata; 3392 } 3393 3394 if ((int) metadata[j][CS_ETM_CPU] > max_cpu) 3395 max_cpu = metadata[j][CS_ETM_CPU]; 3396 } 3397 3398 /* 3399 * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and 3400 * CS_ETMV4_PRIV_MAX mark how many double words are in the 3401 * global metadata, and each cpu's metadata respectively. 3402 * The following tests if the correct number of double words was 3403 * present in the auxtrace info section. 3404 */ 3405 priv_size = total_size - event_header_size - INFO_HEADER_SIZE; 3406 if (i * 8 != priv_size) { 3407 err = -EINVAL; 3408 goto err_free_metadata; 3409 } 3410 3411 etm = zalloc(sizeof(*etm)); 3412 3413 if (!etm) { 3414 err = -ENOMEM; 3415 goto err_free_metadata; 3416 } 3417 3418 /* 3419 * As all the ETMs run at the same exception level, the system should 3420 * have the same PID format crossing CPUs. So cache the PID format 3421 * and reuse it for sequential decoding. 3422 */ 3423 etm->pid_fmt = cs_etm__init_pid_fmt(metadata[0]); 3424 3425 err = auxtrace_queues__init_nr(&etm->queues, max_cpu + 1); 3426 if (err) 3427 goto err_free_etm; 3428 3429 for (unsigned int j = 0; j < etm->queues.nr_queues; ++j) { 3430 err = cs_etm__setup_queue(etm, &etm->queues.queue_array[j], j); 3431 if (err) 3432 goto err_free_queues; 3433 } 3434 3435 if (session->itrace_synth_opts->set) { 3436 etm->synth_opts = *session->itrace_synth_opts; 3437 } else { 3438 itrace_synth_opts__set_default(&etm->synth_opts, 3439 session->itrace_synth_opts->default_no_sample); 3440 etm->synth_opts.callchain = false; 3441 } 3442 3443 etm->session = session; 3444 3445 etm->num_cpu = num_cpu; 3446 etm->pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> 32) & 0xffffffff); 3447 etm->snapshot_mode = (ptr[CS_ETM_SNAPSHOT] != 0); 3448 etm->metadata = metadata; 3449 etm->auxtrace_type = auxtrace_info->type; 3450 3451 if (etm->synth_opts.use_timestamp) 3452 /* 3453 * Prior to Armv8.4, Arm CPUs don't support FEAT_TRF feature, 3454 * therefore the decoder cannot know if the timestamp trace is 3455 * same with the kernel time. 3456 * 3457 * If a user has knowledge for the working platform and can 3458 * specify itrace option 'T' to tell decoder to forcely use the 3459 * traced timestamp as the kernel time. 3460 */ 3461 etm->has_virtual_ts = true; 3462 else 3463 /* Use virtual timestamps if all ETMs report ts_source = 1 */ 3464 etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu); 3465 3466 if (!etm->has_virtual_ts) 3467 ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n" 3468 "The time field of the samples will not be set accurately.\n" 3469 "For Arm CPUs prior to Armv8.4 or without support FEAT_TRF,\n" 3470 "you can specify the itrace option 'T' for timestamp decoding\n" 3471 "if the Coresight timestamp on the platform is same with the kernel time.\n\n"); 3472 3473 etm->auxtrace.process_event = cs_etm__process_event; 3474 etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event; 3475 etm->auxtrace.flush_events = cs_etm__flush_events; 3476 etm->auxtrace.free_events = cs_etm__free_events; 3477 etm->auxtrace.free = cs_etm__free; 3478 etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace; 3479 session->auxtrace = &etm->auxtrace; 3480 3481 err = cs_etm__setup_timeless_decoding(etm); 3482 if (err) 3483 return err; 3484 3485 etm->tc.time_shift = tc->time_shift; 3486 etm->tc.time_mult = tc->time_mult; 3487 etm->tc.time_zero = tc->time_zero; 3488 if (event_contains(*tc, time_cycles)) { 3489 etm->tc.time_cycles = tc->time_cycles; 3490 etm->tc.time_mask = tc->time_mask; 3491 etm->tc.cap_user_time_zero = tc->cap_user_time_zero; 3492 etm->tc.cap_user_time_short = tc->cap_user_time_short; 3493 } 3494 err = cs_etm__synth_events(etm, session); 3495 if (err) 3496 goto err_free_queues; 3497 3498 err = cs_etm__queue_aux_records(session); 3499 if (err) 3500 goto err_free_queues; 3501 3502 /* 3503 * Map Trace ID values to CPU metadata. 3504 * 3505 * Trace metadata will always contain Trace ID values from the legacy algorithm 3506 * in case it's read by a version of Perf that doesn't know about HW_ID packets 3507 * or the kernel doesn't emit them. 3508 * 3509 * The updated kernel drivers that use AUX_HW_ID to sent Trace IDs will attempt to use 3510 * the same IDs as the old algorithm as far as is possible, unless there are clashes 3511 * in which case a different value will be used. This means an older perf may still 3512 * be able to record and read files generate on a newer system. 3513 * 3514 * For a perf able to interpret AUX_HW_ID packets we first check for the presence of 3515 * those packets. If they are there then the values will be mapped and plugged into 3516 * the metadata and decoders are only created for each mapping received. 3517 * 3518 * If no AUX_HW_ID packets are present - which means a file recorded on an old kernel 3519 * then we map Trace ID values to CPU directly from the metadata and create decoders 3520 * for all mappings. 3521 */ 3522 3523 /* Scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */ 3524 aux_hw_id_found = 0; 3525 err = perf_session__peek_events(session, session->header.data_offset, 3526 session->header.data_size, 3527 cs_etm__process_aux_hw_id_cb, &aux_hw_id_found); 3528 if (err) 3529 goto err_free_queues; 3530 3531 /* if no HW ID found this is a file with metadata values only, map from metadata */ 3532 if (!aux_hw_id_found) { 3533 err = cs_etm__map_trace_ids_metadata(etm, num_cpu, metadata); 3534 if (err) 3535 goto err_free_queues; 3536 } 3537 3538 err = cs_etm__create_decoders(etm); 3539 if (err) 3540 goto err_free_queues; 3541 3542 etm->data_queued = etm->queues.populated; 3543 return 0; 3544 3545 err_free_queues: 3546 auxtrace_queues__free(&etm->queues); 3547 session->auxtrace = NULL; 3548 err_free_etm: 3549 zfree(&etm); 3550 err_free_metadata: 3551 /* No need to check @metadata[j], free(NULL) is supported */ 3552 for (int j = 0; j < num_cpu; j++) 3553 zfree(&metadata[j]); 3554 zfree(&metadata); 3555 return err; 3556 } 3557