1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright(C) 2015-2018 Linaro Limited. 4 * 5 * Author: Tor Jeremiassen <tor@ti.com> 6 * Author: Mathieu Poirier <mathieu.poirier@linaro.org> 7 */ 8 9 #include <linux/kernel.h> 10 #include <linux/bitfield.h> 11 #include <linux/bitops.h> 12 #include <linux/coresight-pmu.h> 13 #include <linux/err.h> 14 #include <linux/log2.h> 15 #include <linux/types.h> 16 #include <linux/zalloc.h> 17 18 #include <stdlib.h> 19 20 #include "auxtrace.h" 21 #include "color.h" 22 #include "cs-etm.h" 23 #include "cs-etm-decoder/cs-etm-decoder.h" 24 #include "debug.h" 25 #include "dso.h" 26 #include "evlist.h" 27 #include "intlist.h" 28 #include "machine.h" 29 #include "map.h" 30 #include "perf.h" 31 #include "session.h" 32 #include "map_symbol.h" 33 #include "branch.h" 34 #include "symbol.h" 35 #include "tool.h" 36 #include "thread.h" 37 #include "thread-stack.h" 38 #include "tsc.h" 39 #include <tools/libc_compat.h> 40 #include "util/synthetic-events.h" 41 #include "util/util.h" 42 43 struct cs_etm_auxtrace { 44 struct auxtrace auxtrace; 45 struct auxtrace_queues queues; 46 struct auxtrace_heap heap; 47 struct itrace_synth_opts synth_opts; 48 struct perf_session *session; 49 struct perf_tsc_conversion tc; 50 51 /* 52 * Timeless has no timestamps in the trace so overlapping mmap lookups 53 * are less accurate but produces smaller trace data. We use context IDs 54 * in the trace instead of matching timestamps with fork records so 55 * they're not really needed in the general case. Overlapping mmaps 56 * happen in cases like between a fork and an exec. 57 */ 58 bool timeless_decoding; 59 60 /* 61 * Per-thread ignores the trace channel ID and instead assumes that 62 * everything in a buffer comes from the same process regardless of 63 * which CPU it ran on. It also implies no context IDs so the TID is 64 * taken from the auxtrace buffer. 65 */ 66 bool per_thread_decoding; 67 bool snapshot_mode; 68 bool data_queued; 69 bool has_virtual_ts; /* Virtual/Kernel timestamps in the trace. 
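 * When set, raw CoreSight timestamps are converted to perf time before they
 * are attached to samples; cs_etm__convert_sample_time() further down is
 * effectively:
 *
 *   return etm->has_virtual_ts ? tsc_to_perf_time(cs_timestamp, &etm->tc)
 *                              : cs_timestamp;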
*/ 70 71 int num_cpu; 72 u64 latest_kernel_timestamp; 73 u32 auxtrace_type; 74 u64 branches_sample_type; 75 u64 branches_id; 76 u64 instructions_sample_type; 77 u64 instructions_sample_period; 78 u64 instructions_id; 79 u64 **metadata; 80 unsigned int pmu_type; 81 enum cs_etm_pid_fmt pid_fmt; 82 }; 83 84 struct cs_etm_traceid_queue { 85 u8 trace_chan_id; 86 u64 period_instructions; 87 size_t last_branch_pos; 88 union perf_event *event_buf; 89 struct thread *thread; 90 struct thread *prev_packet_thread; 91 ocsd_ex_level prev_packet_el; 92 ocsd_ex_level el; 93 struct branch_stack *last_branch; 94 struct branch_stack *last_branch_rb; 95 struct cs_etm_packet *prev_packet; 96 struct cs_etm_packet *packet; 97 struct cs_etm_packet_queue packet_queue; 98 }; 99 100 enum cs_etm_format { 101 UNSET, 102 FORMATTED, 103 UNFORMATTED 104 }; 105 106 struct cs_etm_queue { 107 struct cs_etm_auxtrace *etm; 108 struct cs_etm_decoder *decoder; 109 struct auxtrace_buffer *buffer; 110 unsigned int queue_nr; 111 u8 pending_timestamp_chan_id; 112 enum cs_etm_format format; 113 u64 offset; 114 const unsigned char *buf; 115 size_t buf_len, buf_used; 116 /* Conversion between traceID and index in traceid_queues array */ 117 struct intlist *traceid_queues_list; 118 struct cs_etm_traceid_queue **traceid_queues; 119 /* Conversion between traceID and metadata pointers */ 120 struct intlist *traceid_list; 121 }; 122 123 static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm); 124 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, 125 pid_t tid); 126 static int cs_etm__get_data_block(struct cs_etm_queue *etmq); 127 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq); 128 static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata); 129 static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu); 130 static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata); 131 132 /* PTMs ETMIDR [11:8] set to b0011 */ 133 #define ETMIDR_PTM_VERSION 0x00000300 134 135 /* 136 * A struct auxtrace_heap_item only has a queue_nr and a timestamp to 137 * work with. One option is to modify to auxtrace_heap_XYZ() API or simply 138 * encode the etm queue number as the upper 16 bit and the channel as 139 * the lower 16 bit. 140 */ 141 #define TO_CS_QUEUE_NR(queue_nr, trace_chan_id) \ 142 (queue_nr << 16 | trace_chan_id) 143 #define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16) 144 #define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff) 145 146 static u32 cs_etm__get_v7_protocol_version(u32 etmidr) 147 { 148 etmidr &= ETMIDR_PTM_VERSION; 149 150 if (etmidr == ETMIDR_PTM_VERSION) 151 return CS_ETM_PROTO_PTM; 152 153 return CS_ETM_PROTO_ETMV3; 154 } 155 156 static int cs_etm__get_magic(struct cs_etm_queue *etmq, u8 trace_chan_id, u64 *magic) 157 { 158 struct int_node *inode; 159 u64 *metadata; 160 161 inode = intlist__find(etmq->traceid_list, trace_chan_id); 162 if (!inode) 163 return -EINVAL; 164 165 metadata = inode->priv; 166 *magic = metadata[CS_ETM_MAGIC]; 167 return 0; 168 } 169 170 int cs_etm__get_cpu(struct cs_etm_queue *etmq, u8 trace_chan_id, int *cpu) 171 { 172 struct int_node *inode; 173 u64 *metadata; 174 175 inode = intlist__find(etmq->traceid_list, trace_chan_id); 176 if (!inode) 177 return -EINVAL; 178 179 metadata = inode->priv; 180 *cpu = (int)metadata[CS_ETM_CPU]; 181 return 0; 182 } 183 184 /* 185 * The returned PID format is presented as an enum: 186 * 187 * CS_ETM_PIDFMT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced. 
188 * CS_ETM_PIDFMT_CTXTID2: CONTEXTIDR_EL2 is traced. 189 * CS_ETM_PIDFMT_NONE: No context IDs 190 * 191 * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2 192 * are enabled at the same time when the session runs on an EL2 kernel. 193 * This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be 194 * recorded in the trace data, the tool will selectively use 195 * CONTEXTIDR_EL2 as PID. 196 * 197 * The result is cached in etm->pid_fmt so this function only needs to be called 198 * when processing the aux info. 199 */ 200 static enum cs_etm_pid_fmt cs_etm__init_pid_fmt(u64 *metadata) 201 { 202 u64 val; 203 204 if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) { 205 val = metadata[CS_ETM_ETMCR]; 206 /* CONTEXTIDR is traced */ 207 if (val & BIT(ETM_OPT_CTXTID)) 208 return CS_ETM_PIDFMT_CTXTID; 209 } else { 210 val = metadata[CS_ETMV4_TRCCONFIGR]; 211 /* CONTEXTIDR_EL2 is traced */ 212 if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT))) 213 return CS_ETM_PIDFMT_CTXTID2; 214 /* CONTEXTIDR_EL1 is traced */ 215 else if (val & BIT(ETM4_CFG_BIT_CTXTID)) 216 return CS_ETM_PIDFMT_CTXTID; 217 } 218 219 return CS_ETM_PIDFMT_NONE; 220 } 221 222 enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq) 223 { 224 return etmq->etm->pid_fmt; 225 } 226 227 static int cs_etm__insert_trace_id_node(struct cs_etm_queue *etmq, 228 u8 trace_chan_id, u64 *cpu_metadata) 229 { 230 /* Get an RB node for this CPU */ 231 struct int_node *inode = intlist__findnew(etmq->traceid_list, trace_chan_id); 232 233 /* Something went wrong, no need to continue */ 234 if (!inode) 235 return -ENOMEM; 236 237 /* Disallow re-mapping a different traceID to metadata pair. */ 238 if (inode->priv) { 239 u64 *curr_cpu_data = inode->priv; 240 u8 curr_chan_id; 241 int err; 242 243 if (curr_cpu_data[CS_ETM_CPU] != cpu_metadata[CS_ETM_CPU]) { 244 pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n"); 245 return -EINVAL; 246 } 247 248 /* check that the mapped ID matches */ 249 err = cs_etm__metadata_get_trace_id(&curr_chan_id, curr_cpu_data); 250 if (err) 251 return err; 252 253 if (curr_chan_id != trace_chan_id) { 254 pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n"); 255 return -EINVAL; 256 } 257 258 /* Skip re-adding the same mappings if everything matched */ 259 return 0; 260 } 261 262 /* Not one we've seen before, associate the traceID with the metadata pointer */ 263 inode->priv = cpu_metadata; 264 265 return 0; 266 } 267 268 static struct cs_etm_queue *cs_etm__get_queue(struct cs_etm_auxtrace *etm, int cpu) 269 { 270 if (etm->per_thread_decoding) 271 return etm->queues.queue_array[0].priv; 272 else 273 return etm->queues.queue_array[cpu].priv; 274 } 275 276 static int cs_etm__map_trace_id_v0(struct cs_etm_auxtrace *etm, u8 trace_chan_id, 277 u64 *cpu_metadata) 278 { 279 struct cs_etm_queue *etmq; 280 281 /* 282 * If the queue is unformatted then only save one mapping in the 283 * queue associated with that CPU so only one decoder is made. 284 */ 285 etmq = cs_etm__get_queue(etm, cpu_metadata[CS_ETM_CPU]); 286 if (etmq->format == UNFORMATTED) 287 return cs_etm__insert_trace_id_node(etmq, trace_chan_id, 288 cpu_metadata); 289 290 /* 291 * Otherwise, version 0 trace IDs are global so save them into every 292 * queue. 
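 * For illustration (the trace IDs and CPUs here are hypothetical): with two
 * CPUs sharing one formatted sink, HW_ID packets mapping {CPU0 -> 0x10} and
 * {CPU1 -> 0x11} get inserted into the traceid_list of every queue, so any
 * queue that decodes a formatted frame can resolve either ID back to the
 * right CPU's metadata.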
293 */ 294 for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) { 295 int ret; 296 297 etmq = etm->queues.queue_array[i].priv; 298 ret = cs_etm__insert_trace_id_node(etmq, trace_chan_id, 299 cpu_metadata); 300 if (ret) 301 return ret; 302 } 303 304 return 0; 305 } 306 307 static int cs_etm__process_trace_id_v0(struct cs_etm_auxtrace *etm, int cpu, 308 u64 hw_id) 309 { 310 int err; 311 u64 *cpu_data; 312 u8 trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id); 313 314 cpu_data = get_cpu_data(etm, cpu); 315 if (cpu_data == NULL) 316 return -EINVAL; 317 318 err = cs_etm__map_trace_id_v0(etm, trace_chan_id, cpu_data); 319 if (err) 320 return err; 321 322 /* 323 * if we are picking up the association from the packet, need to plug 324 * the correct trace ID into the metadata for setting up decoders later. 325 */ 326 return cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data); 327 } 328 329 static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata) 330 { 331 u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC]; 332 333 switch (cs_etm_magic) { 334 case __perf_cs_etmv3_magic: 335 *trace_chan_id = (u8)(cpu_metadata[CS_ETM_ETMTRACEIDR] & 336 CORESIGHT_TRACE_ID_VAL_MASK); 337 break; 338 case __perf_cs_etmv4_magic: 339 case __perf_cs_ete_magic: 340 *trace_chan_id = (u8)(cpu_metadata[CS_ETMV4_TRCTRACEIDR] & 341 CORESIGHT_TRACE_ID_VAL_MASK); 342 break; 343 default: 344 return -EINVAL; 345 } 346 return 0; 347 } 348 349 /* 350 * update metadata trace ID from the value found in the AUX_HW_INFO packet. 351 * This will also clear the CORESIGHT_TRACE_ID_UNUSED_FLAG flag if present. 352 */ 353 static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata) 354 { 355 u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC]; 356 357 switch (cs_etm_magic) { 358 case __perf_cs_etmv3_magic: 359 cpu_metadata[CS_ETM_ETMTRACEIDR] = trace_chan_id; 360 break; 361 case __perf_cs_etmv4_magic: 362 case __perf_cs_ete_magic: 363 cpu_metadata[CS_ETMV4_TRCTRACEIDR] = trace_chan_id; 364 break; 365 366 default: 367 return -EINVAL; 368 } 369 return 0; 370 } 371 372 /* 373 * Get a metadata index for a specific cpu from an array. 374 * 375 */ 376 static int get_cpu_data_idx(struct cs_etm_auxtrace *etm, int cpu) 377 { 378 int i; 379 380 for (i = 0; i < etm->num_cpu; i++) { 381 if (etm->metadata[i][CS_ETM_CPU] == (u64)cpu) { 382 return i; 383 } 384 } 385 386 return -1; 387 } 388 389 /* 390 * Get a metadata for a specific cpu from an array. 391 * 392 */ 393 static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu) 394 { 395 int idx = get_cpu_data_idx(etm, cpu); 396 397 return (idx != -1) ? etm->metadata[idx] : NULL; 398 } 399 400 /* 401 * Handle the PERF_RECORD_AUX_OUTPUT_HW_ID event. 402 * 403 * The payload associates the Trace ID and the CPU. 404 * The routine is tolerant of seeing multiple packets with the same association, 405 * but a CPU / Trace ID association changing during a session is an error. 406 */ 407 static int cs_etm__process_aux_output_hw_id(struct perf_session *session, 408 union perf_event *event) 409 { 410 struct cs_etm_auxtrace *etm; 411 struct perf_sample sample; 412 struct evsel *evsel; 413 u64 hw_id; 414 int cpu, version, err; 415 416 /* extract and parse the HW ID */ 417 hw_id = event->aux_output_hw_id.hw_id; 418 version = FIELD_GET(CS_AUX_HW_ID_VERSION_MASK, hw_id); 419 420 /* check that we can handle this version */ 421 if (version > CS_AUX_HW_ID_CURR_VERSION) { 422 pr_err("CS ETM Trace: PERF_RECORD_AUX_OUTPUT_HW_ID version %d not supported. 
Please update Perf.\n", 423 version); 424 return -EINVAL; 425 } 426 427 /* get access to the etm metadata */ 428 etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace); 429 if (!etm || !etm->metadata) 430 return -EINVAL; 431 432 /* parse the sample to get the CPU */ 433 evsel = evlist__event2evsel(session->evlist, event); 434 if (!evsel) 435 return -EINVAL; 436 err = evsel__parse_sample(evsel, event, &sample); 437 if (err) 438 return err; 439 cpu = sample.cpu; 440 if (cpu == -1) { 441 /* no CPU in the sample - possibly recorded with an old version of perf */ 442 pr_err("CS_ETM: no CPU AUX_OUTPUT_HW_ID sample. Use compatible perf to record."); 443 return -EINVAL; 444 } 445 446 return cs_etm__process_trace_id_v0(etm, cpu, hw_id); 447 } 448 449 void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq, 450 u8 trace_chan_id) 451 { 452 /* 453 * When a timestamp packet is encountered the backend code 454 * is stopped so that the front end has time to process packets 455 * that were accumulated in the traceID queue. Since there can 456 * be more than one channel per cs_etm_queue, we need to specify 457 * what traceID queue needs servicing. 458 */ 459 etmq->pending_timestamp_chan_id = trace_chan_id; 460 } 461 462 static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq, 463 u8 *trace_chan_id) 464 { 465 struct cs_etm_packet_queue *packet_queue; 466 467 if (!etmq->pending_timestamp_chan_id) 468 return 0; 469 470 if (trace_chan_id) 471 *trace_chan_id = etmq->pending_timestamp_chan_id; 472 473 packet_queue = cs_etm__etmq_get_packet_queue(etmq, 474 etmq->pending_timestamp_chan_id); 475 if (!packet_queue) 476 return 0; 477 478 /* Acknowledge pending status */ 479 etmq->pending_timestamp_chan_id = 0; 480 481 /* See function cs_etm_decoder__do_{hard|soft}_timestamp() */ 482 return packet_queue->cs_timestamp; 483 } 484 485 static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue) 486 { 487 int i; 488 489 queue->head = 0; 490 queue->tail = 0; 491 queue->packet_count = 0; 492 for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) { 493 queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN; 494 queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR; 495 queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR; 496 queue->packet_buffer[i].instr_count = 0; 497 queue->packet_buffer[i].last_instr_taken_branch = false; 498 queue->packet_buffer[i].last_instr_size = 0; 499 queue->packet_buffer[i].last_instr_type = 0; 500 queue->packet_buffer[i].last_instr_subtype = 0; 501 queue->packet_buffer[i].last_instr_cond = 0; 502 queue->packet_buffer[i].flags = 0; 503 queue->packet_buffer[i].exception_number = UINT32_MAX; 504 queue->packet_buffer[i].trace_chan_id = UINT8_MAX; 505 queue->packet_buffer[i].cpu = INT_MIN; 506 } 507 } 508 509 static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq) 510 { 511 int idx; 512 struct int_node *inode; 513 struct cs_etm_traceid_queue *tidq; 514 struct intlist *traceid_queues_list = etmq->traceid_queues_list; 515 516 intlist__for_each_entry(inode, traceid_queues_list) { 517 idx = (int)(intptr_t)inode->priv; 518 tidq = etmq->traceid_queues[idx]; 519 cs_etm__clear_packet_queue(&tidq->packet_queue); 520 } 521 } 522 523 static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq, 524 struct cs_etm_traceid_queue *tidq, 525 u8 trace_chan_id) 526 { 527 int rc = -ENOMEM; 528 struct auxtrace_queue *queue; 529 struct cs_etm_auxtrace *etm = etmq->etm; 530 531 cs_etm__clear_packet_queue(&tidq->packet_queue); 532 533 queue = 
&etmq->etm->queues.queue_array[etmq->queue_nr]; 534 tidq->trace_chan_id = trace_chan_id; 535 tidq->el = tidq->prev_packet_el = ocsd_EL_unknown; 536 tidq->thread = machine__findnew_thread(&etm->session->machines.host, -1, 537 queue->tid); 538 tidq->prev_packet_thread = machine__idle_thread(&etm->session->machines.host); 539 540 tidq->packet = zalloc(sizeof(struct cs_etm_packet)); 541 if (!tidq->packet) 542 goto out; 543 544 tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet)); 545 if (!tidq->prev_packet) 546 goto out_free; 547 548 if (etm->synth_opts.last_branch) { 549 size_t sz = sizeof(struct branch_stack); 550 551 sz += etm->synth_opts.last_branch_sz * 552 sizeof(struct branch_entry); 553 tidq->last_branch = zalloc(sz); 554 if (!tidq->last_branch) 555 goto out_free; 556 tidq->last_branch_rb = zalloc(sz); 557 if (!tidq->last_branch_rb) 558 goto out_free; 559 } 560 561 tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); 562 if (!tidq->event_buf) 563 goto out_free; 564 565 return 0; 566 567 out_free: 568 zfree(&tidq->last_branch_rb); 569 zfree(&tidq->last_branch); 570 zfree(&tidq->prev_packet); 571 zfree(&tidq->packet); 572 out: 573 return rc; 574 } 575 576 static struct cs_etm_traceid_queue 577 *cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id) 578 { 579 int idx; 580 struct int_node *inode; 581 struct intlist *traceid_queues_list; 582 struct cs_etm_traceid_queue *tidq, **traceid_queues; 583 struct cs_etm_auxtrace *etm = etmq->etm; 584 585 if (etm->per_thread_decoding) 586 trace_chan_id = CS_ETM_PER_THREAD_TRACEID; 587 588 traceid_queues_list = etmq->traceid_queues_list; 589 590 /* 591 * Check if the traceid_queue exists for this traceID by looking 592 * in the queue list. 593 */ 594 inode = intlist__find(traceid_queues_list, trace_chan_id); 595 if (inode) { 596 idx = (int)(intptr_t)inode->priv; 597 return etmq->traceid_queues[idx]; 598 } 599 600 /* We couldn't find a traceid_queue for this traceID, allocate one */ 601 tidq = malloc(sizeof(*tidq)); 602 if (!tidq) 603 return NULL; 604 605 memset(tidq, 0, sizeof(*tidq)); 606 607 /* Get a valid index for the new traceid_queue */ 608 idx = intlist__nr_entries(traceid_queues_list); 609 /* Memory for the inode is freed in cs_etm__free_traceid_queues() */ 610 inode = intlist__findnew(traceid_queues_list, trace_chan_id); 611 if (!inode) 612 goto out_free; 613 614 /* Associate this traceID with this index */ 615 inode->priv = (void *)(intptr_t)idx; 616 617 if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id)) 618 goto out_free; 619 620 /* Grow the traceid_queues array by one unit */ 621 traceid_queues = etmq->traceid_queues; 622 traceid_queues = reallocarray(traceid_queues, 623 idx + 1, 624 sizeof(*traceid_queues)); 625 626 /* 627 * On failure reallocarray() returns NULL and the original block of 628 * memory is left untouched. 629 */ 630 if (!traceid_queues) 631 goto out_free; 632 633 traceid_queues[idx] = tidq; 634 etmq->traceid_queues = traceid_queues; 635 636 return etmq->traceid_queues[idx]; 637 638 out_free: 639 /* 640 * Function intlist__remove() removes the inode from the list 641 * and deletes the memory associated to it. 
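 * Note that none of the paths leading here have stored tidq in
 * etmq->traceid_queues[] yet (that only happens after a successful
 * reallocarray() above), so freeing it below cannot leave a dangling
 * pointer in the array.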
642 */ 643 intlist__remove(traceid_queues_list, inode); 644 free(tidq); 645 646 return NULL; 647 } 648 649 struct cs_etm_packet_queue 650 *cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id) 651 { 652 struct cs_etm_traceid_queue *tidq; 653 654 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); 655 if (tidq) 656 return &tidq->packet_queue; 657 658 return NULL; 659 } 660 661 static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm, 662 struct cs_etm_traceid_queue *tidq) 663 { 664 struct cs_etm_packet *tmp; 665 666 if (etm->synth_opts.branches || etm->synth_opts.last_branch || 667 etm->synth_opts.instructions) { 668 /* 669 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for 670 * the next incoming packet. 671 * 672 * Threads and exception levels are also tracked for both the 673 * previous and current packets. This is because the previous 674 * packet is used for the 'from' IP for branch samples, so the 675 * thread at that time must also be assigned to that sample. 676 * Across discontinuity packets the thread can change, so by 677 * tracking the thread for the previous packet the branch sample 678 * will have the correct info. 679 */ 680 tmp = tidq->packet; 681 tidq->packet = tidq->prev_packet; 682 tidq->prev_packet = tmp; 683 tidq->prev_packet_el = tidq->el; 684 thread__put(tidq->prev_packet_thread); 685 tidq->prev_packet_thread = thread__get(tidq->thread); 686 } 687 } 688 689 static void cs_etm__packet_dump(const char *pkt_string) 690 { 691 const char *color = PERF_COLOR_BLUE; 692 int len = strlen(pkt_string); 693 694 if (len && (pkt_string[len-1] == '\n')) 695 color_fprintf(stdout, color, " %s", pkt_string); 696 else 697 color_fprintf(stdout, color, " %s\n", pkt_string); 698 699 fflush(stdout); 700 } 701 702 static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params, 703 struct cs_etm_auxtrace *etm, int t_idx, 704 int m_idx, u32 etmidr) 705 { 706 u64 **metadata = etm->metadata; 707 708 t_params[t_idx].protocol = cs_etm__get_v7_protocol_version(etmidr); 709 t_params[t_idx].etmv3.reg_ctrl = metadata[m_idx][CS_ETM_ETMCR]; 710 t_params[t_idx].etmv3.reg_trc_id = metadata[m_idx][CS_ETM_ETMTRACEIDR]; 711 } 712 713 static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params, 714 struct cs_etm_auxtrace *etm, int t_idx, 715 int m_idx) 716 { 717 u64 **metadata = etm->metadata; 718 719 t_params[t_idx].protocol = CS_ETM_PROTO_ETMV4i; 720 t_params[t_idx].etmv4.reg_idr0 = metadata[m_idx][CS_ETMV4_TRCIDR0]; 721 t_params[t_idx].etmv4.reg_idr1 = metadata[m_idx][CS_ETMV4_TRCIDR1]; 722 t_params[t_idx].etmv4.reg_idr2 = metadata[m_idx][CS_ETMV4_TRCIDR2]; 723 t_params[t_idx].etmv4.reg_idr8 = metadata[m_idx][CS_ETMV4_TRCIDR8]; 724 t_params[t_idx].etmv4.reg_configr = metadata[m_idx][CS_ETMV4_TRCCONFIGR]; 725 t_params[t_idx].etmv4.reg_traceidr = metadata[m_idx][CS_ETMV4_TRCTRACEIDR]; 726 } 727 728 static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params, 729 struct cs_etm_auxtrace *etm, int t_idx, 730 int m_idx) 731 { 732 u64 **metadata = etm->metadata; 733 734 t_params[t_idx].protocol = CS_ETM_PROTO_ETE; 735 t_params[t_idx].ete.reg_idr0 = metadata[m_idx][CS_ETE_TRCIDR0]; 736 t_params[t_idx].ete.reg_idr1 = metadata[m_idx][CS_ETE_TRCIDR1]; 737 t_params[t_idx].ete.reg_idr2 = metadata[m_idx][CS_ETE_TRCIDR2]; 738 t_params[t_idx].ete.reg_idr8 = metadata[m_idx][CS_ETE_TRCIDR8]; 739 t_params[t_idx].ete.reg_configr = metadata[m_idx][CS_ETE_TRCCONFIGR]; 740 t_params[t_idx].ete.reg_traceidr = 
metadata[m_idx][CS_ETE_TRCTRACEIDR]; 741 t_params[t_idx].ete.reg_devarch = metadata[m_idx][CS_ETE_TRCDEVARCH]; 742 } 743 744 static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params, 745 struct cs_etm_auxtrace *etm, 746 enum cs_etm_format format, 747 int sample_cpu, 748 int decoders) 749 { 750 int t_idx, m_idx; 751 u32 etmidr; 752 u64 architecture; 753 754 for (t_idx = 0; t_idx < decoders; t_idx++) { 755 if (format == FORMATTED) 756 m_idx = t_idx; 757 else { 758 m_idx = get_cpu_data_idx(etm, sample_cpu); 759 if (m_idx == -1) { 760 pr_warning("CS_ETM: unknown CPU, falling back to first metadata\n"); 761 m_idx = 0; 762 } 763 } 764 765 architecture = etm->metadata[m_idx][CS_ETM_MAGIC]; 766 767 switch (architecture) { 768 case __perf_cs_etmv3_magic: 769 etmidr = etm->metadata[m_idx][CS_ETM_ETMIDR]; 770 cs_etm__set_trace_param_etmv3(t_params, etm, t_idx, m_idx, etmidr); 771 break; 772 case __perf_cs_etmv4_magic: 773 cs_etm__set_trace_param_etmv4(t_params, etm, t_idx, m_idx); 774 break; 775 case __perf_cs_ete_magic: 776 cs_etm__set_trace_param_ete(t_params, etm, t_idx, m_idx); 777 break; 778 default: 779 return -EINVAL; 780 } 781 } 782 783 return 0; 784 } 785 786 static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, 787 struct cs_etm_queue *etmq, 788 enum cs_etm_decoder_operation mode) 789 { 790 int ret = -EINVAL; 791 792 if (!(mode < CS_ETM_OPERATION_MAX)) 793 goto out; 794 795 d_params->packet_printer = cs_etm__packet_dump; 796 d_params->operation = mode; 797 d_params->data = etmq; 798 d_params->formatted = etmq->format == FORMATTED; 799 d_params->fsyncs = false; 800 d_params->hsyncs = false; 801 d_params->frame_aligned = true; 802 803 ret = 0; 804 out: 805 return ret; 806 } 807 808 static void cs_etm__dump_event(struct cs_etm_queue *etmq, 809 struct auxtrace_buffer *buffer) 810 { 811 int ret; 812 const char *color = PERF_COLOR_BLUE; 813 size_t buffer_used = 0; 814 815 fprintf(stdout, "\n"); 816 color_fprintf(stdout, color, 817 ". ... CoreSight %s Trace data: size %#zx bytes\n", 818 cs_etm_decoder__get_name(etmq->decoder), buffer->size); 819 820 do { 821 size_t consumed; 822 823 ret = cs_etm_decoder__process_data_block( 824 etmq->decoder, buffer->offset, 825 &((u8 *)buffer->data)[buffer_used], 826 buffer->size - buffer_used, &consumed); 827 if (ret) 828 break; 829 830 buffer_used += consumed; 831 } while (buffer_used < buffer->size); 832 833 cs_etm_decoder__reset(etmq->decoder); 834 } 835 836 static int cs_etm__flush_events(struct perf_session *session, 837 const struct perf_tool *tool) 838 { 839 struct cs_etm_auxtrace *etm = container_of(session->auxtrace, 840 struct cs_etm_auxtrace, 841 auxtrace); 842 if (dump_trace) 843 return 0; 844 845 if (!tool->ordered_events) 846 return -EINVAL; 847 848 if (etm->timeless_decoding) { 849 /* 850 * Pass tid = -1 to process all queues. But likely they will have 851 * already been processed on PERF_RECORD_EXIT anyway. 
852 */ 853 return cs_etm__process_timeless_queues(etm, -1); 854 } 855 856 return cs_etm__process_timestamped_queues(etm); 857 } 858 859 static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq) 860 { 861 int idx; 862 uintptr_t priv; 863 struct int_node *inode, *tmp; 864 struct cs_etm_traceid_queue *tidq; 865 struct intlist *traceid_queues_list = etmq->traceid_queues_list; 866 867 intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) { 868 priv = (uintptr_t)inode->priv; 869 idx = priv; 870 871 /* Free this traceid_queue from the array */ 872 tidq = etmq->traceid_queues[idx]; 873 thread__zput(tidq->thread); 874 thread__zput(tidq->prev_packet_thread); 875 zfree(&tidq->event_buf); 876 zfree(&tidq->last_branch); 877 zfree(&tidq->last_branch_rb); 878 zfree(&tidq->prev_packet); 879 zfree(&tidq->packet); 880 zfree(&tidq); 881 882 /* 883 * Function intlist__remove() removes the inode from the list 884 * and delete the memory associated to it. 885 */ 886 intlist__remove(traceid_queues_list, inode); 887 } 888 889 /* Then the RB tree itself */ 890 intlist__delete(traceid_queues_list); 891 etmq->traceid_queues_list = NULL; 892 893 /* finally free the traceid_queues array */ 894 zfree(&etmq->traceid_queues); 895 } 896 897 static void cs_etm__free_queue(void *priv) 898 { 899 struct int_node *inode, *tmp; 900 struct cs_etm_queue *etmq = priv; 901 902 if (!etmq) 903 return; 904 905 cs_etm_decoder__free(etmq->decoder); 906 cs_etm__free_traceid_queues(etmq); 907 908 /* First remove all traceID/metadata nodes for the RB tree */ 909 intlist__for_each_entry_safe(inode, tmp, etmq->traceid_list) 910 intlist__remove(etmq->traceid_list, inode); 911 912 /* Then the RB tree itself */ 913 intlist__delete(etmq->traceid_list); 914 915 free(etmq); 916 } 917 918 static void cs_etm__free_events(struct perf_session *session) 919 { 920 unsigned int i; 921 struct cs_etm_auxtrace *aux = container_of(session->auxtrace, 922 struct cs_etm_auxtrace, 923 auxtrace); 924 struct auxtrace_queues *queues = &aux->queues; 925 926 for (i = 0; i < queues->nr_queues; i++) { 927 cs_etm__free_queue(queues->queue_array[i].priv); 928 queues->queue_array[i].priv = NULL; 929 } 930 931 auxtrace_queues__free(queues); 932 } 933 934 static void cs_etm__free(struct perf_session *session) 935 { 936 int i; 937 struct cs_etm_auxtrace *aux = container_of(session->auxtrace, 938 struct cs_etm_auxtrace, 939 auxtrace); 940 cs_etm__free_events(session); 941 session->auxtrace = NULL; 942 943 for (i = 0; i < aux->num_cpu; i++) 944 zfree(&aux->metadata[i]); 945 946 zfree(&aux->metadata); 947 zfree(&aux); 948 } 949 950 static bool cs_etm__evsel_is_auxtrace(struct perf_session *session, 951 struct evsel *evsel) 952 { 953 struct cs_etm_auxtrace *aux = container_of(session->auxtrace, 954 struct cs_etm_auxtrace, 955 auxtrace); 956 957 return evsel->core.attr.type == aux->pmu_type; 958 } 959 960 static struct machine *cs_etm__get_machine(struct cs_etm_queue *etmq, 961 ocsd_ex_level el) 962 { 963 enum cs_etm_pid_fmt pid_fmt = cs_etm__get_pid_fmt(etmq); 964 965 /* 966 * For any virtualisation based on nVHE (e.g. pKVM), or host kernels 967 * running at EL1 assume everything is the host. 968 */ 969 if (pid_fmt == CS_ETM_PIDFMT_CTXTID) 970 return &etmq->etm->session->machines.host; 971 972 /* 973 * Not perfect, but otherwise assume anything in EL1 is the default 974 * guest, and everything else is the host. Distinguishing between guest 975 * and host userspaces isn't currently supported either. Neither is 976 * multiple guest support. 
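 * Concretely, the switch statement below maps:
 *
 *   ocsd_EL1                              -> default guest machine
 *   ocsd_EL0 / ocsd_EL2 / ocsd_EL3 /
 *   ocsd_EL_unknown                       -> host machine
 *
 * while the CS_ETM_PIDFMT_CTXTID case above has already returned the host
 * machine before this point is reached.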
All this does is reduce the likeliness of 977 * decode errors where we look into the host kernel maps when it should 978 * have been the guest maps. 979 */ 980 switch (el) { 981 case ocsd_EL1: 982 return machines__find_guest(&etmq->etm->session->machines, 983 DEFAULT_GUEST_KERNEL_ID); 984 case ocsd_EL3: 985 case ocsd_EL2: 986 case ocsd_EL0: 987 case ocsd_EL_unknown: 988 default: 989 return &etmq->etm->session->machines.host; 990 } 991 } 992 993 static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address, 994 ocsd_ex_level el) 995 { 996 struct machine *machine = cs_etm__get_machine(etmq, el); 997 998 if (address >= machine__kernel_start(machine)) { 999 if (machine__is_host(machine)) 1000 return PERF_RECORD_MISC_KERNEL; 1001 else 1002 return PERF_RECORD_MISC_GUEST_KERNEL; 1003 } else { 1004 if (machine__is_host(machine)) 1005 return PERF_RECORD_MISC_USER; 1006 else { 1007 /* 1008 * Can't really happen at the moment because 1009 * cs_etm__get_machine() will always return 1010 * machines.host for any non EL1 trace. 1011 */ 1012 return PERF_RECORD_MISC_GUEST_USER; 1013 } 1014 } 1015 } 1016 1017 static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id, 1018 u64 address, size_t size, u8 *buffer, 1019 const ocsd_mem_space_acc_t mem_space) 1020 { 1021 u8 cpumode; 1022 u64 offset; 1023 int len; 1024 struct addr_location al; 1025 struct dso *dso; 1026 struct cs_etm_traceid_queue *tidq; 1027 int ret = 0; 1028 1029 if (!etmq) 1030 return 0; 1031 1032 addr_location__init(&al); 1033 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); 1034 if (!tidq) 1035 goto out; 1036 1037 /* 1038 * We've already tracked EL along side the PID in cs_etm__set_thread() 1039 * so double check that it matches what OpenCSD thinks as well. It 1040 * doesn't distinguish between EL0 and EL1 for this mem access callback 1041 * so we had to do the extra tracking. Skip validation if it's any of 1042 * the 'any' values. 1043 */ 1044 if (!(mem_space == OCSD_MEM_SPACE_ANY || 1045 mem_space == OCSD_MEM_SPACE_N || mem_space == OCSD_MEM_SPACE_S)) { 1046 if (mem_space & OCSD_MEM_SPACE_EL1N) { 1047 /* Includes both non secure EL1 and EL0 */ 1048 assert(tidq->el == ocsd_EL1 || tidq->el == ocsd_EL0); 1049 } else if (mem_space & OCSD_MEM_SPACE_EL2) 1050 assert(tidq->el == ocsd_EL2); 1051 else if (mem_space & OCSD_MEM_SPACE_EL3) 1052 assert(tidq->el == ocsd_EL3); 1053 } 1054 1055 cpumode = cs_etm__cpu_mode(etmq, address, tidq->el); 1056 1057 if (!thread__find_map(tidq->thread, cpumode, address, &al)) 1058 goto out; 1059 1060 dso = map__dso(al.map); 1061 if (!dso) 1062 goto out; 1063 1064 if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR && 1065 dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE)) 1066 goto out; 1067 1068 offset = map__map_ip(al.map, address); 1069 1070 map__load(al.map); 1071 1072 len = dso__data_read_offset(dso, maps__machine(thread__maps(tidq->thread)), 1073 offset, buffer, size); 1074 1075 if (len <= 0) { 1076 ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n" 1077 " Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n"); 1078 if (!dso__auxtrace_warned(dso)) { 1079 pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n", 1080 address, 1081 dso__long_name(dso) ? 
dso__long_name(dso) : "Unknown"); 1082 dso__set_auxtrace_warned(dso); 1083 } 1084 goto out; 1085 } 1086 ret = len; 1087 out: 1088 addr_location__exit(&al); 1089 return ret; 1090 } 1091 1092 static struct cs_etm_queue *cs_etm__alloc_queue(void) 1093 { 1094 struct cs_etm_queue *etmq = zalloc(sizeof(*etmq)); 1095 if (!etmq) 1096 return NULL; 1097 1098 etmq->traceid_queues_list = intlist__new(NULL); 1099 if (!etmq->traceid_queues_list) 1100 goto out_free; 1101 1102 /* 1103 * Create an RB tree for traceID-metadata tuple. Since the conversion 1104 * has to be made for each packet that gets decoded, optimizing access 1105 * in anything other than a sequential array is worth doing. 1106 */ 1107 etmq->traceid_list = intlist__new(NULL); 1108 if (!etmq->traceid_list) 1109 goto out_free; 1110 1111 return etmq; 1112 1113 out_free: 1114 intlist__delete(etmq->traceid_queues_list); 1115 free(etmq); 1116 1117 return NULL; 1118 } 1119 1120 static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, 1121 struct auxtrace_queue *queue, 1122 unsigned int queue_nr) 1123 { 1124 struct cs_etm_queue *etmq = queue->priv; 1125 1126 if (etmq) 1127 return 0; 1128 1129 etmq = cs_etm__alloc_queue(); 1130 1131 if (!etmq) 1132 return -ENOMEM; 1133 1134 queue->priv = etmq; 1135 etmq->etm = etm; 1136 etmq->queue_nr = queue_nr; 1137 queue->cpu = queue_nr; /* Placeholder, may be reset to -1 in per-thread mode */ 1138 etmq->offset = 0; 1139 1140 return 0; 1141 } 1142 1143 static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm, 1144 struct cs_etm_queue *etmq, 1145 unsigned int queue_nr) 1146 { 1147 int ret = 0; 1148 unsigned int cs_queue_nr; 1149 u8 trace_chan_id; 1150 u64 cs_timestamp; 1151 1152 /* 1153 * We are under a CPU-wide trace scenario. As such we need to know 1154 * when the code that generated the traces started to execute so that 1155 * it can be correlated with execution on other CPUs. So we get a 1156 * handle on the beginning of traces and decode until we find a 1157 * timestamp. The timestamp is then added to the auxtrace min heap 1158 * in order to know what nibble (of all the etmqs) to decode first. 1159 */ 1160 while (1) { 1161 /* 1162 * Fetch an aux_buffer from this etmq. Bail if no more 1163 * blocks or an error has been encountered. 1164 */ 1165 ret = cs_etm__get_data_block(etmq); 1166 if (ret <= 0) 1167 goto out; 1168 1169 /* 1170 * Run decoder on the trace block. The decoder will stop when 1171 * encountering a CS timestamp, a full packet queue or the end of 1172 * trace for that block. 1173 */ 1174 ret = cs_etm__decode_data_block(etmq); 1175 if (ret) 1176 goto out; 1177 1178 /* 1179 * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all 1180 * the timestamp calculation for us. 1181 */ 1182 cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id); 1183 1184 /* We found a timestamp, no need to continue. */ 1185 if (cs_timestamp) 1186 break; 1187 1188 /* 1189 * We didn't find a timestamp so empty all the traceid packet 1190 * queues before looking for another timestamp packet, either 1191 * in the current data block or a new one. Packets that were 1192 * just decoded are useless since no timestamp has been 1193 * associated with them. As such simply discard them. 1194 */ 1195 cs_etm__clear_all_packet_queues(etmq); 1196 } 1197 1198 /* 1199 * We have a timestamp. Add it to the min heap to reflect when 1200 * instructions conveyed by the range packets of this traceID queue 1201 * started to execute. 
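 * As an example (queue number and trace ID are illustrative), for auxtrace
 * queue 2 and trace ID 0x12 the heap key is
 *
 *   TO_CS_QUEUE_NR(2, 0x12) == (2 << 16) | 0x12 == 0x20012
 *
 * and the two halves can be recovered later with TO_QUEUE_NR() and
 * TO_TRACE_CHAN_ID().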
Once the same has been done for all the traceID 1202 * queues of each etmq, rendering and decoding can start in 1203 * chronological order. 1204 * 1205 * Note that packets decoded above are still in the traceID's packet 1206 * queue and will be processed in cs_etm__process_timestamped_queues(). 1207 */ 1208 cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id); 1209 ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp); 1210 out: 1211 return ret; 1212 } 1213 1214 static inline 1215 void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq, 1216 struct cs_etm_traceid_queue *tidq) 1217 { 1218 struct branch_stack *bs_src = tidq->last_branch_rb; 1219 struct branch_stack *bs_dst = tidq->last_branch; 1220 size_t nr = 0; 1221 1222 /* 1223 * Set the number of records before early exit: ->nr is used to 1224 * determine how many branches to copy from ->entries. 1225 */ 1226 bs_dst->nr = bs_src->nr; 1227 1228 /* 1229 * Early exit when there is nothing to copy. 1230 */ 1231 if (!bs_src->nr) 1232 return; 1233 1234 /* 1235 * As bs_src->entries is a circular buffer, we need to copy from it in 1236 * two steps. First, copy the branches from the most recently inserted 1237 * branch ->last_branch_pos until the end of bs_src->entries buffer. 1238 */ 1239 nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos; 1240 memcpy(&bs_dst->entries[0], 1241 &bs_src->entries[tidq->last_branch_pos], 1242 sizeof(struct branch_entry) * nr); 1243 1244 /* 1245 * If we wrapped around at least once, the branches from the beginning 1246 * of the bs_src->entries buffer and until the ->last_branch_pos element 1247 * are older valid branches: copy them over. The total number of 1248 * branches copied over will be equal to the number of branches asked by 1249 * the user in last_branch_sz. 1250 */ 1251 if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) { 1252 memcpy(&bs_dst->entries[nr], 1253 &bs_src->entries[0], 1254 sizeof(struct branch_entry) * tidq->last_branch_pos); 1255 } 1256 } 1257 1258 static inline 1259 void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq) 1260 { 1261 tidq->last_branch_pos = 0; 1262 tidq->last_branch_rb->nr = 0; 1263 } 1264 1265 static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq, 1266 u8 trace_chan_id, u64 addr) 1267 { 1268 u8 instrBytes[2]; 1269 1270 cs_etm__mem_access(etmq, trace_chan_id, addr, ARRAY_SIZE(instrBytes), 1271 instrBytes, 0); 1272 /* 1273 * T32 instruction size is indicated by bits[15:11] of the first 1274 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111 1275 * denote a 32-bit instruction. 1276 */ 1277 return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2; 1278 } 1279 1280 static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet) 1281 { 1282 /* 1283 * Return 0 for packets that have no addresses so that CS_ETM_INVAL_ADDR doesn't 1284 * appear in samples. 
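 * Its counterpart cs_etm__last_executed_instr() below returns
 * end_addr - last_instr_size, i.e. the address of the final instruction in
 * the range; e.g. (addresses hypothetical) a range ending at 0x400520 whose
 * last instruction is 4 bytes wide yields 0x40051c.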
1285 */ 1286 if (packet->sample_type == CS_ETM_DISCONTINUITY || 1287 packet->sample_type == CS_ETM_EXCEPTION) 1288 return 0; 1289 1290 return packet->start_addr; 1291 } 1292 1293 static inline 1294 u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet) 1295 { 1296 /* Returns 0 for the CS_ETM_DISCONTINUITY packet */ 1297 if (packet->sample_type == CS_ETM_DISCONTINUITY) 1298 return 0; 1299 1300 return packet->end_addr - packet->last_instr_size; 1301 } 1302 1303 static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq, 1304 u64 trace_chan_id, 1305 const struct cs_etm_packet *packet, 1306 u64 offset) 1307 { 1308 if (packet->isa == CS_ETM_ISA_T32) { 1309 u64 addr = packet->start_addr; 1310 1311 while (offset) { 1312 addr += cs_etm__t32_instr_size(etmq, 1313 trace_chan_id, addr); 1314 offset--; 1315 } 1316 return addr; 1317 } 1318 1319 /* Assume a 4 byte instruction size (A32/A64) */ 1320 return packet->start_addr + offset * 4; 1321 } 1322 1323 static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq, 1324 struct cs_etm_traceid_queue *tidq) 1325 { 1326 struct branch_stack *bs = tidq->last_branch_rb; 1327 struct branch_entry *be; 1328 1329 /* 1330 * The branches are recorded in a circular buffer in reverse 1331 * chronological order: we start recording from the last element of the 1332 * buffer down. After writing the first element of the stack, move the 1333 * insert position back to the end of the buffer. 1334 */ 1335 if (!tidq->last_branch_pos) 1336 tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz; 1337 1338 tidq->last_branch_pos -= 1; 1339 1340 be = &bs->entries[tidq->last_branch_pos]; 1341 be->from = cs_etm__last_executed_instr(tidq->prev_packet); 1342 be->to = cs_etm__first_executed_instr(tidq->packet); 1343 /* No support for mispredict */ 1344 be->flags.mispred = 0; 1345 be->flags.predicted = 1; 1346 1347 /* 1348 * Increment bs->nr until reaching the number of last branches asked by 1349 * the user on the command line. 
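 * For illustration, with synth_opts.last_branch_sz == 4 the insert position
 * walks 3 -> 2 -> 1 -> 0 and then wraps back to 3, so entries[] always holds
 * the newest branch at last_branch_pos and bs->nr saturates at 4 once the
 * buffer has filled.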
1350 */ 1351 if (bs->nr < etmq->etm->synth_opts.last_branch_sz) 1352 bs->nr += 1; 1353 } 1354 1355 static int cs_etm__inject_event(union perf_event *event, 1356 struct perf_sample *sample, u64 type) 1357 { 1358 event->header.size = perf_event__sample_event_size(sample, type, 0); 1359 return perf_event__synthesize_sample(event, type, 0, sample); 1360 } 1361 1362 1363 static int 1364 cs_etm__get_trace(struct cs_etm_queue *etmq) 1365 { 1366 struct auxtrace_buffer *aux_buffer = etmq->buffer; 1367 struct auxtrace_buffer *old_buffer = aux_buffer; 1368 struct auxtrace_queue *queue; 1369 1370 queue = &etmq->etm->queues.queue_array[etmq->queue_nr]; 1371 1372 aux_buffer = auxtrace_buffer__next(queue, aux_buffer); 1373 1374 /* If no more data, drop the previous auxtrace_buffer and return */ 1375 if (!aux_buffer) { 1376 if (old_buffer) 1377 auxtrace_buffer__drop_data(old_buffer); 1378 etmq->buf_len = 0; 1379 return 0; 1380 } 1381 1382 etmq->buffer = aux_buffer; 1383 1384 /* If the aux_buffer doesn't have data associated, try to load it */ 1385 if (!aux_buffer->data) { 1386 /* get the file desc associated with the perf data file */ 1387 int fd = perf_data__fd(etmq->etm->session->data); 1388 1389 aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd); 1390 if (!aux_buffer->data) 1391 return -ENOMEM; 1392 } 1393 1394 /* If valid, drop the previous buffer */ 1395 if (old_buffer) 1396 auxtrace_buffer__drop_data(old_buffer); 1397 1398 etmq->buf_used = 0; 1399 etmq->buf_len = aux_buffer->size; 1400 etmq->buf = aux_buffer->data; 1401 1402 return etmq->buf_len; 1403 } 1404 1405 static void cs_etm__set_thread(struct cs_etm_queue *etmq, 1406 struct cs_etm_traceid_queue *tidq, pid_t tid, 1407 ocsd_ex_level el) 1408 { 1409 struct machine *machine = cs_etm__get_machine(etmq, el); 1410 1411 if (tid != -1) { 1412 thread__zput(tidq->thread); 1413 tidq->thread = machine__find_thread(machine, -1, tid); 1414 } 1415 1416 /* Couldn't find a known thread */ 1417 if (!tidq->thread) 1418 tidq->thread = machine__idle_thread(machine); 1419 1420 tidq->el = el; 1421 } 1422 1423 int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid, 1424 u8 trace_chan_id, ocsd_ex_level el) 1425 { 1426 struct cs_etm_traceid_queue *tidq; 1427 1428 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); 1429 if (!tidq) 1430 return -EINVAL; 1431 1432 cs_etm__set_thread(etmq, tidq, tid, el); 1433 return 0; 1434 } 1435 1436 bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq) 1437 { 1438 return !!etmq->etm->timeless_decoding; 1439 } 1440 1441 static void cs_etm__copy_insn(struct cs_etm_queue *etmq, 1442 u64 trace_chan_id, 1443 const struct cs_etm_packet *packet, 1444 struct perf_sample *sample) 1445 { 1446 /* 1447 * It's pointless to read instructions for the CS_ETM_DISCONTINUITY 1448 * packet, so directly bail out with 'insn_len' = 0. 1449 */ 1450 if (packet->sample_type == CS_ETM_DISCONTINUITY) { 1451 sample->insn_len = 0; 1452 return; 1453 } 1454 1455 /* 1456 * T32 instruction size might be 32-bit or 16-bit, decide by calling 1457 * cs_etm__t32_instr_size(). 1458 */ 1459 if (packet->isa == CS_ETM_ISA_T32) 1460 sample->insn_len = cs_etm__t32_instr_size(etmq, trace_chan_id, 1461 sample->ip); 1462 /* Otherwise, A64 and A32 instruction size are always 32-bit. 
*/ 1463 else 1464 sample->insn_len = 4; 1465 1466 cs_etm__mem_access(etmq, trace_chan_id, sample->ip, sample->insn_len, 1467 (void *)sample->insn, 0); 1468 } 1469 1470 u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp) 1471 { 1472 struct cs_etm_auxtrace *etm = etmq->etm; 1473 1474 if (etm->has_virtual_ts) 1475 return tsc_to_perf_time(cs_timestamp, &etm->tc); 1476 else 1477 return cs_timestamp; 1478 } 1479 1480 static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq, 1481 struct cs_etm_traceid_queue *tidq) 1482 { 1483 struct cs_etm_auxtrace *etm = etmq->etm; 1484 struct cs_etm_packet_queue *packet_queue = &tidq->packet_queue; 1485 1486 if (!etm->timeless_decoding && etm->has_virtual_ts) 1487 return packet_queue->cs_timestamp; 1488 else 1489 return etm->latest_kernel_timestamp; 1490 } 1491 1492 static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, 1493 struct cs_etm_traceid_queue *tidq, 1494 u64 addr, u64 period) 1495 { 1496 int ret = 0; 1497 struct cs_etm_auxtrace *etm = etmq->etm; 1498 union perf_event *event = tidq->event_buf; 1499 struct perf_sample sample = {.ip = 0,}; 1500 1501 event->sample.header.type = PERF_RECORD_SAMPLE; 1502 event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->el); 1503 event->sample.header.size = sizeof(struct perf_event_header); 1504 1505 /* Set time field based on etm auxtrace config. */ 1506 sample.time = cs_etm__resolve_sample_time(etmq, tidq); 1507 1508 sample.ip = addr; 1509 sample.pid = thread__pid(tidq->thread); 1510 sample.tid = thread__tid(tidq->thread); 1511 sample.id = etmq->etm->instructions_id; 1512 sample.stream_id = etmq->etm->instructions_id; 1513 sample.period = period; 1514 sample.cpu = tidq->packet->cpu; 1515 sample.flags = tidq->prev_packet->flags; 1516 sample.cpumode = event->sample.header.misc; 1517 1518 cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample); 1519 1520 if (etm->synth_opts.last_branch) 1521 sample.branch_stack = tidq->last_branch; 1522 1523 if (etm->synth_opts.inject) { 1524 ret = cs_etm__inject_event(event, &sample, 1525 etm->instructions_sample_type); 1526 if (ret) 1527 return ret; 1528 } 1529 1530 ret = perf_session__deliver_synth_event(etm->session, event, &sample); 1531 1532 if (ret) 1533 pr_err( 1534 "CS ETM Trace: failed to deliver instruction event, error %d\n", 1535 ret); 1536 1537 return ret; 1538 } 1539 1540 /* 1541 * The cs etm packet encodes an instruction range between a branch target 1542 * and the next taken branch. Generate sample accordingly. 1543 */ 1544 static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, 1545 struct cs_etm_traceid_queue *tidq) 1546 { 1547 int ret = 0; 1548 struct cs_etm_auxtrace *etm = etmq->etm; 1549 struct perf_sample sample = {.ip = 0,}; 1550 union perf_event *event = tidq->event_buf; 1551 struct dummy_branch_stack { 1552 u64 nr; 1553 u64 hw_idx; 1554 struct branch_entry entries; 1555 } dummy_bs; 1556 u64 ip; 1557 1558 ip = cs_etm__last_executed_instr(tidq->prev_packet); 1559 1560 event->sample.header.type = PERF_RECORD_SAMPLE; 1561 event->sample.header.misc = cs_etm__cpu_mode(etmq, ip, 1562 tidq->prev_packet_el); 1563 event->sample.header.size = sizeof(struct perf_event_header); 1564 1565 /* Set time field based on etm auxtrace config. 
*/ 1566 sample.time = cs_etm__resolve_sample_time(etmq, tidq); 1567 1568 sample.ip = ip; 1569 sample.pid = thread__pid(tidq->prev_packet_thread); 1570 sample.tid = thread__tid(tidq->prev_packet_thread); 1571 sample.addr = cs_etm__first_executed_instr(tidq->packet); 1572 sample.id = etmq->etm->branches_id; 1573 sample.stream_id = etmq->etm->branches_id; 1574 sample.period = 1; 1575 sample.cpu = tidq->packet->cpu; 1576 sample.flags = tidq->prev_packet->flags; 1577 sample.cpumode = event->sample.header.misc; 1578 1579 cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->prev_packet, 1580 &sample); 1581 1582 /* 1583 * perf report cannot handle events without a branch stack 1584 */ 1585 if (etm->synth_opts.last_branch) { 1586 dummy_bs = (struct dummy_branch_stack){ 1587 .nr = 1, 1588 .hw_idx = -1ULL, 1589 .entries = { 1590 .from = sample.ip, 1591 .to = sample.addr, 1592 }, 1593 }; 1594 sample.branch_stack = (struct branch_stack *)&dummy_bs; 1595 } 1596 1597 if (etm->synth_opts.inject) { 1598 ret = cs_etm__inject_event(event, &sample, 1599 etm->branches_sample_type); 1600 if (ret) 1601 return ret; 1602 } 1603 1604 ret = perf_session__deliver_synth_event(etm->session, event, &sample); 1605 1606 if (ret) 1607 pr_err( 1608 "CS ETM Trace: failed to deliver instruction event, error %d\n", 1609 ret); 1610 1611 return ret; 1612 } 1613 1614 static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, 1615 struct perf_session *session) 1616 { 1617 struct evlist *evlist = session->evlist; 1618 struct evsel *evsel; 1619 struct perf_event_attr attr; 1620 bool found = false; 1621 u64 id; 1622 int err; 1623 1624 evlist__for_each_entry(evlist, evsel) { 1625 if (evsel->core.attr.type == etm->pmu_type) { 1626 found = true; 1627 break; 1628 } 1629 } 1630 1631 if (!found) { 1632 pr_debug("No selected events with CoreSight Trace data\n"); 1633 return 0; 1634 } 1635 1636 memset(&attr, 0, sizeof(struct perf_event_attr)); 1637 attr.size = sizeof(struct perf_event_attr); 1638 attr.type = PERF_TYPE_HARDWARE; 1639 attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK; 1640 attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | 1641 PERF_SAMPLE_PERIOD; 1642 if (etm->timeless_decoding) 1643 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; 1644 else 1645 attr.sample_type |= PERF_SAMPLE_TIME; 1646 1647 attr.exclude_user = evsel->core.attr.exclude_user; 1648 attr.exclude_kernel = evsel->core.attr.exclude_kernel; 1649 attr.exclude_hv = evsel->core.attr.exclude_hv; 1650 attr.exclude_host = evsel->core.attr.exclude_host; 1651 attr.exclude_guest = evsel->core.attr.exclude_guest; 1652 attr.sample_id_all = evsel->core.attr.sample_id_all; 1653 attr.read_format = evsel->core.attr.read_format; 1654 1655 /* create new id val to be a fixed offset from evsel id */ 1656 id = evsel->core.id[0] + 1000000000; 1657 1658 if (!id) 1659 id = 1; 1660 1661 if (etm->synth_opts.branches) { 1662 attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; 1663 attr.sample_period = 1; 1664 attr.sample_type |= PERF_SAMPLE_ADDR; 1665 err = perf_session__deliver_synth_attr_event(session, &attr, id); 1666 if (err) 1667 return err; 1668 etm->branches_sample_type = attr.sample_type; 1669 etm->branches_id = id; 1670 id += 1; 1671 attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR; 1672 } 1673 1674 if (etm->synth_opts.last_branch) { 1675 attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; 1676 /* 1677 * We don't use the hardware index, but the sample generation 1678 * code uses the new format branch_stack with this field, 1679 * so the event attributes must indicate that it's 
present. 1680 */ 1681 attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX; 1682 } 1683 1684 if (etm->synth_opts.instructions) { 1685 attr.config = PERF_COUNT_HW_INSTRUCTIONS; 1686 attr.sample_period = etm->synth_opts.period; 1687 etm->instructions_sample_period = attr.sample_period; 1688 err = perf_session__deliver_synth_attr_event(session, &attr, id); 1689 if (err) 1690 return err; 1691 etm->instructions_sample_type = attr.sample_type; 1692 etm->instructions_id = id; 1693 id += 1; 1694 } 1695 1696 return 0; 1697 } 1698 1699 static int cs_etm__sample(struct cs_etm_queue *etmq, 1700 struct cs_etm_traceid_queue *tidq) 1701 { 1702 struct cs_etm_auxtrace *etm = etmq->etm; 1703 int ret; 1704 u8 trace_chan_id = tidq->trace_chan_id; 1705 u64 instrs_prev; 1706 1707 /* Get instructions remainder from previous packet */ 1708 instrs_prev = tidq->period_instructions; 1709 1710 tidq->period_instructions += tidq->packet->instr_count; 1711 1712 /* 1713 * Record a branch when the last instruction in 1714 * PREV_PACKET is a branch. 1715 */ 1716 if (etm->synth_opts.last_branch && 1717 tidq->prev_packet->sample_type == CS_ETM_RANGE && 1718 tidq->prev_packet->last_instr_taken_branch) 1719 cs_etm__update_last_branch_rb(etmq, tidq); 1720 1721 if (etm->synth_opts.instructions && 1722 tidq->period_instructions >= etm->instructions_sample_period) { 1723 /* 1724 * Emit instruction sample periodically 1725 * TODO: allow period to be defined in cycles and clock time 1726 */ 1727 1728 /* 1729 * Below diagram demonstrates the instruction samples 1730 * generation flows: 1731 * 1732 * Instrs Instrs Instrs Instrs 1733 * Sample(n) Sample(n+1) Sample(n+2) Sample(n+3) 1734 * | | | | 1735 * V V V V 1736 * -------------------------------------------------- 1737 * ^ ^ 1738 * | | 1739 * Period Period 1740 * instructions(Pi) instructions(Pi') 1741 * 1742 * | | 1743 * \---------------- -----------------/ 1744 * V 1745 * tidq->packet->instr_count 1746 * 1747 * Instrs Sample(n...) are the synthesised samples occurring 1748 * every etm->instructions_sample_period instructions - as 1749 * defined on the perf command line. Sample(n) is being the 1750 * last sample before the current etm packet, n+1 to n+3 1751 * samples are generated from the current etm packet. 1752 * 1753 * tidq->packet->instr_count represents the number of 1754 * instructions in the current etm packet. 1755 * 1756 * Period instructions (Pi) contains the number of 1757 * instructions executed after the sample point(n) from the 1758 * previous etm packet. This will always be less than 1759 * etm->instructions_sample_period. 1760 * 1761 * When generate new samples, it combines with two parts 1762 * instructions, one is the tail of the old packet and another 1763 * is the head of the new coming packet, to generate 1764 * sample(n+1); sample(n+2) and sample(n+3) consume the 1765 * instructions with sample period. After sample(n+3), the rest 1766 * instructions will be used by later packet and it is assigned 1767 * to tidq->period_instructions for next round calculation. 1768 */ 1769 1770 /* 1771 * Get the initial offset into the current packet instructions; 1772 * entry conditions ensure that instrs_prev is less than 1773 * etm->instructions_sample_period. 
1774 */ 1775 u64 offset = etm->instructions_sample_period - instrs_prev; 1776 u64 addr; 1777 1778 /* Prepare last branches for instruction sample */ 1779 if (etm->synth_opts.last_branch) 1780 cs_etm__copy_last_branch_rb(etmq, tidq); 1781 1782 while (tidq->period_instructions >= 1783 etm->instructions_sample_period) { 1784 /* 1785 * Calculate the address of the sampled instruction (-1 1786 * as sample is reported as though instruction has just 1787 * been executed, but PC has not advanced to next 1788 * instruction) 1789 */ 1790 addr = cs_etm__instr_addr(etmq, trace_chan_id, 1791 tidq->packet, offset - 1); 1792 ret = cs_etm__synth_instruction_sample( 1793 etmq, tidq, addr, 1794 etm->instructions_sample_period); 1795 if (ret) 1796 return ret; 1797 1798 offset += etm->instructions_sample_period; 1799 tidq->period_instructions -= 1800 etm->instructions_sample_period; 1801 } 1802 } 1803 1804 if (etm->synth_opts.branches) { 1805 bool generate_sample = false; 1806 1807 /* Generate sample for tracing on packet */ 1808 if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY) 1809 generate_sample = true; 1810 1811 /* Generate sample for branch taken packet */ 1812 if (tidq->prev_packet->sample_type == CS_ETM_RANGE && 1813 tidq->prev_packet->last_instr_taken_branch) 1814 generate_sample = true; 1815 1816 if (generate_sample) { 1817 ret = cs_etm__synth_branch_sample(etmq, tidq); 1818 if (ret) 1819 return ret; 1820 } 1821 } 1822 1823 cs_etm__packet_swap(etm, tidq); 1824 1825 return 0; 1826 } 1827 1828 static int cs_etm__exception(struct cs_etm_traceid_queue *tidq) 1829 { 1830 /* 1831 * When the exception packet is inserted, whether the last instruction 1832 * in previous range packet is taken branch or not, we need to force 1833 * to set 'prev_packet->last_instr_taken_branch' to true. This ensures 1834 * to generate branch sample for the instruction range before the 1835 * exception is trapped to kernel or before the exception returning. 1836 * 1837 * The exception packet includes the dummy address values, so don't 1838 * swap PACKET with PREV_PACKET. This keeps PREV_PACKET to be useful 1839 * for generating instruction and branch samples. 1840 */ 1841 if (tidq->prev_packet->sample_type == CS_ETM_RANGE) 1842 tidq->prev_packet->last_instr_taken_branch = true; 1843 1844 return 0; 1845 } 1846 1847 static int cs_etm__flush(struct cs_etm_queue *etmq, 1848 struct cs_etm_traceid_queue *tidq) 1849 { 1850 int err = 0; 1851 struct cs_etm_auxtrace *etm = etmq->etm; 1852 1853 /* Handle start tracing packet */ 1854 if (tidq->prev_packet->sample_type == CS_ETM_EMPTY) 1855 goto swap_packet; 1856 1857 if (etmq->etm->synth_opts.last_branch && 1858 etmq->etm->synth_opts.instructions && 1859 tidq->prev_packet->sample_type == CS_ETM_RANGE) { 1860 u64 addr; 1861 1862 /* Prepare last branches for instruction sample */ 1863 cs_etm__copy_last_branch_rb(etmq, tidq); 1864 1865 /* 1866 * Generate a last branch event for the branches left in the 1867 * circular buffer at the end of the trace. 
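 * Note that the sample synthesised below uses tidq->period_instructions,
 * the count accumulated since the last periodic sample, as its period
 * rather than a full instructions_sample_period, and the counter is then
 * reset to zero.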
1868 * 1869 * Use the address of the end of the last reported execution 1870 * range 1871 */ 1872 addr = cs_etm__last_executed_instr(tidq->prev_packet); 1873 1874 err = cs_etm__synth_instruction_sample( 1875 etmq, tidq, addr, 1876 tidq->period_instructions); 1877 if (err) 1878 return err; 1879 1880 tidq->period_instructions = 0; 1881 1882 } 1883 1884 if (etm->synth_opts.branches && 1885 tidq->prev_packet->sample_type == CS_ETM_RANGE) { 1886 err = cs_etm__synth_branch_sample(etmq, tidq); 1887 if (err) 1888 return err; 1889 } 1890 1891 swap_packet: 1892 cs_etm__packet_swap(etm, tidq); 1893 1894 /* Reset last branches after flush the trace */ 1895 if (etm->synth_opts.last_branch) 1896 cs_etm__reset_last_branch_rb(tidq); 1897 1898 return err; 1899 } 1900 1901 static int cs_etm__end_block(struct cs_etm_queue *etmq, 1902 struct cs_etm_traceid_queue *tidq) 1903 { 1904 int err; 1905 1906 /* 1907 * It has no new packet coming and 'etmq->packet' contains the stale 1908 * packet which was set at the previous time with packets swapping; 1909 * so skip to generate branch sample to avoid stale packet. 1910 * 1911 * For this case only flush branch stack and generate a last branch 1912 * event for the branches left in the circular buffer at the end of 1913 * the trace. 1914 */ 1915 if (etmq->etm->synth_opts.last_branch && 1916 etmq->etm->synth_opts.instructions && 1917 tidq->prev_packet->sample_type == CS_ETM_RANGE) { 1918 u64 addr; 1919 1920 /* Prepare last branches for instruction sample */ 1921 cs_etm__copy_last_branch_rb(etmq, tidq); 1922 1923 /* 1924 * Use the address of the end of the last reported execution 1925 * range. 1926 */ 1927 addr = cs_etm__last_executed_instr(tidq->prev_packet); 1928 1929 err = cs_etm__synth_instruction_sample( 1930 etmq, tidq, addr, 1931 tidq->period_instructions); 1932 if (err) 1933 return err; 1934 1935 tidq->period_instructions = 0; 1936 } 1937 1938 return 0; 1939 } 1940 /* 1941 * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue 1942 * if need be. 1943 * Returns: < 0 if error 1944 * = 0 if no more auxtrace_buffer to read 1945 * > 0 if the current buffer isn't empty yet 1946 */ 1947 static int cs_etm__get_data_block(struct cs_etm_queue *etmq) 1948 { 1949 int ret; 1950 1951 if (!etmq->buf_len) { 1952 ret = cs_etm__get_trace(etmq); 1953 if (ret <= 0) 1954 return ret; 1955 /* 1956 * We cannot assume consecutive blocks in the data file 1957 * are contiguous, reset the decoder to force re-sync. 1958 */ 1959 ret = cs_etm_decoder__reset(etmq->decoder); 1960 if (ret) 1961 return ret; 1962 } 1963 1964 return etmq->buf_len; 1965 } 1966 1967 static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id, 1968 struct cs_etm_packet *packet, 1969 u64 end_addr) 1970 { 1971 /* Initialise to keep compiler happy */ 1972 u16 instr16 = 0; 1973 u32 instr32 = 0; 1974 u64 addr; 1975 1976 switch (packet->isa) { 1977 case CS_ETM_ISA_T32: 1978 /* 1979 * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247: 1980 * 1981 * b'15 b'8 1982 * +-----------------+--------+ 1983 * | 1 1 0 1 1 1 1 1 | imm8 | 1984 * +-----------------+--------+ 1985 * 1986 * According to the specification, it only defines SVC for T32 1987 * with 16 bits instruction and has no definition for 32bits; 1988 * so below only read 2 bytes as instruction size for T32. 
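 *
 * For example (illustrative encoding): 0xDF05 is SVC #5 in T32 and
 * matches the (instr16 & 0xFF00) == 0xDF00 check below.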
1989 */ 1990 addr = end_addr - 2; 1991 cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr16), 1992 (u8 *)&instr16, 0); 1993 if ((instr16 & 0xFF00) == 0xDF00) 1994 return true; 1995 1996 break; 1997 case CS_ETM_ISA_A32: 1998 /* 1999 * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247: 2000 * 2001 * b'31 b'28 b'27 b'24 2002 * +---------+---------+-------------------------+ 2003 * | !1111 | 1 1 1 1 | imm24 | 2004 * +---------+---------+-------------------------+ 2005 */ 2006 addr = end_addr - 4; 2007 cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32), 2008 (u8 *)&instr32, 0); 2009 if ((instr32 & 0x0F000000) == 0x0F000000 && 2010 (instr32 & 0xF0000000) != 0xF0000000) 2011 return true; 2012 2013 break; 2014 case CS_ETM_ISA_A64: 2015 /* 2016 * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294: 2017 * 2018 * b'31 b'21 b'4 b'0 2019 * +-----------------------+---------+-----------+ 2020 * | 1 1 0 1 0 1 0 0 0 0 0 | imm16 | 0 0 0 0 1 | 2021 * +-----------------------+---------+-----------+ 2022 */ 2023 addr = end_addr - 4; 2024 cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32), 2025 (u8 *)&instr32, 0); 2026 if ((instr32 & 0xFFE0001F) == 0xd4000001) 2027 return true; 2028 2029 break; 2030 case CS_ETM_ISA_UNKNOWN: 2031 default: 2032 break; 2033 } 2034 2035 return false; 2036 } 2037 2038 static bool cs_etm__is_syscall(struct cs_etm_queue *etmq, 2039 struct cs_etm_traceid_queue *tidq, u64 magic) 2040 { 2041 u8 trace_chan_id = tidq->trace_chan_id; 2042 struct cs_etm_packet *packet = tidq->packet; 2043 struct cs_etm_packet *prev_packet = tidq->prev_packet; 2044 2045 if (magic == __perf_cs_etmv3_magic) 2046 if (packet->exception_number == CS_ETMV3_EXC_SVC) 2047 return true; 2048 2049 /* 2050 * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and 2051 * HVC cases; need to check if it's SVC instruction based on 2052 * packet address. 
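 *
 * Note that the instruction which raised the exception is the last one
 * in the preceding range packet, which is why prev_packet->end_addr is
 * passed to cs_etm__is_svc_instr() below.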
2053 */ 2054 if (magic == __perf_cs_etmv4_magic) { 2055 if (packet->exception_number == CS_ETMV4_EXC_CALL && 2056 cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet, 2057 prev_packet->end_addr)) 2058 return true; 2059 } 2060 2061 return false; 2062 } 2063 2064 static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq, 2065 u64 magic) 2066 { 2067 struct cs_etm_packet *packet = tidq->packet; 2068 2069 if (magic == __perf_cs_etmv3_magic) 2070 if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT || 2071 packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT || 2072 packet->exception_number == CS_ETMV3_EXC_PE_RESET || 2073 packet->exception_number == CS_ETMV3_EXC_IRQ || 2074 packet->exception_number == CS_ETMV3_EXC_FIQ) 2075 return true; 2076 2077 if (magic == __perf_cs_etmv4_magic) 2078 if (packet->exception_number == CS_ETMV4_EXC_RESET || 2079 packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT || 2080 packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR || 2081 packet->exception_number == CS_ETMV4_EXC_INST_DEBUG || 2082 packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG || 2083 packet->exception_number == CS_ETMV4_EXC_IRQ || 2084 packet->exception_number == CS_ETMV4_EXC_FIQ) 2085 return true; 2086 2087 return false; 2088 } 2089 2090 static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, 2091 struct cs_etm_traceid_queue *tidq, 2092 u64 magic) 2093 { 2094 u8 trace_chan_id = tidq->trace_chan_id; 2095 struct cs_etm_packet *packet = tidq->packet; 2096 struct cs_etm_packet *prev_packet = tidq->prev_packet; 2097 2098 if (magic == __perf_cs_etmv3_magic) 2099 if (packet->exception_number == CS_ETMV3_EXC_SMC || 2100 packet->exception_number == CS_ETMV3_EXC_HYP || 2101 packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE || 2102 packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR || 2103 packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT || 2104 packet->exception_number == CS_ETMV3_EXC_DATA_FAULT || 2105 packet->exception_number == CS_ETMV3_EXC_GENERIC) 2106 return true; 2107 2108 if (magic == __perf_cs_etmv4_magic) { 2109 if (packet->exception_number == CS_ETMV4_EXC_TRAP || 2110 packet->exception_number == CS_ETMV4_EXC_ALIGNMENT || 2111 packet->exception_number == CS_ETMV4_EXC_INST_FAULT || 2112 packet->exception_number == CS_ETMV4_EXC_DATA_FAULT) 2113 return true; 2114 2115 /* 2116 * For CS_ETMV4_EXC_CALL, except SVC other instructions 2117 * (SMC, HVC) are taken as sync exceptions. 2118 */ 2119 if (packet->exception_number == CS_ETMV4_EXC_CALL && 2120 !cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet, 2121 prev_packet->end_addr)) 2122 return true; 2123 2124 /* 2125 * ETMv4 has 5 bits for exception number; if the numbers 2126 * are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ] 2127 * they are implementation defined exceptions. 2128 * 2129 * For this case, simply take it as sync exception. 
*/ 2131 if (packet->exception_number > CS_ETMV4_EXC_FIQ && 2132 packet->exception_number <= CS_ETMV4_EXC_END) 2133 return true; 2134 } 2135 2136 return false; 2137 } 2138 2139 static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq, 2140 struct cs_etm_traceid_queue *tidq) 2141 { 2142 struct cs_etm_packet *packet = tidq->packet; 2143 struct cs_etm_packet *prev_packet = tidq->prev_packet; 2144 u8 trace_chan_id = tidq->trace_chan_id; 2145 u64 magic; 2146 int ret; 2147 2148 switch (packet->sample_type) { 2149 case CS_ETM_RANGE: 2150 /* 2151 * Immediate branch instruction with neither link nor 2152 * return flag: this is a normal branch within 2153 * the function. 2154 */ 2155 if (packet->last_instr_type == OCSD_INSTR_BR && 2156 packet->last_instr_subtype == OCSD_S_INSTR_NONE) { 2157 packet->flags = PERF_IP_FLAG_BRANCH; 2158 2159 if (packet->last_instr_cond) 2160 packet->flags |= PERF_IP_FLAG_CONDITIONAL; 2161 } 2162 2163 /* 2164 * Immediate branch instruction with link (e.g. BL): this is 2165 * a branch instruction for a function call. 2166 */ 2167 if (packet->last_instr_type == OCSD_INSTR_BR && 2168 packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK) 2169 packet->flags = PERF_IP_FLAG_BRANCH | 2170 PERF_IP_FLAG_CALL; 2171 2172 /* 2173 * Indirect branch instruction with link (e.g. BLR): this is 2174 * a branch instruction for a function call. 2175 */ 2176 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && 2177 packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK) 2178 packet->flags = PERF_IP_FLAG_BRANCH | 2179 PERF_IP_FLAG_CALL; 2180 2181 /* 2182 * Indirect branch instruction with subtype 2183 * OCSD_S_INSTR_V7_IMPLIED_RET: this is an explicit hint of a 2184 * function return for A32/T32. 2185 */ 2186 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && 2187 packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET) 2188 packet->flags = PERF_IP_FLAG_BRANCH | 2189 PERF_IP_FLAG_RETURN; 2190 2191 /* 2192 * Indirect branch instruction without link (e.g. BR): usually 2193 * used for function return, especially for functions 2194 * in dynamically linked libraries. 2195 */ 2196 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && 2197 packet->last_instr_subtype == OCSD_S_INSTR_NONE) 2198 packet->flags = PERF_IP_FLAG_BRANCH | 2199 PERF_IP_FLAG_RETURN; 2200 2201 /* Return instruction for function return. */ 2202 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && 2203 packet->last_instr_subtype == OCSD_S_INSTR_V8_RET) 2204 packet->flags = PERF_IP_FLAG_BRANCH | 2205 PERF_IP_FLAG_RETURN; 2206 2207 /* 2208 * The decoder might insert a discontinuity in the middle of 2209 * instruction packets; fix up prev_packet with the 2210 * PERF_IP_FLAG_TRACE_BEGIN flag to indicate that trace has restarted. 2211 */ 2212 if (prev_packet->sample_type == CS_ETM_DISCONTINUITY) 2213 prev_packet->flags |= PERF_IP_FLAG_BRANCH | 2214 PERF_IP_FLAG_TRACE_BEGIN; 2215 2216 /* 2217 * If the previous packet is an exception return packet 2218 * and the return address directly follows an SVC instruction, 2219 * recalibrate the previous packet's sample flags 2220 * to PERF_IP_FLAG_SYSCALLRET.
*/ 2222 if (prev_packet->flags == (PERF_IP_FLAG_BRANCH | 2223 PERF_IP_FLAG_RETURN | 2224 PERF_IP_FLAG_INTERRUPT) && 2225 cs_etm__is_svc_instr(etmq, trace_chan_id, 2226 packet, packet->start_addr)) 2227 prev_packet->flags = PERF_IP_FLAG_BRANCH | 2228 PERF_IP_FLAG_RETURN | 2229 PERF_IP_FLAG_SYSCALLRET; 2230 break; 2231 case CS_ETM_DISCONTINUITY: 2232 /* 2233 * The trace is discontinuous; if the previous packet is 2234 * an instruction range packet, set the PERF_IP_FLAG_TRACE_END 2235 * flag on it. 2236 */ 2237 if (prev_packet->sample_type == CS_ETM_RANGE) 2238 prev_packet->flags |= PERF_IP_FLAG_BRANCH | 2239 PERF_IP_FLAG_TRACE_END; 2240 break; 2241 case CS_ETM_EXCEPTION: 2242 ret = cs_etm__get_magic(etmq, packet->trace_chan_id, &magic); 2243 if (ret) 2244 return ret; 2245 2246 /* The exception is for a system call. */ 2247 if (cs_etm__is_syscall(etmq, tidq, magic)) 2248 packet->flags = PERF_IP_FLAG_BRANCH | 2249 PERF_IP_FLAG_CALL | 2250 PERF_IP_FLAG_SYSCALLRET; 2251 /* 2252 * These exceptions are triggered by external signals from the bus, 2253 * interrupt controller, debug module, PE reset or halt. 2254 */ 2255 else if (cs_etm__is_async_exception(tidq, magic)) 2256 packet->flags = PERF_IP_FLAG_BRANCH | 2257 PERF_IP_FLAG_CALL | 2258 PERF_IP_FLAG_ASYNC | 2259 PERF_IP_FLAG_INTERRUPT; 2260 /* 2261 * Otherwise, the exception is caused by a trap, an instruction or 2262 * data fault, or an alignment error. 2263 */ 2264 else if (cs_etm__is_sync_exception(etmq, tidq, magic)) 2265 packet->flags = PERF_IP_FLAG_BRANCH | 2266 PERF_IP_FLAG_CALL | 2267 PERF_IP_FLAG_INTERRUPT; 2268 2269 /* 2270 * An exception packet is not used on its own to generate 2271 * samples; it is affiliated with the previous instruction 2272 * range packet, so copy the flags to the previous range 2273 * packet to tell perf 2274 * it is an exception taken branch. 2275 */ 2276 if (prev_packet->sample_type == CS_ETM_RANGE) 2277 prev_packet->flags = packet->flags; 2278 break; 2279 case CS_ETM_EXCEPTION_RET: 2280 /* 2281 * An exception return packet is likewise not used on its own 2282 * for generating samples; it is affiliated with the previous 2283 * instruction range packet, so set the previous range packet's 2284 * flags to tell perf 2285 * it is an exception return branch. 2286 * 2287 * The exception return can be for either a system call or 2288 * another exception type; unfortunately the packet doesn't 2289 * contain exception type related info so we cannot decide 2290 * the exception type purely based on the exception return packet. 2291 * Recording the exception number from the exception packet and 2292 * reusing it for the exception return packet is not reliable, 2293 * because the trace can be discontinuous or interrupts can 2294 * be nested; in those two cases the recorded exception number 2295 * cannot be used for the exception return packet. 2296 * 2297 * For an exception return packet, we only need to distinguish 2298 * whether it is for a system call or for another type. The 2299 * decision can therefore be deferred until the next packet, which 2300 * contains the return address; based on the return address we 2301 * can read out the previous instruction, check whether it's a 2302 * system call instruction, and then calibrate the sample flag 2303 * as needed.
2304 */ 2305 if (prev_packet->sample_type == CS_ETM_RANGE) 2306 prev_packet->flags = PERF_IP_FLAG_BRANCH | 2307 PERF_IP_FLAG_RETURN | 2308 PERF_IP_FLAG_INTERRUPT; 2309 break; 2310 case CS_ETM_EMPTY: 2311 default: 2312 break; 2313 } 2314 2315 return 0; 2316 } 2317 2318 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq) 2319 { 2320 int ret = 0; 2321 size_t processed = 0; 2322 2323 /* 2324 * Packets are decoded and added to the decoder's packet queue 2325 * until the decoder packet processing callback has requested that 2326 * processing stops or there is nothing left in the buffer. Normal 2327 * operations that stop processing are a timestamp packet or a full 2328 * decoder buffer queue. 2329 */ 2330 ret = cs_etm_decoder__process_data_block(etmq->decoder, 2331 etmq->offset, 2332 &etmq->buf[etmq->buf_used], 2333 etmq->buf_len, 2334 &processed); 2335 if (ret) 2336 goto out; 2337 2338 etmq->offset += processed; 2339 etmq->buf_used += processed; 2340 etmq->buf_len -= processed; 2341 2342 out: 2343 return ret; 2344 } 2345 2346 static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq, 2347 struct cs_etm_traceid_queue *tidq) 2348 { 2349 int ret; 2350 struct cs_etm_packet_queue *packet_queue; 2351 2352 packet_queue = &tidq->packet_queue; 2353 2354 /* Process each packet in this chunk */ 2355 while (1) { 2356 ret = cs_etm_decoder__get_packet(packet_queue, 2357 tidq->packet); 2358 if (ret <= 0) 2359 /* 2360 * Stop processing this chunk on 2361 * end of data or error 2362 */ 2363 break; 2364 2365 /* 2366 * Since packet addresses are swapped in packet 2367 * handling within below switch() statements, 2368 * thus setting sample flags must be called 2369 * prior to switch() statement to use address 2370 * information before packets swapping. 2371 */ 2372 ret = cs_etm__set_sample_flags(etmq, tidq); 2373 if (ret < 0) 2374 break; 2375 2376 switch (tidq->packet->sample_type) { 2377 case CS_ETM_RANGE: 2378 /* 2379 * If the packet contains an instruction 2380 * range, generate instruction sequence 2381 * events. 2382 */ 2383 cs_etm__sample(etmq, tidq); 2384 break; 2385 case CS_ETM_EXCEPTION: 2386 case CS_ETM_EXCEPTION_RET: 2387 /* 2388 * If the exception packet is coming, 2389 * make sure the previous instruction 2390 * range packet to be handled properly. 2391 */ 2392 cs_etm__exception(tidq); 2393 break; 2394 case CS_ETM_DISCONTINUITY: 2395 /* 2396 * Discontinuity in trace, flush 2397 * previous branch stack 2398 */ 2399 cs_etm__flush(etmq, tidq); 2400 break; 2401 case CS_ETM_EMPTY: 2402 /* 2403 * Should not receive empty packet, 2404 * report error. 2405 */ 2406 pr_err("CS ETM Trace: empty packet\n"); 2407 return -EINVAL; 2408 default: 2409 break; 2410 } 2411 } 2412 2413 return ret; 2414 } 2415 2416 static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq) 2417 { 2418 int idx; 2419 struct int_node *inode; 2420 struct cs_etm_traceid_queue *tidq; 2421 struct intlist *traceid_queues_list = etmq->traceid_queues_list; 2422 2423 intlist__for_each_entry(inode, traceid_queues_list) { 2424 idx = (int)(intptr_t)inode->priv; 2425 tidq = etmq->traceid_queues[idx]; 2426 2427 /* Ignore return value */ 2428 cs_etm__process_traceid_queue(etmq, tidq); 2429 2430 /* 2431 * Generate an instruction sample with the remaining 2432 * branchstack entries. 
2433 */ 2434 cs_etm__flush(etmq, tidq); 2435 } 2436 } 2437 2438 static int cs_etm__run_per_thread_timeless_decoder(struct cs_etm_queue *etmq) 2439 { 2440 int err = 0; 2441 struct cs_etm_traceid_queue *tidq; 2442 2443 tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID); 2444 if (!tidq) 2445 return -EINVAL; 2446 2447 /* Go through each buffer in the queue and decode them one by one */ 2448 while (1) { 2449 err = cs_etm__get_data_block(etmq); 2450 if (err <= 0) 2451 return err; 2452 2453 /* Run trace decoder until buffer consumed or end of trace */ 2454 do { 2455 err = cs_etm__decode_data_block(etmq); 2456 if (err) 2457 return err; 2458 2459 /* 2460 * Process each packet in this chunk, nothing to do if 2461 * an error occurs other than hoping the next one will 2462 * be better. 2463 */ 2464 err = cs_etm__process_traceid_queue(etmq, tidq); 2465 2466 } while (etmq->buf_len); 2467 2468 if (err == 0) 2469 /* Flush any remaining branch stack entries */ 2470 err = cs_etm__end_block(etmq, tidq); 2471 } 2472 2473 return err; 2474 } 2475 2476 static int cs_etm__run_per_cpu_timeless_decoder(struct cs_etm_queue *etmq) 2477 { 2478 int idx, err = 0; 2479 struct cs_etm_traceid_queue *tidq; 2480 struct int_node *inode; 2481 2482 /* Go through each buffer in the queue and decode them one by one */ 2483 while (1) { 2484 err = cs_etm__get_data_block(etmq); 2485 if (err <= 0) 2486 return err; 2487 2488 /* Run trace decoder until buffer consumed or end of trace */ 2489 do { 2490 err = cs_etm__decode_data_block(etmq); 2491 if (err) 2492 return err; 2493 2494 /* 2495 * cs_etm__run_per_thread_timeless_decoder() runs on a 2496 * single traceID queue because each TID has a separate 2497 * buffer. But here in per-cpu mode we need to iterate 2498 * over each channel instead. 
2499 */ 2500 intlist__for_each_entry(inode, 2501 etmq->traceid_queues_list) { 2502 idx = (int)(intptr_t)inode->priv; 2503 tidq = etmq->traceid_queues[idx]; 2504 cs_etm__process_traceid_queue(etmq, tidq); 2505 } 2506 } while (etmq->buf_len); 2507 2508 intlist__for_each_entry(inode, etmq->traceid_queues_list) { 2509 idx = (int)(intptr_t)inode->priv; 2510 tidq = etmq->traceid_queues[idx]; 2511 /* Flush any remaining branch stack entries */ 2512 err = cs_etm__end_block(etmq, tidq); 2513 if (err) 2514 return err; 2515 } 2516 } 2517 2518 return err; 2519 } 2520 2521 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, 2522 pid_t tid) 2523 { 2524 unsigned int i; 2525 struct auxtrace_queues *queues = &etm->queues; 2526 2527 for (i = 0; i < queues->nr_queues; i++) { 2528 struct auxtrace_queue *queue = &etm->queues.queue_array[i]; 2529 struct cs_etm_queue *etmq = queue->priv; 2530 struct cs_etm_traceid_queue *tidq; 2531 2532 if (!etmq) 2533 continue; 2534 2535 if (etm->per_thread_decoding) { 2536 tidq = cs_etm__etmq_get_traceid_queue( 2537 etmq, CS_ETM_PER_THREAD_TRACEID); 2538 2539 if (!tidq) 2540 continue; 2541 2542 if (tid == -1 || thread__tid(tidq->thread) == tid) 2543 cs_etm__run_per_thread_timeless_decoder(etmq); 2544 } else 2545 cs_etm__run_per_cpu_timeless_decoder(etmq); 2546 } 2547 2548 return 0; 2549 } 2550 2551 static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm) 2552 { 2553 int ret = 0; 2554 unsigned int cs_queue_nr, queue_nr, i; 2555 u8 trace_chan_id; 2556 u64 cs_timestamp; 2557 struct auxtrace_queue *queue; 2558 struct cs_etm_queue *etmq; 2559 struct cs_etm_traceid_queue *tidq; 2560 2561 /* 2562 * Pre-populate the heap with one entry from each queue so that we can 2563 * start processing in time order across all queues. 2564 */ 2565 for (i = 0; i < etm->queues.nr_queues; i++) { 2566 etmq = etm->queues.queue_array[i].priv; 2567 if (!etmq) 2568 continue; 2569 2570 ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i); 2571 if (ret) 2572 return ret; 2573 } 2574 2575 while (1) { 2576 if (!etm->heap.heap_cnt) 2577 goto out; 2578 2579 /* Take the entry at the top of the min heap */ 2580 cs_queue_nr = etm->heap.heap_array[0].queue_nr; 2581 queue_nr = TO_QUEUE_NR(cs_queue_nr); 2582 trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr); 2583 queue = &etm->queues.queue_array[queue_nr]; 2584 etmq = queue->priv; 2585 2586 /* 2587 * Remove the top entry from the heap since we are about 2588 * to process it. 2589 */ 2590 auxtrace_heap__pop(&etm->heap); 2591 2592 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); 2593 if (!tidq) { 2594 /* 2595 * No traceID queue has been allocated for this traceID, 2596 * which means something somewhere went very wrong. No 2597 * other choice than simply exit. 2598 */ 2599 ret = -EINVAL; 2600 goto out; 2601 } 2602 2603 /* 2604 * Packets associated with this timestamp are already in 2605 * the etmq's traceID queue, so process them. 2606 */ 2607 ret = cs_etm__process_traceid_queue(etmq, tidq); 2608 if (ret < 0) 2609 goto out; 2610 2611 /* 2612 * Packets for this timestamp have been processed, time to 2613 * move on to the next timestamp, fetching a new auxtrace_buffer 2614 * if need be. 2615 */ 2616 refetch: 2617 ret = cs_etm__get_data_block(etmq); 2618 if (ret < 0) 2619 goto out; 2620 2621 /* 2622 * No more auxtrace_buffers to process in this etmq, simply 2623 * move on to another entry in the auxtrace_heap. 
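 *
 * (The heap entry for this queue was already popped above, so
 * continuing without re-adding it effectively retires the queue.)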
*/ 2625 if (!ret) 2626 continue; 2627 2628 ret = cs_etm__decode_data_block(etmq); 2629 if (ret) 2630 goto out; 2631 2632 cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id); 2633 2634 if (!cs_timestamp) { 2635 /* 2636 * Function cs_etm__decode_data_block() returns when 2637 * there is no more trace to decode in the current 2638 * auxtrace_buffer OR when a timestamp has been 2639 * encountered on any of the traceID queues. Since we 2640 * did not get a timestamp, there is no more trace to 2641 * process in this auxtrace_buffer, so empty and 2642 * flush all traceID queues. 2643 */ 2644 cs_etm__clear_all_traceid_queues(etmq); 2645 2646 /* Fetch another auxtrace_buffer for this etmq */ 2647 goto refetch; 2648 } 2649 2650 /* 2651 * Add to the min heap the timestamp for packets that have 2652 * just been decoded. They will be processed and synthesized 2653 * during the next call to cs_etm__process_traceid_queue() for 2654 * this queue/traceID. 2655 */ 2656 cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id); 2657 ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp); 2658 } 2659 2660 out: 2661 return ret; 2662 } 2663 2664 static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm, 2665 union perf_event *event) 2666 { 2667 struct thread *th; 2668 2669 if (etm->timeless_decoding) 2670 return 0; 2671 2672 /* 2673 * Add the tid/pid to the log so that we can get a match when we get a 2674 * contextID from the decoder. Only track for the host: only kernel 2675 * trace is supported for guests, which wouldn't need PIDs, so this 2676 * should be fine. 2677 */ 2678 th = machine__findnew_thread(&etm->session->machines.host, 2679 event->itrace_start.pid, 2680 event->itrace_start.tid); 2681 if (!th) 2682 return -ENOMEM; 2683 2684 thread__put(th); 2685 2686 return 0; 2687 } 2688 2689 static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm, 2690 union perf_event *event) 2691 { 2692 struct thread *th; 2693 bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; 2694 2695 /* 2696 * Context switches in per-thread mode are irrelevant since perf 2697 * will start/stop tracing as the process is scheduled. 2698 */ 2699 if (etm->timeless_decoding) 2700 return 0; 2701 2702 /* 2703 * SWITCH_IN events carry the next process to be switched out while 2704 * SWITCH_OUT events carry the process to be switched in. As such 2705 * we don't care about IN events. 2706 */ 2707 if (!out) 2708 return 0; 2709 2710 /* 2711 * Add the tid/pid to the log so that we can get a match when we get a 2712 * contextID from the decoder. Only track for the host: only kernel 2713 * trace is supported for guests, which wouldn't need PIDs, so this 2714 * should be fine.
2715 */ 2716 th = machine__findnew_thread(&etm->session->machines.host, 2717 event->context_switch.next_prev_pid, 2718 event->context_switch.next_prev_tid); 2719 if (!th) 2720 return -ENOMEM; 2721 2722 thread__put(th); 2723 2724 return 0; 2725 } 2726 2727 static int cs_etm__process_event(struct perf_session *session, 2728 union perf_event *event, 2729 struct perf_sample *sample, 2730 const struct perf_tool *tool) 2731 { 2732 struct cs_etm_auxtrace *etm = container_of(session->auxtrace, 2733 struct cs_etm_auxtrace, 2734 auxtrace); 2735 2736 if (dump_trace) 2737 return 0; 2738 2739 if (!tool->ordered_events) { 2740 pr_err("CoreSight ETM Trace requires ordered events\n"); 2741 return -EINVAL; 2742 } 2743 2744 switch (event->header.type) { 2745 case PERF_RECORD_EXIT: 2746 /* 2747 * Don't need to wait for cs_etm__flush_events() in per-thread mode to 2748 * start the decode because we know there will be no more trace from 2749 * this thread. All this does is emit samples earlier than waiting for 2750 * the flush in other modes, but with timestamps it makes sense to wait 2751 * for flush so that events from different threads are interleaved 2752 * properly. 2753 */ 2754 if (etm->per_thread_decoding && etm->timeless_decoding) 2755 return cs_etm__process_timeless_queues(etm, 2756 event->fork.tid); 2757 break; 2758 2759 case PERF_RECORD_ITRACE_START: 2760 return cs_etm__process_itrace_start(etm, event); 2761 2762 case PERF_RECORD_SWITCH_CPU_WIDE: 2763 return cs_etm__process_switch_cpu_wide(etm, event); 2764 2765 case PERF_RECORD_AUX: 2766 /* 2767 * Record the latest kernel timestamp available in the header 2768 * for samples so that synthesised samples occur from this point 2769 * onwards. 2770 */ 2771 if (sample->time && (sample->time != (u64)-1)) 2772 etm->latest_kernel_timestamp = sample->time; 2773 break; 2774 2775 default: 2776 break; 2777 } 2778 2779 return 0; 2780 } 2781 2782 static void dump_queued_data(struct cs_etm_auxtrace *etm, 2783 struct perf_record_auxtrace *event) 2784 { 2785 struct auxtrace_buffer *buf; 2786 unsigned int i; 2787 /* 2788 * Find all buffers with same reference in the queues and dump them. 2789 * This is because the queues can contain multiple entries of the same 2790 * buffer that were split on aux records. 
2791 */ 2792 for (i = 0; i < etm->queues.nr_queues; ++i) 2793 list_for_each_entry(buf, &etm->queues.queue_array[i].head, list) 2794 if (buf->reference == event->reference) 2795 cs_etm__dump_event(etm->queues.queue_array[i].priv, buf); 2796 } 2797 2798 static int cs_etm__process_auxtrace_event(struct perf_session *session, 2799 union perf_event *event, 2800 const struct perf_tool *tool __maybe_unused) 2801 { 2802 struct cs_etm_auxtrace *etm = container_of(session->auxtrace, 2803 struct cs_etm_auxtrace, 2804 auxtrace); 2805 if (!etm->data_queued) { 2806 struct auxtrace_buffer *buffer; 2807 off_t data_offset; 2808 int fd = perf_data__fd(session->data); 2809 bool is_pipe = perf_data__is_pipe(session->data); 2810 int err; 2811 int idx = event->auxtrace.idx; 2812 2813 if (is_pipe) 2814 data_offset = 0; 2815 else { 2816 data_offset = lseek(fd, 0, SEEK_CUR); 2817 if (data_offset == -1) 2818 return -errno; 2819 } 2820 2821 err = auxtrace_queues__add_event(&etm->queues, session, 2822 event, data_offset, &buffer); 2823 if (err) 2824 return err; 2825 2826 if (dump_trace) 2827 if (auxtrace_buffer__get_data(buffer, fd)) { 2828 cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer); 2829 auxtrace_buffer__put_data(buffer); 2830 } 2831 } else if (dump_trace) 2832 dump_queued_data(etm, &event->auxtrace); 2833 2834 return 0; 2835 } 2836 2837 static int cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace *etm) 2838 { 2839 struct evsel *evsel; 2840 struct evlist *evlist = etm->session->evlist; 2841 2842 /* Override timeless mode with user input from --itrace=Z */ 2843 if (etm->synth_opts.timeless_decoding) { 2844 etm->timeless_decoding = true; 2845 return 0; 2846 } 2847 2848 /* 2849 * Find the cs_etm evsel and look at what its timestamp setting was 2850 */ 2851 evlist__for_each_entry(evlist, evsel) 2852 if (cs_etm__evsel_is_auxtrace(etm->session, evsel)) { 2853 etm->timeless_decoding = 2854 !(evsel->core.attr.config & BIT(ETM_OPT_TS)); 2855 return 0; 2856 } 2857 2858 pr_err("CS ETM: Couldn't find ETM evsel\n"); 2859 return -EINVAL; 2860 } 2861 2862 /* 2863 * Read a single cpu parameter block from the auxtrace_info priv block. 2864 * 2865 * For version 1 there is a per cpu nr_params entry. If we are handling 2866 * version 1 file, then there may be less, the same, or more params 2867 * indicated by this value than the compile time number we understand. 2868 * 2869 * For a version 0 info block, there are a fixed number, and we need to 2870 * fill out the nr_param value in the metadata we create. 
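 *
 * Sketch of the two per-cpu input layouts handled below (field names
 * only, sizes are illustrative):
 *   v0: [MAGIC][CPU][param 0]...[param N-1]              (N fixed per magic)
 *   v1: [MAGIC][CPU][NR_TRC_PARAMS = N][param 0]...[param N-1]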
2871 */ 2872 static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset, 2873 int out_blk_size, int nr_params_v0) 2874 { 2875 u64 *metadata = NULL; 2876 int hdr_version; 2877 int nr_in_params, nr_out_params, nr_cmn_params; 2878 int i, k; 2879 2880 metadata = zalloc(sizeof(*metadata) * out_blk_size); 2881 if (!metadata) 2882 return NULL; 2883 2884 /* read block current index & version */ 2885 i = *buff_in_offset; 2886 hdr_version = buff_in[CS_HEADER_VERSION]; 2887 2888 if (!hdr_version) { 2889 /* read version 0 info block into a version 1 metadata block */ 2890 nr_in_params = nr_params_v0; 2891 metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC]; 2892 metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU]; 2893 metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params; 2894 /* remaining block params at offset +1 from source */ 2895 for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++) 2896 metadata[k + 1] = buff_in[i + k]; 2897 /* version 0 has 2 common params */ 2898 nr_cmn_params = 2; 2899 } else { 2900 /* read version 1 info block - input and output nr_params may differ */ 2901 /* version 1 has 3 common params */ 2902 nr_cmn_params = 3; 2903 nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS]; 2904 2905 /* if input has more params than output - skip excess */ 2906 nr_out_params = nr_in_params + nr_cmn_params; 2907 if (nr_out_params > out_blk_size) 2908 nr_out_params = out_blk_size; 2909 2910 for (k = CS_ETM_MAGIC; k < nr_out_params; k++) 2911 metadata[k] = buff_in[i + k]; 2912 2913 /* record the actual nr params we copied */ 2914 metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params; 2915 } 2916 2917 /* adjust in offset by number of in params used */ 2918 i += nr_in_params + nr_cmn_params; 2919 *buff_in_offset = i; 2920 return metadata; 2921 } 2922 2923 /** 2924 * Puts a fragment of an auxtrace buffer into the auxtrace queues based 2925 * on the bounds of aux_event, if it matches with the buffer that's at 2926 * file_offset. 2927 * 2928 * Normally, whole auxtrace buffers would be added to the queue. But we 2929 * want to reset the decoder for every PERF_RECORD_AUX event, and the decoder 2930 * is reset across each buffer, so splitting the buffers up in advance has 2931 * the same effect. 2932 */ 2933 static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz, 2934 struct perf_record_aux *aux_event, struct perf_sample *sample) 2935 { 2936 int err; 2937 char buf[PERF_SAMPLE_MAX_SIZE]; 2938 union perf_event *auxtrace_event_union; 2939 struct perf_record_auxtrace *auxtrace_event; 2940 union perf_event auxtrace_fragment; 2941 __u64 aux_offset, aux_size; 2942 enum cs_etm_format format; 2943 2944 struct cs_etm_auxtrace *etm = container_of(session->auxtrace, 2945 struct cs_etm_auxtrace, 2946 auxtrace); 2947 2948 /* 2949 * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got 2950 * from looping through the auxtrace index. 2951 */ 2952 err = perf_session__peek_event(session, file_offset, buf, 2953 PERF_SAMPLE_MAX_SIZE, &auxtrace_event_union, NULL); 2954 if (err) 2955 return err; 2956 auxtrace_event = &auxtrace_event_union->auxtrace; 2957 if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE) 2958 return -EINVAL; 2959 2960 if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) || 2961 auxtrace_event->header.size != sz) { 2962 return -EINVAL; 2963 } 2964 2965 /* 2966 * In per-thread mode, auxtrace CPU is set to -1, but TID will be set instead. See 2967 * auxtrace_mmap_params__set_idx(). 
However, the sample AUX event will contain a 2968 * CPU as we set this always for the AUX_OUTPUT_HW_ID event. 2969 * So now compare only TIDs if auxtrace CPU is -1, and CPUs if auxtrace CPU is not -1. 2970 * Return 'not found' if mismatch. 2971 */ 2972 if (auxtrace_event->cpu == (__u32) -1) { 2973 etm->per_thread_decoding = true; 2974 if (auxtrace_event->tid != sample->tid) 2975 return 1; 2976 } else if (auxtrace_event->cpu != sample->cpu) { 2977 if (etm->per_thread_decoding) { 2978 /* 2979 * Found a per-cpu buffer after a per-thread one was 2980 * already found 2981 */ 2982 pr_err("CS ETM: Inconsistent per-thread/per-cpu mode.\n"); 2983 return -EINVAL; 2984 } 2985 return 1; 2986 } 2987 2988 if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) { 2989 /* 2990 * Clamp size in snapshot mode. The buffer size is clamped in 2991 * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect 2992 * the buffer size. 2993 */ 2994 aux_size = min(aux_event->aux_size, auxtrace_event->size); 2995 2996 /* 2997 * In this mode, the head also points to the end of the buffer so aux_offset 2998 * needs to have the size subtracted so it points to the beginning as in normal mode 2999 */ 3000 aux_offset = aux_event->aux_offset - aux_size; 3001 } else { 3002 aux_size = aux_event->aux_size; 3003 aux_offset = aux_event->aux_offset; 3004 } 3005 3006 if (aux_offset >= auxtrace_event->offset && 3007 aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) { 3008 struct cs_etm_queue *etmq = etm->queues.queue_array[auxtrace_event->idx].priv; 3009 3010 /* 3011 * If this AUX event was inside this buffer somewhere, create a new auxtrace event 3012 * based on the sizes of the aux event, and queue that fragment. 3013 */ 3014 auxtrace_fragment.auxtrace = *auxtrace_event; 3015 auxtrace_fragment.auxtrace.size = aux_size; 3016 auxtrace_fragment.auxtrace.offset = aux_offset; 3017 file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size; 3018 3019 pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64 3020 " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu); 3021 err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment, 3022 file_offset, NULL); 3023 if (err) 3024 return err; 3025 3026 format = (aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW) ? 3027 UNFORMATTED : FORMATTED; 3028 if (etmq->format != UNSET && format != etmq->format) { 3029 pr_err("CS_ETM: mixed formatted and unformatted trace not supported\n"); 3030 return -EINVAL; 3031 } 3032 etmq->format = format; 3033 return 0; 3034 } 3035 3036 /* Wasn't inside this buffer, but there were no parse errors. 
1 == 'not found' */ 3037 return 1; 3038 } 3039 3040 static int cs_etm__process_aux_hw_id_cb(struct perf_session *session, union perf_event *event, 3041 u64 offset __maybe_unused, void *data __maybe_unused) 3042 { 3043 /* look to handle PERF_RECORD_AUX_OUTPUT_HW_ID early to ensure decoders can be set up */ 3044 if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) { 3045 (*(int *)data)++; /* increment found count */ 3046 return cs_etm__process_aux_output_hw_id(session, event); 3047 } 3048 return 0; 3049 } 3050 3051 static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event, 3052 u64 offset __maybe_unused, void *data __maybe_unused) 3053 { 3054 struct perf_sample sample; 3055 int ret; 3056 struct auxtrace_index_entry *ent; 3057 struct auxtrace_index *auxtrace_index; 3058 struct evsel *evsel; 3059 size_t i; 3060 3061 /* Don't care about any other events, we're only queuing buffers for AUX events */ 3062 if (event->header.type != PERF_RECORD_AUX) 3063 return 0; 3064 3065 if (event->header.size < sizeof(struct perf_record_aux)) 3066 return -EINVAL; 3067 3068 /* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */ 3069 if (!event->aux.aux_size) 3070 return 0; 3071 3072 /* 3073 * Parse the sample, we need the sample_id_all data that comes after the event so that the 3074 * CPU or PID can be matched to an AUXTRACE buffer's CPU or PID. 3075 */ 3076 evsel = evlist__event2evsel(session->evlist, event); 3077 if (!evsel) 3078 return -EINVAL; 3079 ret = evsel__parse_sample(evsel, event, &sample); 3080 if (ret) 3081 return ret; 3082 3083 /* 3084 * Loop through the auxtrace index to find the buffer that matches up with this aux event. 3085 */ 3086 list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) { 3087 for (i = 0; i < auxtrace_index->nr; i++) { 3088 ent = &auxtrace_index->entries[i]; 3089 ret = cs_etm__queue_aux_fragment(session, ent->file_offset, 3090 ent->sz, &event->aux, &sample); 3091 /* 3092 * Stop search on error or successful values. Continue search on 3093 * 1 ('not found') 3094 */ 3095 if (ret != 1) 3096 return ret; 3097 } 3098 } 3099 3100 /* 3101 * Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but 3102 * don't exit with an error because it will still be possible to decode other aux records. 3103 */ 3104 pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64 3105 " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu); 3106 return 0; 3107 } 3108 3109 static int cs_etm__queue_aux_records(struct perf_session *session) 3110 { 3111 struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index, 3112 struct auxtrace_index, list); 3113 if (index && index->nr > 0) 3114 return perf_session__peek_events(session, session->header.data_offset, 3115 session->header.data_size, 3116 cs_etm__queue_aux_records_cb, NULL); 3117 3118 /* 3119 * We would get here if there are no entries in the index (either no auxtrace 3120 * buffers or no index at all). Fail silently as there is the possibility of 3121 * queueing them in cs_etm__process_auxtrace_event() if etm->data_queued is still 3122 * false. 3123 * 3124 * In that scenario, buffers will not be split by AUX records. 
3125 */ 3126 return 0; 3127 } 3128 3129 #define HAS_PARAM(j, type, param) (metadata[(j)][CS_ETM_NR_TRC_PARAMS] <= \ 3130 (CS_##type##_##param - CS_ETM_COMMON_BLK_MAX_V1)) 3131 3132 /* 3133 * Loop through the ETMs and complain if we find at least one where ts_source != 1 (virtual 3134 * timestamps). 3135 */ 3136 static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu) 3137 { 3138 int j; 3139 3140 for (j = 0; j < num_cpu; j++) { 3141 switch (metadata[j][CS_ETM_MAGIC]) { 3142 case __perf_cs_etmv4_magic: 3143 if (HAS_PARAM(j, ETMV4, TS_SOURCE) || metadata[j][CS_ETMV4_TS_SOURCE] != 1) 3144 return false; 3145 break; 3146 case __perf_cs_ete_magic: 3147 if (HAS_PARAM(j, ETE, TS_SOURCE) || metadata[j][CS_ETE_TS_SOURCE] != 1) 3148 return false; 3149 break; 3150 default: 3151 /* Unknown / unsupported magic number. */ 3152 return false; 3153 } 3154 } 3155 return true; 3156 } 3157 3158 /* map trace ids to correct metadata block, from information in metadata */ 3159 static int cs_etm__map_trace_ids_metadata(struct cs_etm_auxtrace *etm, int num_cpu, 3160 u64 **metadata) 3161 { 3162 u64 cs_etm_magic; 3163 u8 trace_chan_id; 3164 int i, err; 3165 3166 for (i = 0; i < num_cpu; i++) { 3167 cs_etm_magic = metadata[i][CS_ETM_MAGIC]; 3168 switch (cs_etm_magic) { 3169 case __perf_cs_etmv3_magic: 3170 metadata[i][CS_ETM_ETMTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK; 3171 trace_chan_id = (u8)(metadata[i][CS_ETM_ETMTRACEIDR]); 3172 break; 3173 case __perf_cs_etmv4_magic: 3174 case __perf_cs_ete_magic: 3175 metadata[i][CS_ETMV4_TRCTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK; 3176 trace_chan_id = (u8)(metadata[i][CS_ETMV4_TRCTRACEIDR]); 3177 break; 3178 default: 3179 /* unknown magic number */ 3180 return -EINVAL; 3181 } 3182 err = cs_etm__map_trace_id_v0(etm, trace_chan_id, metadata[i]); 3183 if (err) 3184 return err; 3185 } 3186 return 0; 3187 } 3188 3189 /* 3190 * If we found AUX_HW_ID packets, then set any metadata marked as unused to the 3191 * unused value to reduce the number of unneeded decoders created. 3192 */ 3193 static int cs_etm__clear_unused_trace_ids_metadata(int num_cpu, u64 **metadata) 3194 { 3195 u64 cs_etm_magic; 3196 int i; 3197 3198 for (i = 0; i < num_cpu; i++) { 3199 cs_etm_magic = metadata[i][CS_ETM_MAGIC]; 3200 switch (cs_etm_magic) { 3201 case __perf_cs_etmv3_magic: 3202 if (metadata[i][CS_ETM_ETMTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG) 3203 metadata[i][CS_ETM_ETMTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL; 3204 break; 3205 case __perf_cs_etmv4_magic: 3206 case __perf_cs_ete_magic: 3207 if (metadata[i][CS_ETMV4_TRCTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG) 3208 metadata[i][CS_ETMV4_TRCTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL; 3209 break; 3210 default: 3211 /* unknown magic number */ 3212 return -EINVAL; 3213 } 3214 } 3215 return 0; 3216 } 3217 3218 /* 3219 * Use the data gathered by the peeks for HW_ID (trace ID mappings) and AUX 3220 * (formatted or not) packets to create the decoders. 3221 */ 3222 static int cs_etm__create_queue_decoders(struct cs_etm_queue *etmq) 3223 { 3224 struct cs_etm_decoder_params d_params; 3225 3226 /* 3227 * Each queue can only contain data from one CPU when unformatted, so only one decoder is 3228 * needed. 3229 */ 3230 int decoders = etmq->format == FORMATTED ? 
etmq->etm->num_cpu : 1; 3231 3232 /* Use metadata to fill in trace parameters for trace decoder */ 3233 struct cs_etm_trace_params *t_params = zalloc(sizeof(*t_params) * decoders); 3234 3235 if (!t_params) 3236 goto out_free; 3237 3238 if (cs_etm__init_trace_params(t_params, etmq->etm, etmq->format, 3239 etmq->queue_nr, decoders)) 3240 goto out_free; 3241 3242 /* Set decoder parameters to decode trace packets */ 3243 if (cs_etm__init_decoder_params(&d_params, etmq, 3244 dump_trace ? CS_ETM_OPERATION_PRINT : 3245 CS_ETM_OPERATION_DECODE)) 3246 goto out_free; 3247 3248 etmq->decoder = cs_etm_decoder__new(decoders, &d_params, 3249 t_params); 3250 3251 if (!etmq->decoder) 3252 goto out_free; 3253 3254 /* 3255 * Register a function to handle all memory accesses required by 3256 * the trace decoder library. 3257 */ 3258 if (cs_etm_decoder__add_mem_access_cb(etmq->decoder, 3259 0x0L, ((u64) -1L), 3260 cs_etm__mem_access)) 3261 goto out_free_decoder; 3262 3263 zfree(&t_params); 3264 return 0; 3265 3266 out_free_decoder: 3267 cs_etm_decoder__free(etmq->decoder); 3268 out_free: 3269 zfree(&t_params); 3270 return -EINVAL; 3271 } 3272 3273 static int cs_etm__create_decoders(struct cs_etm_auxtrace *etm) 3274 { 3275 struct auxtrace_queues *queues = &etm->queues; 3276 3277 for (unsigned int i = 0; i < queues->nr_queues; i++) { 3278 bool empty = list_empty(&queues->queue_array[i].head); 3279 struct cs_etm_queue *etmq = queues->queue_array[i].priv; 3280 int ret; 3281 3282 /* 3283 * Don't create decoders for empty queues, mainly because 3284 * etmq->format is unknown for empty queues. 3285 */ 3286 assert(empty == (etmq->format == UNSET)); 3287 if (empty) 3288 continue; 3289 3290 ret = cs_etm__create_queue_decoders(etmq); 3291 if (ret) 3292 return ret; 3293 } 3294 return 0; 3295 } 3296 3297 int cs_etm__process_auxtrace_info_full(union perf_event *event, 3298 struct perf_session *session) 3299 { 3300 struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; 3301 struct cs_etm_auxtrace *etm = NULL; 3302 struct perf_record_time_conv *tc = &session->time_conv; 3303 int event_header_size = sizeof(struct perf_event_header); 3304 int total_size = auxtrace_info->header.size; 3305 int priv_size = 0; 3306 int num_cpu, max_cpu = 0; 3307 int err = 0; 3308 int aux_hw_id_found; 3309 int i; 3310 u64 *ptr = NULL; 3311 u64 **metadata = NULL; 3312 3313 /* First the global part */ 3314 ptr = (u64 *) auxtrace_info->priv; 3315 num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff; 3316 metadata = zalloc(sizeof(*metadata) * num_cpu); 3317 if (!metadata) 3318 return -ENOMEM; 3319 3320 /* Start parsing after the common part of the header */ 3321 i = CS_HEADER_VERSION_MAX; 3322 3323 /* 3324 * The metadata is stored in the auxtrace_info section and encodes 3325 * the configuration of the ARM embedded trace macrocell which is 3326 * required by the trace decoder to properly decode the trace due 3327 * to its highly compressed nature. 3328 */ 3329 for (int j = 0; j < num_cpu; j++) { 3330 if (ptr[i] == __perf_cs_etmv3_magic) { 3331 metadata[j] = 3332 cs_etm__create_meta_blk(ptr, &i, 3333 CS_ETM_PRIV_MAX, 3334 CS_ETM_NR_TRC_PARAMS_V0); 3335 } else if (ptr[i] == __perf_cs_etmv4_magic) { 3336 metadata[j] = 3337 cs_etm__create_meta_blk(ptr, &i, 3338 CS_ETMV4_PRIV_MAX, 3339 CS_ETMV4_NR_TRC_PARAMS_V0); 3340 } else if (ptr[i] == __perf_cs_ete_magic) { 3341 metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1); 3342 } else { 3343 ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". 
File could be from a newer version of perf.\n", 3344 ptr[i]); 3345 err = -EINVAL; 3346 goto err_free_metadata; 3347 } 3348 3349 if (!metadata[j]) { 3350 err = -ENOMEM; 3351 goto err_free_metadata; 3352 } 3353 3354 if ((int) metadata[j][CS_ETM_CPU] > max_cpu) 3355 max_cpu = metadata[j][CS_ETM_CPU]; 3356 } 3357 3358 /* 3359 * CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and 3360 * CS_ETMV4_PRIV_MAX mark how many double words are in the 3361 * global metadata and in each cpu's metadata respectively. 3362 * The following tests whether the correct number of double words was 3363 * present in the auxtrace info section. 3364 */ 3365 priv_size = total_size - event_header_size - INFO_HEADER_SIZE; 3366 if (i * 8 != priv_size) { 3367 err = -EINVAL; 3368 goto err_free_metadata; 3369 } 3370 3371 etm = zalloc(sizeof(*etm)); 3372 3373 if (!etm) { 3374 err = -ENOMEM; 3375 goto err_free_metadata; 3376 } 3377 3378 /* 3379 * As all the ETMs run at the same exception level, the system should 3380 * have the same PID format across CPUs. So cache the PID format 3381 * and reuse it for subsequent decoding. 3382 */ 3383 etm->pid_fmt = cs_etm__init_pid_fmt(metadata[0]); 3384 3385 err = auxtrace_queues__init_nr(&etm->queues, max_cpu + 1); 3386 if (err) 3387 goto err_free_etm; 3388 3389 for (unsigned int j = 0; j < etm->queues.nr_queues; ++j) { 3390 err = cs_etm__setup_queue(etm, &etm->queues.queue_array[j], j); 3391 if (err) 3392 goto err_free_queues; 3393 } 3394 3395 if (session->itrace_synth_opts->set) { 3396 etm->synth_opts = *session->itrace_synth_opts; 3397 } else { 3398 itrace_synth_opts__set_default(&etm->synth_opts, 3399 session->itrace_synth_opts->default_no_sample); 3400 etm->synth_opts.callchain = false; 3401 } 3402 3403 etm->session = session; 3404 3405 etm->num_cpu = num_cpu; 3406 etm->pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> 32) & 0xffffffff); 3407 etm->snapshot_mode = (ptr[CS_ETM_SNAPSHOT] != 0); 3408 etm->metadata = metadata; 3409 etm->auxtrace_type = auxtrace_info->type; 3410 3411 if (etm->synth_opts.use_timestamp) 3412 /* 3413 * Prior to Armv8.4, Arm CPUs don't support the FEAT_TRF feature, 3414 * so the decoder cannot know whether the traced timestamps 3415 * match the kernel time. 3416 * 3417 * A user who knows the platform can specify the itrace 3418 * option 'T' to force the decoder to use the 3419 * traced timestamp as the kernel time.
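 *
 * (For example, 'T' can be appended to the other itrace options,
 * e.g. --itrace=i100iT; the exact option string shown here is an
 * illustration only.)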
*/ 3421 etm->has_virtual_ts = true; 3422 else 3423 /* Use virtual timestamps if all ETMs report ts_source = 1 */ 3424 etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu); 3425 3426 if (!etm->has_virtual_ts) 3427 ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n" 3428 "The time field of the samples will not be set accurately.\n" 3429 "For Arm CPUs prior to Armv8.4 or without FEAT_TRF support,\n" 3430 "you can specify the itrace option 'T' for timestamp decoding\n" 3431 "if the Coresight timestamp on the platform is the same as the kernel time.\n\n"); 3432 3433 etm->auxtrace.process_event = cs_etm__process_event; 3434 etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event; 3435 etm->auxtrace.flush_events = cs_etm__flush_events; 3436 etm->auxtrace.free_events = cs_etm__free_events; 3437 etm->auxtrace.free = cs_etm__free; 3438 etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace; 3439 session->auxtrace = &etm->auxtrace; 3440 3441 err = cs_etm__setup_timeless_decoding(etm); 3442 if (err) 3443 return err; 3444 3445 etm->tc.time_shift = tc->time_shift; 3446 etm->tc.time_mult = tc->time_mult; 3447 etm->tc.time_zero = tc->time_zero; 3448 if (event_contains(*tc, time_cycles)) { 3449 etm->tc.time_cycles = tc->time_cycles; 3450 etm->tc.time_mask = tc->time_mask; 3451 etm->tc.cap_user_time_zero = tc->cap_user_time_zero; 3452 etm->tc.cap_user_time_short = tc->cap_user_time_short; 3453 } 3454 err = cs_etm__synth_events(etm, session); 3455 if (err) 3456 goto err_free_queues; 3457 3458 err = cs_etm__queue_aux_records(session); 3459 if (err) 3460 goto err_free_queues; 3461 3462 /* 3463 * Map Trace ID values to CPU metadata. 3464 * 3465 * Trace metadata will always contain Trace ID values from the legacy algorithm. If the 3466 * file has been recorded by a "new" perf updated to handle AUX_HW_ID, then the metadata 3467 * ID value will also have the CORESIGHT_TRACE_ID_UNUSED_FLAG set. 3468 * 3469 * The updated kernel drivers that use AUX_HW_ID to send Trace IDs will attempt to use 3470 * the same IDs as the old algorithm as far as is possible, unless there are clashes, 3471 * in which case a different value will be used. This means an older perf may still 3472 * be able to record and read files generated on a newer system. 3473 * 3474 * For a perf able to interpret AUX_HW_ID packets we first check for the presence of 3475 * those packets. If they are there then the values will be mapped and plugged into 3476 * the metadata. We then set any remaining metadata values that still have the unused 3477 * flag to the value CORESIGHT_TRACE_ID_UNUSED_VAL - which indicates no decoder is required. 3478 * 3479 * If no AUX_HW_ID packets are present - which means a file recorded on an old kernel - 3480 * then we map Trace ID values to CPU directly from the metadata, clearing any unused 3481 * flags if present.
3482 */ 3483 3484 /* Scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */ 3485 aux_hw_id_found = 0; 3486 err = perf_session__peek_events(session, session->header.data_offset, 3487 session->header.data_size, 3488 cs_etm__process_aux_hw_id_cb, &aux_hw_id_found); 3489 if (err) 3490 goto err_free_queues; 3491 3492 /* if HW ID found then clear any unused metadata ID values */ 3493 if (aux_hw_id_found) 3494 err = cs_etm__clear_unused_trace_ids_metadata(num_cpu, metadata); 3495 /* otherwise, this is a file with metadata values only, map from metadata */ 3496 else 3497 err = cs_etm__map_trace_ids_metadata(etm, num_cpu, metadata); 3498 3499 if (err) 3500 goto err_free_queues; 3501 3502 err = cs_etm__create_decoders(etm); 3503 if (err) 3504 goto err_free_queues; 3505 3506 etm->data_queued = etm->queues.populated; 3507 return 0; 3508 3509 err_free_queues: 3510 auxtrace_queues__free(&etm->queues); 3511 session->auxtrace = NULL; 3512 err_free_etm: 3513 zfree(&etm); 3514 err_free_metadata: 3515 /* No need to check @metadata[j], free(NULL) is supported */ 3516 for (int j = 0; j < num_cpu; j++) 3517 zfree(&metadata[j]); 3518 zfree(&metadata); 3519 return err; 3520 } 3521