// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright(C) 2015-2018 Linaro Limited.
 *
 * Author: Tor Jeremiassen <tor@ti.com>
 * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
 */

#include <linux/kernel.h>
#include <linux/bitfield.h>
#include <linux/bitops.h>
#include <linux/coresight-pmu.h>
#include <linux/err.h>
#include <linux/log2.h>
#include <linux/types.h>
#include <linux/zalloc.h>

#include <stdlib.h>

#include "auxtrace.h"
#include "color.h"
#include "cs-etm.h"
#include "cs-etm-decoder/cs-etm-decoder.h"
#include "debug.h"
#include "dso.h"
#include "evlist.h"
#include "intlist.h"
#include "machine.h"
#include "map.h"
#include "perf.h"
#include "session.h"
#include "map_symbol.h"
#include "branch.h"
#include "symbol.h"
#include "tool.h"
#include "thread.h"
#include "thread-stack.h"
#include "tsc.h"
#include <tools/libc_compat.h>
#include "util/synthetic-events.h"
#include "util/util.h"

struct cs_etm_auxtrace {
	struct auxtrace auxtrace;
	struct auxtrace_queues queues;
	struct auxtrace_heap heap;
	struct itrace_synth_opts synth_opts;
	struct perf_session *session;
	struct perf_tsc_conversion tc;

	/*
	 * Timeless mode has no timestamps in the trace, so overlapping mmap
	 * lookups are less accurate, but it produces smaller trace data. We
	 * use context IDs in the trace instead of matching timestamps with
	 * fork records, so timestamps aren't really needed in the general
	 * case. Overlapping mmaps happen in cases like between a fork and an
	 * exec.
	 */
	bool timeless_decoding;

	/*
	 * Per-thread decoding ignores the trace channel ID and instead
	 * assumes that everything in a buffer comes from the same process
	 * regardless of which CPU it ran on. It also implies no context IDs,
	 * so the TID is taken from the auxtrace buffer.
	 */
	bool per_thread_decoding;
	bool snapshot_mode;
	bool data_queued;
	bool has_virtual_ts; /* Virtual/Kernel timestamps in the trace.
*/ 70 71 int num_cpu; 72 u64 latest_kernel_timestamp; 73 u32 auxtrace_type; 74 u64 branches_sample_type; 75 u64 branches_id; 76 u64 instructions_sample_type; 77 u64 instructions_sample_period; 78 u64 instructions_id; 79 u64 **metadata; 80 unsigned int pmu_type; 81 enum cs_etm_pid_fmt pid_fmt; 82 }; 83 84 struct cs_etm_traceid_queue { 85 u8 trace_chan_id; 86 u64 period_instructions; 87 size_t last_branch_pos; 88 union perf_event *event_buf; 89 struct thread *thread; 90 struct thread *prev_packet_thread; 91 ocsd_ex_level prev_packet_el; 92 ocsd_ex_level el; 93 struct branch_stack *last_branch; 94 struct branch_stack *last_branch_rb; 95 struct cs_etm_packet *prev_packet; 96 struct cs_etm_packet *packet; 97 struct cs_etm_packet_queue packet_queue; 98 }; 99 100 struct cs_etm_queue { 101 struct cs_etm_auxtrace *etm; 102 struct cs_etm_decoder *decoder; 103 struct auxtrace_buffer *buffer; 104 unsigned int queue_nr; 105 u8 pending_timestamp_chan_id; 106 u64 offset; 107 const unsigned char *buf; 108 size_t buf_len, buf_used; 109 /* Conversion between traceID and index in traceid_queues array */ 110 struct intlist *traceid_queues_list; 111 struct cs_etm_traceid_queue **traceid_queues; 112 }; 113 114 /* RB tree for quick conversion between traceID and metadata pointers */ 115 static struct intlist *traceid_list; 116 117 static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm); 118 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, 119 pid_t tid); 120 static int cs_etm__get_data_block(struct cs_etm_queue *etmq); 121 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq); 122 123 /* PTMs ETMIDR [11:8] set to b0011 */ 124 #define ETMIDR_PTM_VERSION 0x00000300 125 126 /* 127 * A struct auxtrace_heap_item only has a queue_nr and a timestamp to 128 * work with. One option is to modify to auxtrace_heap_XYZ() API or simply 129 * encode the etm queue number as the upper 16 bit and the channel as 130 * the lower 16 bit. 131 */ 132 #define TO_CS_QUEUE_NR(queue_nr, trace_chan_id) \ 133 (queue_nr << 16 | trace_chan_id) 134 #define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16) 135 #define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff) 136 137 static u32 cs_etm__get_v7_protocol_version(u32 etmidr) 138 { 139 etmidr &= ETMIDR_PTM_VERSION; 140 141 if (etmidr == ETMIDR_PTM_VERSION) 142 return CS_ETM_PROTO_PTM; 143 144 return CS_ETM_PROTO_ETMV3; 145 } 146 147 static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic) 148 { 149 struct int_node *inode; 150 u64 *metadata; 151 152 inode = intlist__find(traceid_list, trace_chan_id); 153 if (!inode) 154 return -EINVAL; 155 156 metadata = inode->priv; 157 *magic = metadata[CS_ETM_MAGIC]; 158 return 0; 159 } 160 161 int cs_etm__get_cpu(u8 trace_chan_id, int *cpu) 162 { 163 struct int_node *inode; 164 u64 *metadata; 165 166 inode = intlist__find(traceid_list, trace_chan_id); 167 if (!inode) 168 return -EINVAL; 169 170 metadata = inode->priv; 171 *cpu = (int)metadata[CS_ETM_CPU]; 172 return 0; 173 } 174 175 /* 176 * The returned PID format is presented as an enum: 177 * 178 * CS_ETM_PIDFMT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced. 179 * CS_ETM_PIDFMT_CTXTID2: CONTEXTIDR_EL2 is traced. 180 * CS_ETM_PIDFMT_NONE: No context IDs 181 * 182 * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2 183 * are enabled at the same time when the session runs on an EL2 kernel. 
184 * This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be 185 * recorded in the trace data, the tool will selectively use 186 * CONTEXTIDR_EL2 as PID. 187 * 188 * The result is cached in etm->pid_fmt so this function only needs to be called 189 * when processing the aux info. 190 */ 191 static enum cs_etm_pid_fmt cs_etm__init_pid_fmt(u64 *metadata) 192 { 193 u64 val; 194 195 if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) { 196 val = metadata[CS_ETM_ETMCR]; 197 /* CONTEXTIDR is traced */ 198 if (val & BIT(ETM_OPT_CTXTID)) 199 return CS_ETM_PIDFMT_CTXTID; 200 } else { 201 val = metadata[CS_ETMV4_TRCCONFIGR]; 202 /* CONTEXTIDR_EL2 is traced */ 203 if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT))) 204 return CS_ETM_PIDFMT_CTXTID2; 205 /* CONTEXTIDR_EL1 is traced */ 206 else if (val & BIT(ETM4_CFG_BIT_CTXTID)) 207 return CS_ETM_PIDFMT_CTXTID; 208 } 209 210 return CS_ETM_PIDFMT_NONE; 211 } 212 213 enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq) 214 { 215 return etmq->etm->pid_fmt; 216 } 217 218 static int cs_etm__map_trace_id(u8 trace_chan_id, u64 *cpu_metadata) 219 { 220 struct int_node *inode; 221 222 /* Get an RB node for this CPU */ 223 inode = intlist__findnew(traceid_list, trace_chan_id); 224 225 /* Something went wrong, no need to continue */ 226 if (!inode) 227 return -ENOMEM; 228 229 /* 230 * The node for that CPU should not be taken. 231 * Back out if that's the case. 232 */ 233 if (inode->priv) 234 return -EINVAL; 235 236 /* All good, associate the traceID with the metadata pointer */ 237 inode->priv = cpu_metadata; 238 239 return 0; 240 } 241 242 static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata) 243 { 244 u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC]; 245 246 switch (cs_etm_magic) { 247 case __perf_cs_etmv3_magic: 248 *trace_chan_id = (u8)(cpu_metadata[CS_ETM_ETMTRACEIDR] & 249 CORESIGHT_TRACE_ID_VAL_MASK); 250 break; 251 case __perf_cs_etmv4_magic: 252 case __perf_cs_ete_magic: 253 *trace_chan_id = (u8)(cpu_metadata[CS_ETMV4_TRCTRACEIDR] & 254 CORESIGHT_TRACE_ID_VAL_MASK); 255 break; 256 default: 257 return -EINVAL; 258 } 259 return 0; 260 } 261 262 /* 263 * update metadata trace ID from the value found in the AUX_HW_INFO packet. 264 * This will also clear the CORESIGHT_TRACE_ID_UNUSED_FLAG flag if present. 265 */ 266 static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata) 267 { 268 u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC]; 269 270 switch (cs_etm_magic) { 271 case __perf_cs_etmv3_magic: 272 cpu_metadata[CS_ETM_ETMTRACEIDR] = trace_chan_id; 273 break; 274 case __perf_cs_etmv4_magic: 275 case __perf_cs_ete_magic: 276 cpu_metadata[CS_ETMV4_TRCTRACEIDR] = trace_chan_id; 277 break; 278 279 default: 280 return -EINVAL; 281 } 282 return 0; 283 } 284 285 /* 286 * Get a metadata index for a specific cpu from an array. 287 * 288 */ 289 static int get_cpu_data_idx(struct cs_etm_auxtrace *etm, int cpu) 290 { 291 int i; 292 293 for (i = 0; i < etm->num_cpu; i++) { 294 if (etm->metadata[i][CS_ETM_CPU] == (u64)cpu) { 295 return i; 296 } 297 } 298 299 return -1; 300 } 301 302 /* 303 * Get a metadata for a specific cpu from an array. 304 * 305 */ 306 static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu) 307 { 308 int idx = get_cpu_data_idx(etm, cpu); 309 310 return (idx != -1) ? etm->metadata[idx] : NULL; 311 } 312 313 /* 314 * Handle the PERF_RECORD_AUX_OUTPUT_HW_ID event. 315 * 316 * The payload associates the Trace ID and the CPU. 
317 * The routine is tolerant of seeing multiple packets with the same association, 318 * but a CPU / Trace ID association changing during a session is an error. 319 */ 320 static int cs_etm__process_aux_output_hw_id(struct perf_session *session, 321 union perf_event *event) 322 { 323 struct cs_etm_auxtrace *etm; 324 struct perf_sample sample; 325 struct int_node *inode; 326 struct evsel *evsel; 327 u64 *cpu_data; 328 u64 hw_id; 329 int cpu, version, err; 330 u8 trace_chan_id, curr_chan_id; 331 332 /* extract and parse the HW ID */ 333 hw_id = event->aux_output_hw_id.hw_id; 334 version = FIELD_GET(CS_AUX_HW_ID_VERSION_MASK, hw_id); 335 trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id); 336 337 /* check that we can handle this version */ 338 if (version > CS_AUX_HW_ID_CURR_VERSION) { 339 pr_err("CS ETM Trace: PERF_RECORD_AUX_OUTPUT_HW_ID version %d not supported. Please update Perf.\n", 340 version); 341 return -EINVAL; 342 } 343 344 /* get access to the etm metadata */ 345 etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace); 346 if (!etm || !etm->metadata) 347 return -EINVAL; 348 349 /* parse the sample to get the CPU */ 350 evsel = evlist__event2evsel(session->evlist, event); 351 if (!evsel) 352 return -EINVAL; 353 err = evsel__parse_sample(evsel, event, &sample); 354 if (err) 355 return err; 356 cpu = sample.cpu; 357 if (cpu == -1) { 358 /* no CPU in the sample - possibly recorded with an old version of perf */ 359 pr_err("CS_ETM: no CPU AUX_OUTPUT_HW_ID sample. Use compatible perf to record."); 360 return -EINVAL; 361 } 362 363 /* See if the ID is mapped to a CPU, and it matches the current CPU */ 364 inode = intlist__find(traceid_list, trace_chan_id); 365 if (inode) { 366 cpu_data = inode->priv; 367 if ((int)cpu_data[CS_ETM_CPU] != cpu) { 368 pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n"); 369 return -EINVAL; 370 } 371 372 /* check that the mapped ID matches */ 373 err = cs_etm__metadata_get_trace_id(&curr_chan_id, cpu_data); 374 if (err) 375 return err; 376 if (curr_chan_id != trace_chan_id) { 377 pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n"); 378 return -EINVAL; 379 } 380 381 /* mapped and matched - return OK */ 382 return 0; 383 } 384 385 cpu_data = get_cpu_data(etm, cpu); 386 if (cpu_data == NULL) 387 return err; 388 389 /* not one we've seen before - lets map it */ 390 err = cs_etm__map_trace_id(trace_chan_id, cpu_data); 391 if (err) 392 return err; 393 394 /* 395 * if we are picking up the association from the packet, need to plug 396 * the correct trace ID into the metadata for setting up decoders later. 397 */ 398 err = cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data); 399 return err; 400 } 401 402 void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq, 403 u8 trace_chan_id) 404 { 405 /* 406 * When a timestamp packet is encountered the backend code 407 * is stopped so that the front end has time to process packets 408 * that were accumulated in the traceID queue. Since there can 409 * be more than one channel per cs_etm_queue, we need to specify 410 * what traceID queue needs servicing. 
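	 *
	 * A pending_timestamp_chan_id of zero means no timestamp is pending;
	 * cs_etm__etmq_get_timestamp() reads the value and clears it back to
	 * zero once the timestamp has been consumed.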
411 */ 412 etmq->pending_timestamp_chan_id = trace_chan_id; 413 } 414 415 static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq, 416 u8 *trace_chan_id) 417 { 418 struct cs_etm_packet_queue *packet_queue; 419 420 if (!etmq->pending_timestamp_chan_id) 421 return 0; 422 423 if (trace_chan_id) 424 *trace_chan_id = etmq->pending_timestamp_chan_id; 425 426 packet_queue = cs_etm__etmq_get_packet_queue(etmq, 427 etmq->pending_timestamp_chan_id); 428 if (!packet_queue) 429 return 0; 430 431 /* Acknowledge pending status */ 432 etmq->pending_timestamp_chan_id = 0; 433 434 /* See function cs_etm_decoder__do_{hard|soft}_timestamp() */ 435 return packet_queue->cs_timestamp; 436 } 437 438 static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue) 439 { 440 int i; 441 442 queue->head = 0; 443 queue->tail = 0; 444 queue->packet_count = 0; 445 for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) { 446 queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN; 447 queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR; 448 queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR; 449 queue->packet_buffer[i].instr_count = 0; 450 queue->packet_buffer[i].last_instr_taken_branch = false; 451 queue->packet_buffer[i].last_instr_size = 0; 452 queue->packet_buffer[i].last_instr_type = 0; 453 queue->packet_buffer[i].last_instr_subtype = 0; 454 queue->packet_buffer[i].last_instr_cond = 0; 455 queue->packet_buffer[i].flags = 0; 456 queue->packet_buffer[i].exception_number = UINT32_MAX; 457 queue->packet_buffer[i].trace_chan_id = UINT8_MAX; 458 queue->packet_buffer[i].cpu = INT_MIN; 459 } 460 } 461 462 static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq) 463 { 464 int idx; 465 struct int_node *inode; 466 struct cs_etm_traceid_queue *tidq; 467 struct intlist *traceid_queues_list = etmq->traceid_queues_list; 468 469 intlist__for_each_entry(inode, traceid_queues_list) { 470 idx = (int)(intptr_t)inode->priv; 471 tidq = etmq->traceid_queues[idx]; 472 cs_etm__clear_packet_queue(&tidq->packet_queue); 473 } 474 } 475 476 static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq, 477 struct cs_etm_traceid_queue *tidq, 478 u8 trace_chan_id) 479 { 480 int rc = -ENOMEM; 481 struct auxtrace_queue *queue; 482 struct cs_etm_auxtrace *etm = etmq->etm; 483 484 cs_etm__clear_packet_queue(&tidq->packet_queue); 485 486 queue = &etmq->etm->queues.queue_array[etmq->queue_nr]; 487 tidq->trace_chan_id = trace_chan_id; 488 tidq->el = tidq->prev_packet_el = ocsd_EL_unknown; 489 tidq->thread = machine__findnew_thread(&etm->session->machines.host, -1, 490 queue->tid); 491 tidq->prev_packet_thread = machine__idle_thread(&etm->session->machines.host); 492 493 tidq->packet = zalloc(sizeof(struct cs_etm_packet)); 494 if (!tidq->packet) 495 goto out; 496 497 tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet)); 498 if (!tidq->prev_packet) 499 goto out_free; 500 501 if (etm->synth_opts.last_branch) { 502 size_t sz = sizeof(struct branch_stack); 503 504 sz += etm->synth_opts.last_branch_sz * 505 sizeof(struct branch_entry); 506 tidq->last_branch = zalloc(sz); 507 if (!tidq->last_branch) 508 goto out_free; 509 tidq->last_branch_rb = zalloc(sz); 510 if (!tidq->last_branch_rb) 511 goto out_free; 512 } 513 514 tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); 515 if (!tidq->event_buf) 516 goto out_free; 517 518 return 0; 519 520 out_free: 521 zfree(&tidq->last_branch_rb); 522 zfree(&tidq->last_branch); 523 zfree(&tidq->prev_packet); 524 zfree(&tidq->packet); 525 out: 526 return rc; 527 } 528 529 static struct 
cs_etm_traceid_queue 530 *cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id) 531 { 532 int idx; 533 struct int_node *inode; 534 struct intlist *traceid_queues_list; 535 struct cs_etm_traceid_queue *tidq, **traceid_queues; 536 struct cs_etm_auxtrace *etm = etmq->etm; 537 538 if (etm->per_thread_decoding) 539 trace_chan_id = CS_ETM_PER_THREAD_TRACEID; 540 541 traceid_queues_list = etmq->traceid_queues_list; 542 543 /* 544 * Check if the traceid_queue exist for this traceID by looking 545 * in the queue list. 546 */ 547 inode = intlist__find(traceid_queues_list, trace_chan_id); 548 if (inode) { 549 idx = (int)(intptr_t)inode->priv; 550 return etmq->traceid_queues[idx]; 551 } 552 553 /* We couldn't find a traceid_queue for this traceID, allocate one */ 554 tidq = malloc(sizeof(*tidq)); 555 if (!tidq) 556 return NULL; 557 558 memset(tidq, 0, sizeof(*tidq)); 559 560 /* Get a valid index for the new traceid_queue */ 561 idx = intlist__nr_entries(traceid_queues_list); 562 /* Memory for the inode is free'ed in cs_etm_free_traceid_queues () */ 563 inode = intlist__findnew(traceid_queues_list, trace_chan_id); 564 if (!inode) 565 goto out_free; 566 567 /* Associate this traceID with this index */ 568 inode->priv = (void *)(intptr_t)idx; 569 570 if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id)) 571 goto out_free; 572 573 /* Grow the traceid_queues array by one unit */ 574 traceid_queues = etmq->traceid_queues; 575 traceid_queues = reallocarray(traceid_queues, 576 idx + 1, 577 sizeof(*traceid_queues)); 578 579 /* 580 * On failure reallocarray() returns NULL and the original block of 581 * memory is left untouched. 582 */ 583 if (!traceid_queues) 584 goto out_free; 585 586 traceid_queues[idx] = tidq; 587 etmq->traceid_queues = traceid_queues; 588 589 return etmq->traceid_queues[idx]; 590 591 out_free: 592 /* 593 * Function intlist__remove() removes the inode from the list 594 * and delete the memory associated to it. 595 */ 596 intlist__remove(traceid_queues_list, inode); 597 free(tidq); 598 599 return NULL; 600 } 601 602 struct cs_etm_packet_queue 603 *cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id) 604 { 605 struct cs_etm_traceid_queue *tidq; 606 607 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); 608 if (tidq) 609 return &tidq->packet_queue; 610 611 return NULL; 612 } 613 614 static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm, 615 struct cs_etm_traceid_queue *tidq) 616 { 617 struct cs_etm_packet *tmp; 618 619 if (etm->synth_opts.branches || etm->synth_opts.last_branch || 620 etm->synth_opts.instructions) { 621 /* 622 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for 623 * the next incoming packet. 624 * 625 * Threads and exception levels are also tracked for both the 626 * previous and current packets. This is because the previous 627 * packet is used for the 'from' IP for branch samples, so the 628 * thread at that time must also be assigned to that sample. 629 * Across discontinuity packets the thread can change, so by 630 * tracking the thread for the previous packet the branch sample 631 * will have the correct info. 
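	 *
	 * Only the packet pointers are exchanged; the packet contents are not
	 * copied, which keeps the swap cheap on the per-packet decode path.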
632 */ 633 tmp = tidq->packet; 634 tidq->packet = tidq->prev_packet; 635 tidq->prev_packet = tmp; 636 tidq->prev_packet_el = tidq->el; 637 thread__put(tidq->prev_packet_thread); 638 tidq->prev_packet_thread = thread__get(tidq->thread); 639 } 640 } 641 642 static void cs_etm__packet_dump(const char *pkt_string) 643 { 644 const char *color = PERF_COLOR_BLUE; 645 int len = strlen(pkt_string); 646 647 if (len && (pkt_string[len-1] == '\n')) 648 color_fprintf(stdout, color, " %s", pkt_string); 649 else 650 color_fprintf(stdout, color, " %s\n", pkt_string); 651 652 fflush(stdout); 653 } 654 655 static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params, 656 struct cs_etm_auxtrace *etm, int t_idx, 657 int m_idx, u32 etmidr) 658 { 659 u64 **metadata = etm->metadata; 660 661 t_params[t_idx].protocol = cs_etm__get_v7_protocol_version(etmidr); 662 t_params[t_idx].etmv3.reg_ctrl = metadata[m_idx][CS_ETM_ETMCR]; 663 t_params[t_idx].etmv3.reg_trc_id = metadata[m_idx][CS_ETM_ETMTRACEIDR]; 664 } 665 666 static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params, 667 struct cs_etm_auxtrace *etm, int t_idx, 668 int m_idx) 669 { 670 u64 **metadata = etm->metadata; 671 672 t_params[t_idx].protocol = CS_ETM_PROTO_ETMV4i; 673 t_params[t_idx].etmv4.reg_idr0 = metadata[m_idx][CS_ETMV4_TRCIDR0]; 674 t_params[t_idx].etmv4.reg_idr1 = metadata[m_idx][CS_ETMV4_TRCIDR1]; 675 t_params[t_idx].etmv4.reg_idr2 = metadata[m_idx][CS_ETMV4_TRCIDR2]; 676 t_params[t_idx].etmv4.reg_idr8 = metadata[m_idx][CS_ETMV4_TRCIDR8]; 677 t_params[t_idx].etmv4.reg_configr = metadata[m_idx][CS_ETMV4_TRCCONFIGR]; 678 t_params[t_idx].etmv4.reg_traceidr = metadata[m_idx][CS_ETMV4_TRCTRACEIDR]; 679 } 680 681 static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params, 682 struct cs_etm_auxtrace *etm, int t_idx, 683 int m_idx) 684 { 685 u64 **metadata = etm->metadata; 686 687 t_params[t_idx].protocol = CS_ETM_PROTO_ETE; 688 t_params[t_idx].ete.reg_idr0 = metadata[m_idx][CS_ETE_TRCIDR0]; 689 t_params[t_idx].ete.reg_idr1 = metadata[m_idx][CS_ETE_TRCIDR1]; 690 t_params[t_idx].ete.reg_idr2 = metadata[m_idx][CS_ETE_TRCIDR2]; 691 t_params[t_idx].ete.reg_idr8 = metadata[m_idx][CS_ETE_TRCIDR8]; 692 t_params[t_idx].ete.reg_configr = metadata[m_idx][CS_ETE_TRCCONFIGR]; 693 t_params[t_idx].ete.reg_traceidr = metadata[m_idx][CS_ETE_TRCTRACEIDR]; 694 t_params[t_idx].ete.reg_devarch = metadata[m_idx][CS_ETE_TRCDEVARCH]; 695 } 696 697 static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params, 698 struct cs_etm_auxtrace *etm, 699 bool formatted, 700 int sample_cpu, 701 int decoders) 702 { 703 int t_idx, m_idx; 704 u32 etmidr; 705 u64 architecture; 706 707 for (t_idx = 0; t_idx < decoders; t_idx++) { 708 if (formatted) 709 m_idx = t_idx; 710 else { 711 m_idx = get_cpu_data_idx(etm, sample_cpu); 712 if (m_idx == -1) { 713 pr_warning("CS_ETM: unknown CPU, falling back to first metadata\n"); 714 m_idx = 0; 715 } 716 } 717 718 architecture = etm->metadata[m_idx][CS_ETM_MAGIC]; 719 720 switch (architecture) { 721 case __perf_cs_etmv3_magic: 722 etmidr = etm->metadata[m_idx][CS_ETM_ETMIDR]; 723 cs_etm__set_trace_param_etmv3(t_params, etm, t_idx, m_idx, etmidr); 724 break; 725 case __perf_cs_etmv4_magic: 726 cs_etm__set_trace_param_etmv4(t_params, etm, t_idx, m_idx); 727 break; 728 case __perf_cs_ete_magic: 729 cs_etm__set_trace_param_ete(t_params, etm, t_idx, m_idx); 730 break; 731 default: 732 return -EINVAL; 733 } 734 } 735 736 return 0; 737 } 738 739 static int cs_etm__init_decoder_params(struct 
cs_etm_decoder_params *d_params, 740 struct cs_etm_queue *etmq, 741 enum cs_etm_decoder_operation mode, 742 bool formatted) 743 { 744 int ret = -EINVAL; 745 746 if (!(mode < CS_ETM_OPERATION_MAX)) 747 goto out; 748 749 d_params->packet_printer = cs_etm__packet_dump; 750 d_params->operation = mode; 751 d_params->data = etmq; 752 d_params->formatted = formatted; 753 d_params->fsyncs = false; 754 d_params->hsyncs = false; 755 d_params->frame_aligned = true; 756 757 ret = 0; 758 out: 759 return ret; 760 } 761 762 static void cs_etm__dump_event(struct cs_etm_queue *etmq, 763 struct auxtrace_buffer *buffer) 764 { 765 int ret; 766 const char *color = PERF_COLOR_BLUE; 767 size_t buffer_used = 0; 768 769 fprintf(stdout, "\n"); 770 color_fprintf(stdout, color, 771 ". ... CoreSight %s Trace data: size %#zx bytes\n", 772 cs_etm_decoder__get_name(etmq->decoder), buffer->size); 773 774 do { 775 size_t consumed; 776 777 ret = cs_etm_decoder__process_data_block( 778 etmq->decoder, buffer->offset, 779 &((u8 *)buffer->data)[buffer_used], 780 buffer->size - buffer_used, &consumed); 781 if (ret) 782 break; 783 784 buffer_used += consumed; 785 } while (buffer_used < buffer->size); 786 787 cs_etm_decoder__reset(etmq->decoder); 788 } 789 790 static int cs_etm__flush_events(struct perf_session *session, 791 struct perf_tool *tool) 792 { 793 struct cs_etm_auxtrace *etm = container_of(session->auxtrace, 794 struct cs_etm_auxtrace, 795 auxtrace); 796 if (dump_trace) 797 return 0; 798 799 if (!tool->ordered_events) 800 return -EINVAL; 801 802 if (etm->timeless_decoding) { 803 /* 804 * Pass tid = -1 to process all queues. But likely they will have 805 * already been processed on PERF_RECORD_EXIT anyway. 806 */ 807 return cs_etm__process_timeless_queues(etm, -1); 808 } 809 810 return cs_etm__process_timestamped_queues(etm); 811 } 812 813 static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq) 814 { 815 int idx; 816 uintptr_t priv; 817 struct int_node *inode, *tmp; 818 struct cs_etm_traceid_queue *tidq; 819 struct intlist *traceid_queues_list = etmq->traceid_queues_list; 820 821 intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) { 822 priv = (uintptr_t)inode->priv; 823 idx = priv; 824 825 /* Free this traceid_queue from the array */ 826 tidq = etmq->traceid_queues[idx]; 827 thread__zput(tidq->thread); 828 thread__zput(tidq->prev_packet_thread); 829 zfree(&tidq->event_buf); 830 zfree(&tidq->last_branch); 831 zfree(&tidq->last_branch_rb); 832 zfree(&tidq->prev_packet); 833 zfree(&tidq->packet); 834 zfree(&tidq); 835 836 /* 837 * Function intlist__remove() removes the inode from the list 838 * and delete the memory associated to it. 
839 */ 840 intlist__remove(traceid_queues_list, inode); 841 } 842 843 /* Then the RB tree itself */ 844 intlist__delete(traceid_queues_list); 845 etmq->traceid_queues_list = NULL; 846 847 /* finally free the traceid_queues array */ 848 zfree(&etmq->traceid_queues); 849 } 850 851 static void cs_etm__free_queue(void *priv) 852 { 853 struct cs_etm_queue *etmq = priv; 854 855 if (!etmq) 856 return; 857 858 cs_etm_decoder__free(etmq->decoder); 859 cs_etm__free_traceid_queues(etmq); 860 free(etmq); 861 } 862 863 static void cs_etm__free_events(struct perf_session *session) 864 { 865 unsigned int i; 866 struct cs_etm_auxtrace *aux = container_of(session->auxtrace, 867 struct cs_etm_auxtrace, 868 auxtrace); 869 struct auxtrace_queues *queues = &aux->queues; 870 871 for (i = 0; i < queues->nr_queues; i++) { 872 cs_etm__free_queue(queues->queue_array[i].priv); 873 queues->queue_array[i].priv = NULL; 874 } 875 876 auxtrace_queues__free(queues); 877 } 878 879 static void cs_etm__free(struct perf_session *session) 880 { 881 int i; 882 struct int_node *inode, *tmp; 883 struct cs_etm_auxtrace *aux = container_of(session->auxtrace, 884 struct cs_etm_auxtrace, 885 auxtrace); 886 cs_etm__free_events(session); 887 session->auxtrace = NULL; 888 889 /* First remove all traceID/metadata nodes for the RB tree */ 890 intlist__for_each_entry_safe(inode, tmp, traceid_list) 891 intlist__remove(traceid_list, inode); 892 /* Then the RB tree itself */ 893 intlist__delete(traceid_list); 894 895 for (i = 0; i < aux->num_cpu; i++) 896 zfree(&aux->metadata[i]); 897 898 zfree(&aux->metadata); 899 zfree(&aux); 900 } 901 902 static bool cs_etm__evsel_is_auxtrace(struct perf_session *session, 903 struct evsel *evsel) 904 { 905 struct cs_etm_auxtrace *aux = container_of(session->auxtrace, 906 struct cs_etm_auxtrace, 907 auxtrace); 908 909 return evsel->core.attr.type == aux->pmu_type; 910 } 911 912 static struct machine *cs_etm__get_machine(struct cs_etm_queue *etmq, 913 ocsd_ex_level el) 914 { 915 enum cs_etm_pid_fmt pid_fmt = cs_etm__get_pid_fmt(etmq); 916 917 /* 918 * For any virtualisation based on nVHE (e.g. pKVM), or host kernels 919 * running at EL1 assume everything is the host. 920 */ 921 if (pid_fmt == CS_ETM_PIDFMT_CTXTID) 922 return &etmq->etm->session->machines.host; 923 924 /* 925 * Not perfect, but otherwise assume anything in EL1 is the default 926 * guest, and everything else is the host. Distinguishing between guest 927 * and host userspaces isn't currently supported either. Neither is 928 * multiple guest support. All this does is reduce the likeliness of 929 * decode errors where we look into the host kernel maps when it should 930 * have been the guest maps. 
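	 *
	 * In practice this mostly matters on VHE hosts, where the host kernel
	 * itself runs at EL2 and is traced with CONTEXTIDR_EL2, so treating
	 * EL1 ranges as the default guest is usually the right choice.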
931 */ 932 switch (el) { 933 case ocsd_EL1: 934 return machines__find_guest(&etmq->etm->session->machines, 935 DEFAULT_GUEST_KERNEL_ID); 936 case ocsd_EL3: 937 case ocsd_EL2: 938 case ocsd_EL0: 939 case ocsd_EL_unknown: 940 default: 941 return &etmq->etm->session->machines.host; 942 } 943 } 944 945 static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address, 946 ocsd_ex_level el) 947 { 948 struct machine *machine = cs_etm__get_machine(etmq, el); 949 950 if (address >= machine__kernel_start(machine)) { 951 if (machine__is_host(machine)) 952 return PERF_RECORD_MISC_KERNEL; 953 else 954 return PERF_RECORD_MISC_GUEST_KERNEL; 955 } else { 956 if (machine__is_host(machine)) 957 return PERF_RECORD_MISC_USER; 958 else { 959 /* 960 * Can't really happen at the moment because 961 * cs_etm__get_machine() will always return 962 * machines.host for any non EL1 trace. 963 */ 964 return PERF_RECORD_MISC_GUEST_USER; 965 } 966 } 967 } 968 969 static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id, 970 u64 address, size_t size, u8 *buffer, 971 const ocsd_mem_space_acc_t mem_space) 972 { 973 u8 cpumode; 974 u64 offset; 975 int len; 976 struct addr_location al; 977 struct dso *dso; 978 struct cs_etm_traceid_queue *tidq; 979 int ret = 0; 980 981 if (!etmq) 982 return 0; 983 984 addr_location__init(&al); 985 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); 986 if (!tidq) 987 goto out; 988 989 /* 990 * We've already tracked EL along side the PID in cs_etm__set_thread() 991 * so double check that it matches what OpenCSD thinks as well. It 992 * doesn't distinguish between EL0 and EL1 for this mem access callback 993 * so we had to do the extra tracking. Skip validation if it's any of 994 * the 'any' values. 995 */ 996 if (!(mem_space == OCSD_MEM_SPACE_ANY || 997 mem_space == OCSD_MEM_SPACE_N || mem_space == OCSD_MEM_SPACE_S)) { 998 if (mem_space & OCSD_MEM_SPACE_EL1N) { 999 /* Includes both non secure EL1 and EL0 */ 1000 assert(tidq->el == ocsd_EL1 || tidq->el == ocsd_EL0); 1001 } else if (mem_space & OCSD_MEM_SPACE_EL2) 1002 assert(tidq->el == ocsd_EL2); 1003 else if (mem_space & OCSD_MEM_SPACE_EL3) 1004 assert(tidq->el == ocsd_EL3); 1005 } 1006 1007 cpumode = cs_etm__cpu_mode(etmq, address, tidq->el); 1008 1009 if (!thread__find_map(tidq->thread, cpumode, address, &al)) 1010 goto out; 1011 1012 dso = map__dso(al.map); 1013 if (!dso) 1014 goto out; 1015 1016 if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR && 1017 dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE)) 1018 goto out; 1019 1020 offset = map__map_ip(al.map, address); 1021 1022 map__load(al.map); 1023 1024 len = dso__data_read_offset(dso, maps__machine(thread__maps(tidq->thread)), 1025 offset, buffer, size); 1026 1027 if (len <= 0) { 1028 ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n" 1029 " Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n"); 1030 if (!dso__auxtrace_warned(dso)) { 1031 pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n", 1032 address, 1033 dso__long_name(dso) ? 
dso__long_name(dso) : "Unknown"); 1034 dso__set_auxtrace_warned(dso); 1035 } 1036 goto out; 1037 } 1038 ret = len; 1039 out: 1040 addr_location__exit(&al); 1041 return ret; 1042 } 1043 1044 static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, 1045 bool formatted, int sample_cpu) 1046 { 1047 struct cs_etm_decoder_params d_params; 1048 struct cs_etm_trace_params *t_params = NULL; 1049 struct cs_etm_queue *etmq; 1050 /* 1051 * Each queue can only contain data from one CPU when unformatted, so only one decoder is 1052 * needed. 1053 */ 1054 int decoders = formatted ? etm->num_cpu : 1; 1055 1056 etmq = zalloc(sizeof(*etmq)); 1057 if (!etmq) 1058 return NULL; 1059 1060 etmq->traceid_queues_list = intlist__new(NULL); 1061 if (!etmq->traceid_queues_list) 1062 goto out_free; 1063 1064 /* Use metadata to fill in trace parameters for trace decoder */ 1065 t_params = zalloc(sizeof(*t_params) * decoders); 1066 1067 if (!t_params) 1068 goto out_free; 1069 1070 if (cs_etm__init_trace_params(t_params, etm, formatted, sample_cpu, decoders)) 1071 goto out_free; 1072 1073 /* Set decoder parameters to decode trace packets */ 1074 if (cs_etm__init_decoder_params(&d_params, etmq, 1075 dump_trace ? CS_ETM_OPERATION_PRINT : 1076 CS_ETM_OPERATION_DECODE, 1077 formatted)) 1078 goto out_free; 1079 1080 etmq->decoder = cs_etm_decoder__new(decoders, &d_params, 1081 t_params); 1082 1083 if (!etmq->decoder) 1084 goto out_free; 1085 1086 /* 1087 * Register a function to handle all memory accesses required by 1088 * the trace decoder library. 1089 */ 1090 if (cs_etm_decoder__add_mem_access_cb(etmq->decoder, 1091 0x0L, ((u64) -1L), 1092 cs_etm__mem_access)) 1093 goto out_free_decoder; 1094 1095 zfree(&t_params); 1096 return etmq; 1097 1098 out_free_decoder: 1099 cs_etm_decoder__free(etmq->decoder); 1100 out_free: 1101 intlist__delete(etmq->traceid_queues_list); 1102 free(etmq); 1103 1104 return NULL; 1105 } 1106 1107 static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, 1108 struct auxtrace_queue *queue, 1109 unsigned int queue_nr, 1110 bool formatted, 1111 int sample_cpu) 1112 { 1113 struct cs_etm_queue *etmq = queue->priv; 1114 1115 if (list_empty(&queue->head) || etmq) 1116 return 0; 1117 1118 etmq = cs_etm__alloc_queue(etm, formatted, sample_cpu); 1119 1120 if (!etmq) 1121 return -ENOMEM; 1122 1123 queue->priv = etmq; 1124 etmq->etm = etm; 1125 etmq->queue_nr = queue_nr; 1126 etmq->offset = 0; 1127 1128 return 0; 1129 } 1130 1131 static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm, 1132 struct cs_etm_queue *etmq, 1133 unsigned int queue_nr) 1134 { 1135 int ret = 0; 1136 unsigned int cs_queue_nr; 1137 u8 trace_chan_id; 1138 u64 cs_timestamp; 1139 1140 /* 1141 * We are under a CPU-wide trace scenario. As such we need to know 1142 * when the code that generated the traces started to execute so that 1143 * it can be correlated with execution on other CPUs. So we get a 1144 * handle on the beginning of traces and decode until we find a 1145 * timestamp. The timestamp is then added to the auxtrace min heap 1146 * in order to know what nibble (of all the etmqs) to decode first. 1147 */ 1148 while (1) { 1149 /* 1150 * Fetch an aux_buffer from this etmq. Bail if no more 1151 * blocks or an error has been encountered. 1152 */ 1153 ret = cs_etm__get_data_block(etmq); 1154 if (ret <= 0) 1155 goto out; 1156 1157 /* 1158 * Run decoder on the trace block. The decoder will stop when 1159 * encountering a CS timestamp, a full packet queue or the end of 1160 * trace for that block. 
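		 *
		 * Any non-zero return from the decode step below is treated
		 * as an error and ends the search for a timestamp.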
		 */
		ret = cs_etm__decode_data_block(etmq);
		if (ret)
			goto out;

		/*
		 * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
		 * the timestamp calculation for us.
		 */
		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);

		/* We found a timestamp, no need to continue. */
		if (cs_timestamp)
			break;

		/*
		 * We didn't find a timestamp so empty all the traceid packet
		 * queues before looking for another timestamp packet, either
		 * in the current data block or a new one. Packets that were
		 * just decoded are useless since no timestamp has been
		 * associated with them. As such simply discard them.
		 */
		cs_etm__clear_all_packet_queues(etmq);
	}

	/*
	 * We have a timestamp. Add it to the min heap to reflect when
	 * instructions conveyed by the range packets of this traceID queue
	 * started to execute. Once the same has been done for all the traceID
	 * queues of each etmq, rendering and decoding can start in
	 * chronological order.
	 *
	 * Note that packets decoded above are still in the traceID's packet
	 * queue and will be processed in cs_etm__process_timestamped_queues().
	 */
	cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
	ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
out:
	return ret;
}

static inline
void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
				 struct cs_etm_traceid_queue *tidq)
{
	struct branch_stack *bs_src = tidq->last_branch_rb;
	struct branch_stack *bs_dst = tidq->last_branch;
	size_t nr = 0;

	/*
	 * Set the number of records before early exit: ->nr is used to
	 * determine how many branches to copy from ->entries.
	 */
	bs_dst->nr = bs_src->nr;

	/*
	 * Early exit when there is nothing to copy.
	 */
	if (!bs_src->nr)
		return;

	/*
	 * As bs_src->entries is a circular buffer, we need to copy from it in
	 * two steps. First, copy the branches from the most recently inserted
	 * branch ->last_branch_pos until the end of bs_src->entries buffer.
	 */
	nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos;
	memcpy(&bs_dst->entries[0],
	       &bs_src->entries[tidq->last_branch_pos],
	       sizeof(struct branch_entry) * nr);

	/*
	 * If we wrapped around at least once, the branches from the beginning
	 * of the bs_src->entries buffer and until the ->last_branch_pos element
	 * are older valid branches: copy them over. The total number of
	 * branches copied over will be equal to the number of branches asked by
	 * the user in last_branch_sz.
	 */
	if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
		memcpy(&bs_dst->entries[nr],
		       &bs_src->entries[0],
		       sizeof(struct branch_entry) * tidq->last_branch_pos);
	}
}

static inline
void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq)
{
	tidq->last_branch_pos = 0;
	tidq->last_branch_rb->nr = 0;
}

static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
					 u8 trace_chan_id, u64 addr)
{
	u8 instrBytes[2];

	cs_etm__mem_access(etmq, trace_chan_id, addr, ARRAY_SIZE(instrBytes),
			   instrBytes, 0);
	/*
	 * T32 instruction size is indicated by bits[15:11] of the first
	 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
	 * denote a 32-bit instruction.
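	 * For example, a first halfword of 0xe92d (PUSH.W / STMDB SP!) has
	 * bits[15:11] == 0b11101 and therefore starts a 32-bit encoding,
	 * whereas 0x2000 (MOVS r0, #0) is a complete 16-bit instruction.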
1264 */ 1265 return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2; 1266 } 1267 1268 static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet) 1269 { 1270 /* 1271 * Return 0 for packets that have no addresses so that CS_ETM_INVAL_ADDR doesn't 1272 * appear in samples. 1273 */ 1274 if (packet->sample_type == CS_ETM_DISCONTINUITY || 1275 packet->sample_type == CS_ETM_EXCEPTION) 1276 return 0; 1277 1278 return packet->start_addr; 1279 } 1280 1281 static inline 1282 u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet) 1283 { 1284 /* Returns 0 for the CS_ETM_DISCONTINUITY packet */ 1285 if (packet->sample_type == CS_ETM_DISCONTINUITY) 1286 return 0; 1287 1288 return packet->end_addr - packet->last_instr_size; 1289 } 1290 1291 static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq, 1292 u64 trace_chan_id, 1293 const struct cs_etm_packet *packet, 1294 u64 offset) 1295 { 1296 if (packet->isa == CS_ETM_ISA_T32) { 1297 u64 addr = packet->start_addr; 1298 1299 while (offset) { 1300 addr += cs_etm__t32_instr_size(etmq, 1301 trace_chan_id, addr); 1302 offset--; 1303 } 1304 return addr; 1305 } 1306 1307 /* Assume a 4 byte instruction size (A32/A64) */ 1308 return packet->start_addr + offset * 4; 1309 } 1310 1311 static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq, 1312 struct cs_etm_traceid_queue *tidq) 1313 { 1314 struct branch_stack *bs = tidq->last_branch_rb; 1315 struct branch_entry *be; 1316 1317 /* 1318 * The branches are recorded in a circular buffer in reverse 1319 * chronological order: we start recording from the last element of the 1320 * buffer down. After writing the first element of the stack, move the 1321 * insert position back to the end of the buffer. 1322 */ 1323 if (!tidq->last_branch_pos) 1324 tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz; 1325 1326 tidq->last_branch_pos -= 1; 1327 1328 be = &bs->entries[tidq->last_branch_pos]; 1329 be->from = cs_etm__last_executed_instr(tidq->prev_packet); 1330 be->to = cs_etm__first_executed_instr(tidq->packet); 1331 /* No support for mispredict */ 1332 be->flags.mispred = 0; 1333 be->flags.predicted = 1; 1334 1335 /* 1336 * Increment bs->nr until reaching the number of last branches asked by 1337 * the user on the command line. 
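	 *
	 * Once the buffer has wrapped, bs->nr stays equal to last_branch_sz
	 * and the oldest entry is simply overwritten in place.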
1338 */ 1339 if (bs->nr < etmq->etm->synth_opts.last_branch_sz) 1340 bs->nr += 1; 1341 } 1342 1343 static int cs_etm__inject_event(union perf_event *event, 1344 struct perf_sample *sample, u64 type) 1345 { 1346 event->header.size = perf_event__sample_event_size(sample, type, 0); 1347 return perf_event__synthesize_sample(event, type, 0, sample); 1348 } 1349 1350 1351 static int 1352 cs_etm__get_trace(struct cs_etm_queue *etmq) 1353 { 1354 struct auxtrace_buffer *aux_buffer = etmq->buffer; 1355 struct auxtrace_buffer *old_buffer = aux_buffer; 1356 struct auxtrace_queue *queue; 1357 1358 queue = &etmq->etm->queues.queue_array[etmq->queue_nr]; 1359 1360 aux_buffer = auxtrace_buffer__next(queue, aux_buffer); 1361 1362 /* If no more data, drop the previous auxtrace_buffer and return */ 1363 if (!aux_buffer) { 1364 if (old_buffer) 1365 auxtrace_buffer__drop_data(old_buffer); 1366 etmq->buf_len = 0; 1367 return 0; 1368 } 1369 1370 etmq->buffer = aux_buffer; 1371 1372 /* If the aux_buffer doesn't have data associated, try to load it */ 1373 if (!aux_buffer->data) { 1374 /* get the file desc associated with the perf data file */ 1375 int fd = perf_data__fd(etmq->etm->session->data); 1376 1377 aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd); 1378 if (!aux_buffer->data) 1379 return -ENOMEM; 1380 } 1381 1382 /* If valid, drop the previous buffer */ 1383 if (old_buffer) 1384 auxtrace_buffer__drop_data(old_buffer); 1385 1386 etmq->buf_used = 0; 1387 etmq->buf_len = aux_buffer->size; 1388 etmq->buf = aux_buffer->data; 1389 1390 return etmq->buf_len; 1391 } 1392 1393 static void cs_etm__set_thread(struct cs_etm_queue *etmq, 1394 struct cs_etm_traceid_queue *tidq, pid_t tid, 1395 ocsd_ex_level el) 1396 { 1397 struct machine *machine = cs_etm__get_machine(etmq, el); 1398 1399 if (tid != -1) { 1400 thread__zput(tidq->thread); 1401 tidq->thread = machine__find_thread(machine, -1, tid); 1402 } 1403 1404 /* Couldn't find a known thread */ 1405 if (!tidq->thread) 1406 tidq->thread = machine__idle_thread(machine); 1407 1408 tidq->el = el; 1409 } 1410 1411 int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid, 1412 u8 trace_chan_id, ocsd_ex_level el) 1413 { 1414 struct cs_etm_traceid_queue *tidq; 1415 1416 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); 1417 if (!tidq) 1418 return -EINVAL; 1419 1420 cs_etm__set_thread(etmq, tidq, tid, el); 1421 return 0; 1422 } 1423 1424 bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq) 1425 { 1426 return !!etmq->etm->timeless_decoding; 1427 } 1428 1429 static void cs_etm__copy_insn(struct cs_etm_queue *etmq, 1430 u64 trace_chan_id, 1431 const struct cs_etm_packet *packet, 1432 struct perf_sample *sample) 1433 { 1434 /* 1435 * It's pointless to read instructions for the CS_ETM_DISCONTINUITY 1436 * packet, so directly bail out with 'insn_len' = 0. 1437 */ 1438 if (packet->sample_type == CS_ETM_DISCONTINUITY) { 1439 sample->insn_len = 0; 1440 return; 1441 } 1442 1443 /* 1444 * T32 instruction size might be 32-bit or 16-bit, decide by calling 1445 * cs_etm__t32_instr_size(). 1446 */ 1447 if (packet->isa == CS_ETM_ISA_T32) 1448 sample->insn_len = cs_etm__t32_instr_size(etmq, trace_chan_id, 1449 sample->ip); 1450 /* Otherwise, A64 and A32 instruction size are always 32-bit. 
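	 * No memory read is needed in that case to work out the length; the
	 * instruction bytes are still fetched below into sample->insn.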
*/ 1451 else 1452 sample->insn_len = 4; 1453 1454 cs_etm__mem_access(etmq, trace_chan_id, sample->ip, sample->insn_len, 1455 (void *)sample->insn, 0); 1456 } 1457 1458 u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp) 1459 { 1460 struct cs_etm_auxtrace *etm = etmq->etm; 1461 1462 if (etm->has_virtual_ts) 1463 return tsc_to_perf_time(cs_timestamp, &etm->tc); 1464 else 1465 return cs_timestamp; 1466 } 1467 1468 static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq, 1469 struct cs_etm_traceid_queue *tidq) 1470 { 1471 struct cs_etm_auxtrace *etm = etmq->etm; 1472 struct cs_etm_packet_queue *packet_queue = &tidq->packet_queue; 1473 1474 if (!etm->timeless_decoding && etm->has_virtual_ts) 1475 return packet_queue->cs_timestamp; 1476 else 1477 return etm->latest_kernel_timestamp; 1478 } 1479 1480 static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, 1481 struct cs_etm_traceid_queue *tidq, 1482 u64 addr, u64 period) 1483 { 1484 int ret = 0; 1485 struct cs_etm_auxtrace *etm = etmq->etm; 1486 union perf_event *event = tidq->event_buf; 1487 struct perf_sample sample = {.ip = 0,}; 1488 1489 event->sample.header.type = PERF_RECORD_SAMPLE; 1490 event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->el); 1491 event->sample.header.size = sizeof(struct perf_event_header); 1492 1493 /* Set time field based on etm auxtrace config. */ 1494 sample.time = cs_etm__resolve_sample_time(etmq, tidq); 1495 1496 sample.ip = addr; 1497 sample.pid = thread__pid(tidq->thread); 1498 sample.tid = thread__tid(tidq->thread); 1499 sample.id = etmq->etm->instructions_id; 1500 sample.stream_id = etmq->etm->instructions_id; 1501 sample.period = period; 1502 sample.cpu = tidq->packet->cpu; 1503 sample.flags = tidq->prev_packet->flags; 1504 sample.cpumode = event->sample.header.misc; 1505 1506 cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample); 1507 1508 if (etm->synth_opts.last_branch) 1509 sample.branch_stack = tidq->last_branch; 1510 1511 if (etm->synth_opts.inject) { 1512 ret = cs_etm__inject_event(event, &sample, 1513 etm->instructions_sample_type); 1514 if (ret) 1515 return ret; 1516 } 1517 1518 ret = perf_session__deliver_synth_event(etm->session, event, &sample); 1519 1520 if (ret) 1521 pr_err( 1522 "CS ETM Trace: failed to deliver instruction event, error %d\n", 1523 ret); 1524 1525 return ret; 1526 } 1527 1528 /* 1529 * The cs etm packet encodes an instruction range between a branch target 1530 * and the next taken branch. Generate sample accordingly. 1531 */ 1532 static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, 1533 struct cs_etm_traceid_queue *tidq) 1534 { 1535 int ret = 0; 1536 struct cs_etm_auxtrace *etm = etmq->etm; 1537 struct perf_sample sample = {.ip = 0,}; 1538 union perf_event *event = tidq->event_buf; 1539 struct dummy_branch_stack { 1540 u64 nr; 1541 u64 hw_idx; 1542 struct branch_entry entries; 1543 } dummy_bs; 1544 u64 ip; 1545 1546 ip = cs_etm__last_executed_instr(tidq->prev_packet); 1547 1548 event->sample.header.type = PERF_RECORD_SAMPLE; 1549 event->sample.header.misc = cs_etm__cpu_mode(etmq, ip, 1550 tidq->prev_packet_el); 1551 event->sample.header.size = sizeof(struct perf_event_header); 1552 1553 /* Set time field based on etm auxtrace config. 
*/ 1554 sample.time = cs_etm__resolve_sample_time(etmq, tidq); 1555 1556 sample.ip = ip; 1557 sample.pid = thread__pid(tidq->prev_packet_thread); 1558 sample.tid = thread__tid(tidq->prev_packet_thread); 1559 sample.addr = cs_etm__first_executed_instr(tidq->packet); 1560 sample.id = etmq->etm->branches_id; 1561 sample.stream_id = etmq->etm->branches_id; 1562 sample.period = 1; 1563 sample.cpu = tidq->packet->cpu; 1564 sample.flags = tidq->prev_packet->flags; 1565 sample.cpumode = event->sample.header.misc; 1566 1567 cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->prev_packet, 1568 &sample); 1569 1570 /* 1571 * perf report cannot handle events without a branch stack 1572 */ 1573 if (etm->synth_opts.last_branch) { 1574 dummy_bs = (struct dummy_branch_stack){ 1575 .nr = 1, 1576 .hw_idx = -1ULL, 1577 .entries = { 1578 .from = sample.ip, 1579 .to = sample.addr, 1580 }, 1581 }; 1582 sample.branch_stack = (struct branch_stack *)&dummy_bs; 1583 } 1584 1585 if (etm->synth_opts.inject) { 1586 ret = cs_etm__inject_event(event, &sample, 1587 etm->branches_sample_type); 1588 if (ret) 1589 return ret; 1590 } 1591 1592 ret = perf_session__deliver_synth_event(etm->session, event, &sample); 1593 1594 if (ret) 1595 pr_err( 1596 "CS ETM Trace: failed to deliver instruction event, error %d\n", 1597 ret); 1598 1599 return ret; 1600 } 1601 1602 struct cs_etm_synth { 1603 struct perf_tool dummy_tool; 1604 struct perf_session *session; 1605 }; 1606 1607 static int cs_etm__event_synth(struct perf_tool *tool, 1608 union perf_event *event, 1609 struct perf_sample *sample __maybe_unused, 1610 struct machine *machine __maybe_unused) 1611 { 1612 struct cs_etm_synth *cs_etm_synth = 1613 container_of(tool, struct cs_etm_synth, dummy_tool); 1614 1615 return perf_session__deliver_synth_event(cs_etm_synth->session, 1616 event, NULL); 1617 } 1618 1619 static int cs_etm__synth_event(struct perf_session *session, 1620 struct perf_event_attr *attr, u64 id) 1621 { 1622 struct cs_etm_synth cs_etm_synth; 1623 1624 memset(&cs_etm_synth, 0, sizeof(struct cs_etm_synth)); 1625 cs_etm_synth.session = session; 1626 1627 return perf_event__synthesize_attr(&cs_etm_synth.dummy_tool, attr, 1, 1628 &id, cs_etm__event_synth); 1629 } 1630 1631 static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, 1632 struct perf_session *session) 1633 { 1634 struct evlist *evlist = session->evlist; 1635 struct evsel *evsel; 1636 struct perf_event_attr attr; 1637 bool found = false; 1638 u64 id; 1639 int err; 1640 1641 evlist__for_each_entry(evlist, evsel) { 1642 if (evsel->core.attr.type == etm->pmu_type) { 1643 found = true; 1644 break; 1645 } 1646 } 1647 1648 if (!found) { 1649 pr_debug("No selected events with CoreSight Trace data\n"); 1650 return 0; 1651 } 1652 1653 memset(&attr, 0, sizeof(struct perf_event_attr)); 1654 attr.size = sizeof(struct perf_event_attr); 1655 attr.type = PERF_TYPE_HARDWARE; 1656 attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK; 1657 attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | 1658 PERF_SAMPLE_PERIOD; 1659 if (etm->timeless_decoding) 1660 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; 1661 else 1662 attr.sample_type |= PERF_SAMPLE_TIME; 1663 1664 attr.exclude_user = evsel->core.attr.exclude_user; 1665 attr.exclude_kernel = evsel->core.attr.exclude_kernel; 1666 attr.exclude_hv = evsel->core.attr.exclude_hv; 1667 attr.exclude_host = evsel->core.attr.exclude_host; 1668 attr.exclude_guest = evsel->core.attr.exclude_guest; 1669 attr.sample_id_all = evsel->core.attr.sample_id_all; 1670 attr.read_format 
= evsel->core.attr.read_format; 1671 1672 /* create new id val to be a fixed offset from evsel id */ 1673 id = evsel->core.id[0] + 1000000000; 1674 1675 if (!id) 1676 id = 1; 1677 1678 if (etm->synth_opts.branches) { 1679 attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; 1680 attr.sample_period = 1; 1681 attr.sample_type |= PERF_SAMPLE_ADDR; 1682 err = cs_etm__synth_event(session, &attr, id); 1683 if (err) 1684 return err; 1685 etm->branches_sample_type = attr.sample_type; 1686 etm->branches_id = id; 1687 id += 1; 1688 attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR; 1689 } 1690 1691 if (etm->synth_opts.last_branch) { 1692 attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; 1693 /* 1694 * We don't use the hardware index, but the sample generation 1695 * code uses the new format branch_stack with this field, 1696 * so the event attributes must indicate that it's present. 1697 */ 1698 attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX; 1699 } 1700 1701 if (etm->synth_opts.instructions) { 1702 attr.config = PERF_COUNT_HW_INSTRUCTIONS; 1703 attr.sample_period = etm->synth_opts.period; 1704 etm->instructions_sample_period = attr.sample_period; 1705 err = cs_etm__synth_event(session, &attr, id); 1706 if (err) 1707 return err; 1708 etm->instructions_sample_type = attr.sample_type; 1709 etm->instructions_id = id; 1710 id += 1; 1711 } 1712 1713 return 0; 1714 } 1715 1716 static int cs_etm__sample(struct cs_etm_queue *etmq, 1717 struct cs_etm_traceid_queue *tidq) 1718 { 1719 struct cs_etm_auxtrace *etm = etmq->etm; 1720 int ret; 1721 u8 trace_chan_id = tidq->trace_chan_id; 1722 u64 instrs_prev; 1723 1724 /* Get instructions remainder from previous packet */ 1725 instrs_prev = tidq->period_instructions; 1726 1727 tidq->period_instructions += tidq->packet->instr_count; 1728 1729 /* 1730 * Record a branch when the last instruction in 1731 * PREV_PACKET is a branch. 1732 */ 1733 if (etm->synth_opts.last_branch && 1734 tidq->prev_packet->sample_type == CS_ETM_RANGE && 1735 tidq->prev_packet->last_instr_taken_branch) 1736 cs_etm__update_last_branch_rb(etmq, tidq); 1737 1738 if (etm->synth_opts.instructions && 1739 tidq->period_instructions >= etm->instructions_sample_period) { 1740 /* 1741 * Emit instruction sample periodically 1742 * TODO: allow period to be defined in cycles and clock time 1743 */ 1744 1745 /* 1746 * Below diagram demonstrates the instruction samples 1747 * generation flows: 1748 * 1749 * Instrs Instrs Instrs Instrs 1750 * Sample(n) Sample(n+1) Sample(n+2) Sample(n+3) 1751 * | | | | 1752 * V V V V 1753 * -------------------------------------------------- 1754 * ^ ^ 1755 * | | 1756 * Period Period 1757 * instructions(Pi) instructions(Pi') 1758 * 1759 * | | 1760 * \---------------- -----------------/ 1761 * V 1762 * tidq->packet->instr_count 1763 * 1764 * Instrs Sample(n...) are the synthesised samples occurring 1765 * every etm->instructions_sample_period instructions - as 1766 * defined on the perf command line. Sample(n) is being the 1767 * last sample before the current etm packet, n+1 to n+3 1768 * samples are generated from the current etm packet. 1769 * 1770 * tidq->packet->instr_count represents the number of 1771 * instructions in the current etm packet. 1772 * 1773 * Period instructions (Pi) contains the number of 1774 * instructions executed after the sample point(n) from the 1775 * previous etm packet. This will always be less than 1776 * etm->instructions_sample_period. 
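		 *
		 * As a concrete example, with a sample period of 10000 and
		 * instrs_prev == 9990, the first sample synthesised for the
		 * new packet lands after only 10 of its instructions
		 * (offset == 10), and each later sample in the loop below is
		 * a full period apart.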
		 *
		 * When new samples are generated, the instructions come from
		 * two parts: the tail of the previous packet and the head of
		 * the newly arrived packet together produce sample(n+1),
		 * while sample(n+2) and sample(n+3) each consume a full
		 * sample period of the new packet's instructions. After
		 * sample(n+3), the remaining instructions are carried over in
		 * tidq->period_instructions for the next round of
		 * calculation.
		 */

		/*
		 * Get the initial offset into the current packet instructions;
		 * entry conditions ensure that instrs_prev is less than
		 * etm->instructions_sample_period.
		 */
		u64 offset = etm->instructions_sample_period - instrs_prev;
		u64 addr;

		/* Prepare last branches for instruction sample */
		if (etm->synth_opts.last_branch)
			cs_etm__copy_last_branch_rb(etmq, tidq);

		while (tidq->period_instructions >=
				etm->instructions_sample_period) {
			/*
			 * Calculate the address of the sampled instruction (-1
			 * as sample is reported as though instruction has just
			 * been executed, but PC has not advanced to next
			 * instruction)
			 */
			addr = cs_etm__instr_addr(etmq, trace_chan_id,
						  tidq->packet, offset - 1);
			ret = cs_etm__synth_instruction_sample(
				etmq, tidq, addr,
				etm->instructions_sample_period);
			if (ret)
				return ret;

			offset += etm->instructions_sample_period;
			tidq->period_instructions -=
				etm->instructions_sample_period;
		}
	}

	if (etm->synth_opts.branches) {
		bool generate_sample = false;

		/* Generate sample for tracing on packet */
		if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY)
			generate_sample = true;

		/* Generate sample for branch taken packet */
		if (tidq->prev_packet->sample_type == CS_ETM_RANGE &&
		    tidq->prev_packet->last_instr_taken_branch)
			generate_sample = true;

		if (generate_sample) {
			ret = cs_etm__synth_branch_sample(etmq, tidq);
			if (ret)
				return ret;
		}
	}

	cs_etm__packet_swap(etm, tidq);

	return 0;
}

static int cs_etm__exception(struct cs_etm_traceid_queue *tidq)
{
	/*
	 * When an exception packet is inserted, force
	 * 'prev_packet->last_instr_taken_branch' to true regardless of
	 * whether the last instruction in the previous range packet was a
	 * taken branch. This ensures that a branch sample is generated for
	 * the instruction range executed before the exception is trapped to
	 * the kernel, or before the exception return.
	 *
	 * The exception packet carries dummy address values, so don't swap
	 * PACKET with PREV_PACKET. This keeps PREV_PACKET usable for
	 * generating instruction and branch samples.
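	 *
	 * In other words, when the next sample is synthesised, PREV_PACKET
	 * still describes the last address range executed before the
	 * exception.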
1857 */ 1858 if (tidq->prev_packet->sample_type == CS_ETM_RANGE) 1859 tidq->prev_packet->last_instr_taken_branch = true; 1860 1861 return 0; 1862 } 1863 1864 static int cs_etm__flush(struct cs_etm_queue *etmq, 1865 struct cs_etm_traceid_queue *tidq) 1866 { 1867 int err = 0; 1868 struct cs_etm_auxtrace *etm = etmq->etm; 1869 1870 /* Handle start tracing packet */ 1871 if (tidq->prev_packet->sample_type == CS_ETM_EMPTY) 1872 goto swap_packet; 1873 1874 if (etmq->etm->synth_opts.last_branch && 1875 etmq->etm->synth_opts.instructions && 1876 tidq->prev_packet->sample_type == CS_ETM_RANGE) { 1877 u64 addr; 1878 1879 /* Prepare last branches for instruction sample */ 1880 cs_etm__copy_last_branch_rb(etmq, tidq); 1881 1882 /* 1883 * Generate a last branch event for the branches left in the 1884 * circular buffer at the end of the trace. 1885 * 1886 * Use the address of the end of the last reported execution 1887 * range 1888 */ 1889 addr = cs_etm__last_executed_instr(tidq->prev_packet); 1890 1891 err = cs_etm__synth_instruction_sample( 1892 etmq, tidq, addr, 1893 tidq->period_instructions); 1894 if (err) 1895 return err; 1896 1897 tidq->period_instructions = 0; 1898 1899 } 1900 1901 if (etm->synth_opts.branches && 1902 tidq->prev_packet->sample_type == CS_ETM_RANGE) { 1903 err = cs_etm__synth_branch_sample(etmq, tidq); 1904 if (err) 1905 return err; 1906 } 1907 1908 swap_packet: 1909 cs_etm__packet_swap(etm, tidq); 1910 1911 /* Reset last branches after flush the trace */ 1912 if (etm->synth_opts.last_branch) 1913 cs_etm__reset_last_branch_rb(tidq); 1914 1915 return err; 1916 } 1917 1918 static int cs_etm__end_block(struct cs_etm_queue *etmq, 1919 struct cs_etm_traceid_queue *tidq) 1920 { 1921 int err; 1922 1923 /* 1924 * It has no new packet coming and 'etmq->packet' contains the stale 1925 * packet which was set at the previous time with packets swapping; 1926 * so skip to generate branch sample to avoid stale packet. 1927 * 1928 * For this case only flush branch stack and generate a last branch 1929 * event for the branches left in the circular buffer at the end of 1930 * the trace. 1931 */ 1932 if (etmq->etm->synth_opts.last_branch && 1933 etmq->etm->synth_opts.instructions && 1934 tidq->prev_packet->sample_type == CS_ETM_RANGE) { 1935 u64 addr; 1936 1937 /* Prepare last branches for instruction sample */ 1938 cs_etm__copy_last_branch_rb(etmq, tidq); 1939 1940 /* 1941 * Use the address of the end of the last reported execution 1942 * range. 1943 */ 1944 addr = cs_etm__last_executed_instr(tidq->prev_packet); 1945 1946 err = cs_etm__synth_instruction_sample( 1947 etmq, tidq, addr, 1948 tidq->period_instructions); 1949 if (err) 1950 return err; 1951 1952 tidq->period_instructions = 0; 1953 } 1954 1955 return 0; 1956 } 1957 /* 1958 * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue 1959 * if need be. 1960 * Returns: < 0 if error 1961 * = 0 if no more auxtrace_buffer to read 1962 * > 0 if the current buffer isn't empty yet 1963 */ 1964 static int cs_etm__get_data_block(struct cs_etm_queue *etmq) 1965 { 1966 int ret; 1967 1968 if (!etmq->buf_len) { 1969 ret = cs_etm__get_trace(etmq); 1970 if (ret <= 0) 1971 return ret; 1972 /* 1973 * We cannot assume consecutive blocks in the data file 1974 * are contiguous, reset the decoder to force re-sync. 
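		 *
		 * After the reset, decode restarts from the next
		 * synchronisation point in the new block rather than carrying
		 * stale state across.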
1975 */ 1976 ret = cs_etm_decoder__reset(etmq->decoder); 1977 if (ret) 1978 return ret; 1979 } 1980 1981 return etmq->buf_len; 1982 } 1983 1984 static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id, 1985 struct cs_etm_packet *packet, 1986 u64 end_addr) 1987 { 1988 /* Initialise to keep compiler happy */ 1989 u16 instr16 = 0; 1990 u32 instr32 = 0; 1991 u64 addr; 1992 1993 switch (packet->isa) { 1994 case CS_ETM_ISA_T32: 1995 /* 1996 * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247: 1997 * 1998 * b'15 b'8 1999 * +-----------------+--------+ 2000 * | 1 1 0 1 1 1 1 1 | imm8 | 2001 * +-----------------+--------+ 2002 * 2003 * According to the specification, it only defines SVC for T32 2004 * with 16 bits instruction and has no definition for 32bits; 2005 * so below only read 2 bytes as instruction size for T32. 2006 */ 2007 addr = end_addr - 2; 2008 cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr16), 2009 (u8 *)&instr16, 0); 2010 if ((instr16 & 0xFF00) == 0xDF00) 2011 return true; 2012 2013 break; 2014 case CS_ETM_ISA_A32: 2015 /* 2016 * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247: 2017 * 2018 * b'31 b'28 b'27 b'24 2019 * +---------+---------+-------------------------+ 2020 * | !1111 | 1 1 1 1 | imm24 | 2021 * +---------+---------+-------------------------+ 2022 */ 2023 addr = end_addr - 4; 2024 cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32), 2025 (u8 *)&instr32, 0); 2026 if ((instr32 & 0x0F000000) == 0x0F000000 && 2027 (instr32 & 0xF0000000) != 0xF0000000) 2028 return true; 2029 2030 break; 2031 case CS_ETM_ISA_A64: 2032 /* 2033 * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294: 2034 * 2035 * b'31 b'21 b'4 b'0 2036 * +-----------------------+---------+-----------+ 2037 * | 1 1 0 1 0 1 0 0 0 0 0 | imm16 | 0 0 0 0 1 | 2038 * +-----------------------+---------+-----------+ 2039 */ 2040 addr = end_addr - 4; 2041 cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32), 2042 (u8 *)&instr32, 0); 2043 if ((instr32 & 0xFFE0001F) == 0xd4000001) 2044 return true; 2045 2046 break; 2047 case CS_ETM_ISA_UNKNOWN: 2048 default: 2049 break; 2050 } 2051 2052 return false; 2053 } 2054 2055 static bool cs_etm__is_syscall(struct cs_etm_queue *etmq, 2056 struct cs_etm_traceid_queue *tidq, u64 magic) 2057 { 2058 u8 trace_chan_id = tidq->trace_chan_id; 2059 struct cs_etm_packet *packet = tidq->packet; 2060 struct cs_etm_packet *prev_packet = tidq->prev_packet; 2061 2062 if (magic == __perf_cs_etmv3_magic) 2063 if (packet->exception_number == CS_ETMV3_EXC_SVC) 2064 return true; 2065 2066 /* 2067 * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and 2068 * HVC cases; need to check if it's SVC instruction based on 2069 * packet address. 
2070 */ 2071 if (magic == __perf_cs_etmv4_magic) { 2072 if (packet->exception_number == CS_ETMV4_EXC_CALL && 2073 cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet, 2074 prev_packet->end_addr)) 2075 return true; 2076 } 2077 2078 return false; 2079 } 2080 2081 static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq, 2082 u64 magic) 2083 { 2084 struct cs_etm_packet *packet = tidq->packet; 2085 2086 if (magic == __perf_cs_etmv3_magic) 2087 if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT || 2088 packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT || 2089 packet->exception_number == CS_ETMV3_EXC_PE_RESET || 2090 packet->exception_number == CS_ETMV3_EXC_IRQ || 2091 packet->exception_number == CS_ETMV3_EXC_FIQ) 2092 return true; 2093 2094 if (magic == __perf_cs_etmv4_magic) 2095 if (packet->exception_number == CS_ETMV4_EXC_RESET || 2096 packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT || 2097 packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR || 2098 packet->exception_number == CS_ETMV4_EXC_INST_DEBUG || 2099 packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG || 2100 packet->exception_number == CS_ETMV4_EXC_IRQ || 2101 packet->exception_number == CS_ETMV4_EXC_FIQ) 2102 return true; 2103 2104 return false; 2105 } 2106 2107 static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, 2108 struct cs_etm_traceid_queue *tidq, 2109 u64 magic) 2110 { 2111 u8 trace_chan_id = tidq->trace_chan_id; 2112 struct cs_etm_packet *packet = tidq->packet; 2113 struct cs_etm_packet *prev_packet = tidq->prev_packet; 2114 2115 if (magic == __perf_cs_etmv3_magic) 2116 if (packet->exception_number == CS_ETMV3_EXC_SMC || 2117 packet->exception_number == CS_ETMV3_EXC_HYP || 2118 packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE || 2119 packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR || 2120 packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT || 2121 packet->exception_number == CS_ETMV3_EXC_DATA_FAULT || 2122 packet->exception_number == CS_ETMV3_EXC_GENERIC) 2123 return true; 2124 2125 if (magic == __perf_cs_etmv4_magic) { 2126 if (packet->exception_number == CS_ETMV4_EXC_TRAP || 2127 packet->exception_number == CS_ETMV4_EXC_ALIGNMENT || 2128 packet->exception_number == CS_ETMV4_EXC_INST_FAULT || 2129 packet->exception_number == CS_ETMV4_EXC_DATA_FAULT) 2130 return true; 2131 2132 /* 2133 * For CS_ETMV4_EXC_CALL, except SVC other instructions 2134 * (SMC, HVC) are taken as sync exceptions. 2135 */ 2136 if (packet->exception_number == CS_ETMV4_EXC_CALL && 2137 !cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet, 2138 prev_packet->end_addr)) 2139 return true; 2140 2141 /* 2142 * ETMv4 has 5 bits for exception number; if the numbers 2143 * are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ] 2144 * they are implementation defined exceptions. 2145 * 2146 * For this case, simply take it as sync exception. 
2147 */
2148 if (packet->exception_number > CS_ETMV4_EXC_FIQ &&
2149 packet->exception_number <= CS_ETMV4_EXC_END)
2150 return true;
2151 }
2152
2153 return false;
2154 }
2155
2156 static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
2157 struct cs_etm_traceid_queue *tidq)
2158 {
2159 struct cs_etm_packet *packet = tidq->packet;
2160 struct cs_etm_packet *prev_packet = tidq->prev_packet;
2161 u8 trace_chan_id = tidq->trace_chan_id;
2162 u64 magic;
2163 int ret;
2164
2165 switch (packet->sample_type) {
2166 case CS_ETM_RANGE:
2167 /*
2168 * An immediate branch instruction with neither the link nor
2169 * the return flag is a normal branch instruction within
2170 * the function.
2171 */
2172 if (packet->last_instr_type == OCSD_INSTR_BR &&
2173 packet->last_instr_subtype == OCSD_S_INSTR_NONE) {
2174 packet->flags = PERF_IP_FLAG_BRANCH;
2175
2176 if (packet->last_instr_cond)
2177 packet->flags |= PERF_IP_FLAG_CONDITIONAL;
2178 }
2179
2180 /*
2181 * An immediate branch instruction with link (e.g. BL) is a
2182 * branch instruction used for a function call.
2183 */
2184 if (packet->last_instr_type == OCSD_INSTR_BR &&
2185 packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
2186 packet->flags = PERF_IP_FLAG_BRANCH |
2187 PERF_IP_FLAG_CALL;
2188
2189 /*
2190 * An indirect branch instruction with link (e.g. BLR) is a
2191 * branch instruction used for a function call.
2192 */
2193 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2194 packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
2195 packet->flags = PERF_IP_FLAG_BRANCH |
2196 PERF_IP_FLAG_CALL;
2197
2198 /*
2199 * An indirect branch instruction with subtype
2200 * OCSD_S_INSTR_V7_IMPLIED_RET is an explicit hint of a
2201 * function return for A32/T32.
2202 */
2203 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2204 packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET)
2205 packet->flags = PERF_IP_FLAG_BRANCH |
2206 PERF_IP_FLAG_RETURN;
2207
2208 /*
2209 * An indirect branch instruction without link (e.g. BR) is
2210 * usually used for a function return, especially for functions
2211 * in dynamically linked libraries.
2212 */
2213 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2214 packet->last_instr_subtype == OCSD_S_INSTR_NONE)
2215 packet->flags = PERF_IP_FLAG_BRANCH |
2216 PERF_IP_FLAG_RETURN;
2217
2218 /* Return instruction for function return. */
2219 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2220 packet->last_instr_subtype == OCSD_S_INSTR_V8_RET)
2221 packet->flags = PERF_IP_FLAG_BRANCH |
2222 PERF_IP_FLAG_RETURN;
2223
2224 /*
2225 * The decoder might insert a discontinuity in the middle of
2226 * instruction packets; fix up prev_packet with the
2227 * PERF_IP_FLAG_TRACE_BEGIN flag to indicate that trace restarted.
2228 */
2229 if (prev_packet->sample_type == CS_ETM_DISCONTINUITY)
2230 prev_packet->flags |= PERF_IP_FLAG_BRANCH |
2231 PERF_IP_FLAG_TRACE_BEGIN;
2232
2233 /*
2234 * If the previous packet is an exception return packet
2235 * and the return address immediately follows an SVC
2236 * instruction, calibrate the previous packet's sample flags
2237 * to PERF_IP_FLAG_SYSCALLRET.
2238 */
2239 if (prev_packet->flags == (PERF_IP_FLAG_BRANCH |
2240 PERF_IP_FLAG_RETURN |
2241 PERF_IP_FLAG_INTERRUPT) &&
2242 cs_etm__is_svc_instr(etmq, trace_chan_id,
2243 packet, packet->start_addr))
2244 prev_packet->flags = PERF_IP_FLAG_BRANCH |
2245 PERF_IP_FLAG_RETURN |
2246 PERF_IP_FLAG_SYSCALLRET;
2247 break;
2248 case CS_ETM_DISCONTINUITY:
2249 /*
2250 * The trace is discontinuous; if the previous packet is an
2251 * instruction range packet, set the PERF_IP_FLAG_TRACE_END
2252 * flag on it.
2253 */
2254 if (prev_packet->sample_type == CS_ETM_RANGE)
2255 prev_packet->flags |= PERF_IP_FLAG_BRANCH |
2256 PERF_IP_FLAG_TRACE_END;
2257 break;
2258 case CS_ETM_EXCEPTION:
2259 ret = cs_etm__get_magic(packet->trace_chan_id, &magic);
2260 if (ret)
2261 return ret;
2262
2263 /* The exception is for a system call. */
2264 if (cs_etm__is_syscall(etmq, tidq, magic))
2265 packet->flags = PERF_IP_FLAG_BRANCH |
2266 PERF_IP_FLAG_CALL |
2267 PERF_IP_FLAG_SYSCALLRET;
2268 /*
2269 * These exceptions are triggered by external signals from the
2270 * bus, interrupt controller, debug module, PE reset or halt.
2271 */
2272 else if (cs_etm__is_async_exception(tidq, magic))
2273 packet->flags = PERF_IP_FLAG_BRANCH |
2274 PERF_IP_FLAG_CALL |
2275 PERF_IP_FLAG_ASYNC |
2276 PERF_IP_FLAG_INTERRUPT;
2277 /*
2278 * Otherwise, the exception is caused by a trap, an instruction
2279 * or data fault, or an alignment error.
2280 */
2281 else if (cs_etm__is_sync_exception(etmq, tidq, magic))
2282 packet->flags = PERF_IP_FLAG_BRANCH |
2283 PERF_IP_FLAG_CALL |
2284 PERF_IP_FLAG_INTERRUPT;
2285
2286 /*
2287 * An exception packet is not used standalone for generating
2288 * samples; it is affiliated with the previous instruction
2289 * range packet. So when the exception packet is inserted,
2290 * set the previous range packet's flags to tell perf that
2291 * it is an exception taken branch.
2292 */
2293 if (prev_packet->sample_type == CS_ETM_RANGE)
2294 prev_packet->flags = packet->flags;
2295 break;
2296 case CS_ETM_EXCEPTION_RET:
2297 /*
2298 * Likewise, an exception return packet is not used standalone
2299 * for generating samples; it is affiliated with the previous
2300 * instruction range packet. So when the exception return
2301 * packet is inserted, set the previous range packet's flags to
2302 * tell perf that it is an exception return branch.
2303 *
2304 * The exception return can be for either a system call or
2305 * another exception type; unfortunately the packet doesn't
2306 * carry exception type information, so we cannot determine
2307 * the exception type purely from the exception return packet.
2308 * Recording the exception number from the exception packet and
2309 * reusing it for the exception return packet is not reliable
2310 * either, because the trace can be discontinuous or the
2311 * interrupt can be nested; in those two cases the recorded
2312 * exception number cannot be used for the exception return packet.
2313 *
2314 * For an exception return packet, we only need to distinguish
2315 * whether it is for a system call or for another type. That
2316 * decision can be deferred until the next packet, which
2317 * contains the return address; based on the return address we
2318 * can read back the previous instruction, check whether it is a
2319 * system call instruction, and then calibrate the sample flags
2320 * as needed.
2321 */
2322 if (prev_packet->sample_type == CS_ETM_RANGE)
2323 prev_packet->flags = PERF_IP_FLAG_BRANCH |
2324 PERF_IP_FLAG_RETURN |
2325 PERF_IP_FLAG_INTERRUPT;
2326 break;
2327 case CS_ETM_EMPTY:
2328 default:
2329 break;
2330 }
2331
2332 return 0;
2333 }
2334
2335 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq)
2336 {
2337 int ret = 0;
2338 size_t processed = 0;
2339
2340 /*
2341 * Packets are decoded and added to the decoder's packet queue
2342 * until the decoder packet processing callback has requested that
2343 * processing stops or there is nothing left in the buffer. Normal
2344 * operations that stop processing are a timestamp packet or a full
2345 * decoder buffer queue.
2346 */
2347 ret = cs_etm_decoder__process_data_block(etmq->decoder,
2348 etmq->offset,
2349 &etmq->buf[etmq->buf_used],
2350 etmq->buf_len,
2351 &processed);
2352 if (ret)
2353 goto out;
2354
2355 etmq->offset += processed;
2356 etmq->buf_used += processed;
2357 etmq->buf_len -= processed;
2358
2359 out:
2360 return ret;
2361 }
2362
2363 static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq,
2364 struct cs_etm_traceid_queue *tidq)
2365 {
2366 int ret;
2367 struct cs_etm_packet_queue *packet_queue;
2368
2369 packet_queue = &tidq->packet_queue;
2370
2371 /* Process each packet in this chunk */
2372 while (1) {
2373 ret = cs_etm_decoder__get_packet(packet_queue,
2374 tidq->packet);
2375 if (ret <= 0)
2376 /*
2377 * Stop processing this chunk on
2378 * end of data or error
2379 */
2380 break;
2381
2382 /*
2383 * Packet addresses are swapped while packets are handled
2384 * in the switch() statement below, so the sample flags
2385 * must be set before the switch() statement in order to
2386 * use the address information before the packets are
2387 * swapped.
2388 */
2389 ret = cs_etm__set_sample_flags(etmq, tidq);
2390 if (ret < 0)
2391 break;
2392
2393 switch (tidq->packet->sample_type) {
2394 case CS_ETM_RANGE:
2395 /*
2396 * If the packet contains an instruction
2397 * range, generate instruction sequence
2398 * events.
2399 */
2400 cs_etm__sample(etmq, tidq);
2401 break;
2402 case CS_ETM_EXCEPTION:
2403 case CS_ETM_EXCEPTION_RET:
2404 /*
2405 * When an exception packet arrives,
2406 * make sure the previous instruction
2407 * range packet is handled properly.
2408 */
2409 cs_etm__exception(tidq);
2410 break;
2411 case CS_ETM_DISCONTINUITY:
2412 /*
2413 * Discontinuity in the trace, flush
2414 * the previous branch stack.
2415 */
2416 cs_etm__flush(etmq, tidq);
2417 break;
2418 case CS_ETM_EMPTY:
2419 /*
2420 * An empty packet should never be
2421 * received; report an error.
2422 */
2423 pr_err("CS ETM Trace: empty packet\n");
2424 return -EINVAL;
2425 default:
2426 break;
2427 }
2428 }
2429
2430 return ret;
2431 }
2432
2433 static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq)
2434 {
2435 int idx;
2436 struct int_node *inode;
2437 struct cs_etm_traceid_queue *tidq;
2438 struct intlist *traceid_queues_list = etmq->traceid_queues_list;
2439
2440 intlist__for_each_entry(inode, traceid_queues_list) {
2441 idx = (int)(intptr_t)inode->priv;
2442 tidq = etmq->traceid_queues[idx];
2443
2444 /* Ignore return value */
2445 cs_etm__process_traceid_queue(etmq, tidq);
2446
2447 /*
2448 * Generate an instruction sample with the remaining
2449 * branch stack entries.
2450 */ 2451 cs_etm__flush(etmq, tidq); 2452 } 2453 } 2454 2455 static int cs_etm__run_per_thread_timeless_decoder(struct cs_etm_queue *etmq) 2456 { 2457 int err = 0; 2458 struct cs_etm_traceid_queue *tidq; 2459 2460 tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID); 2461 if (!tidq) 2462 return -EINVAL; 2463 2464 /* Go through each buffer in the queue and decode them one by one */ 2465 while (1) { 2466 err = cs_etm__get_data_block(etmq); 2467 if (err <= 0) 2468 return err; 2469 2470 /* Run trace decoder until buffer consumed or end of trace */ 2471 do { 2472 err = cs_etm__decode_data_block(etmq); 2473 if (err) 2474 return err; 2475 2476 /* 2477 * Process each packet in this chunk, nothing to do if 2478 * an error occurs other than hoping the next one will 2479 * be better. 2480 */ 2481 err = cs_etm__process_traceid_queue(etmq, tidq); 2482 2483 } while (etmq->buf_len); 2484 2485 if (err == 0) 2486 /* Flush any remaining branch stack entries */ 2487 err = cs_etm__end_block(etmq, tidq); 2488 } 2489 2490 return err; 2491 } 2492 2493 static int cs_etm__run_per_cpu_timeless_decoder(struct cs_etm_queue *etmq) 2494 { 2495 int idx, err = 0; 2496 struct cs_etm_traceid_queue *tidq; 2497 struct int_node *inode; 2498 2499 /* Go through each buffer in the queue and decode them one by one */ 2500 while (1) { 2501 err = cs_etm__get_data_block(etmq); 2502 if (err <= 0) 2503 return err; 2504 2505 /* Run trace decoder until buffer consumed or end of trace */ 2506 do { 2507 err = cs_etm__decode_data_block(etmq); 2508 if (err) 2509 return err; 2510 2511 /* 2512 * cs_etm__run_per_thread_timeless_decoder() runs on a 2513 * single traceID queue because each TID has a separate 2514 * buffer. But here in per-cpu mode we need to iterate 2515 * over each channel instead. 
2516 */ 2517 intlist__for_each_entry(inode, 2518 etmq->traceid_queues_list) { 2519 idx = (int)(intptr_t)inode->priv; 2520 tidq = etmq->traceid_queues[idx]; 2521 cs_etm__process_traceid_queue(etmq, tidq); 2522 } 2523 } while (etmq->buf_len); 2524 2525 intlist__for_each_entry(inode, etmq->traceid_queues_list) { 2526 idx = (int)(intptr_t)inode->priv; 2527 tidq = etmq->traceid_queues[idx]; 2528 /* Flush any remaining branch stack entries */ 2529 err = cs_etm__end_block(etmq, tidq); 2530 if (err) 2531 return err; 2532 } 2533 } 2534 2535 return err; 2536 } 2537 2538 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, 2539 pid_t tid) 2540 { 2541 unsigned int i; 2542 struct auxtrace_queues *queues = &etm->queues; 2543 2544 for (i = 0; i < queues->nr_queues; i++) { 2545 struct auxtrace_queue *queue = &etm->queues.queue_array[i]; 2546 struct cs_etm_queue *etmq = queue->priv; 2547 struct cs_etm_traceid_queue *tidq; 2548 2549 if (!etmq) 2550 continue; 2551 2552 if (etm->per_thread_decoding) { 2553 tidq = cs_etm__etmq_get_traceid_queue( 2554 etmq, CS_ETM_PER_THREAD_TRACEID); 2555 2556 if (!tidq) 2557 continue; 2558 2559 if (tid == -1 || thread__tid(tidq->thread) == tid) 2560 cs_etm__run_per_thread_timeless_decoder(etmq); 2561 } else 2562 cs_etm__run_per_cpu_timeless_decoder(etmq); 2563 } 2564 2565 return 0; 2566 } 2567 2568 static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm) 2569 { 2570 int ret = 0; 2571 unsigned int cs_queue_nr, queue_nr, i; 2572 u8 trace_chan_id; 2573 u64 cs_timestamp; 2574 struct auxtrace_queue *queue; 2575 struct cs_etm_queue *etmq; 2576 struct cs_etm_traceid_queue *tidq; 2577 2578 /* 2579 * Pre-populate the heap with one entry from each queue so that we can 2580 * start processing in time order across all queues. 2581 */ 2582 for (i = 0; i < etm->queues.nr_queues; i++) { 2583 etmq = etm->queues.queue_array[i].priv; 2584 if (!etmq) 2585 continue; 2586 2587 ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i); 2588 if (ret) 2589 return ret; 2590 } 2591 2592 while (1) { 2593 if (!etm->heap.heap_cnt) 2594 goto out; 2595 2596 /* Take the entry at the top of the min heap */ 2597 cs_queue_nr = etm->heap.heap_array[0].queue_nr; 2598 queue_nr = TO_QUEUE_NR(cs_queue_nr); 2599 trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr); 2600 queue = &etm->queues.queue_array[queue_nr]; 2601 etmq = queue->priv; 2602 2603 /* 2604 * Remove the top entry from the heap since we are about 2605 * to process it. 2606 */ 2607 auxtrace_heap__pop(&etm->heap); 2608 2609 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); 2610 if (!tidq) { 2611 /* 2612 * No traceID queue has been allocated for this traceID, 2613 * which means something somewhere went very wrong. No 2614 * other choice than simply exit. 2615 */ 2616 ret = -EINVAL; 2617 goto out; 2618 } 2619 2620 /* 2621 * Packets associated with this timestamp are already in 2622 * the etmq's traceID queue, so process them. 2623 */ 2624 ret = cs_etm__process_traceid_queue(etmq, tidq); 2625 if (ret < 0) 2626 goto out; 2627 2628 /* 2629 * Packets for this timestamp have been processed, time to 2630 * move on to the next timestamp, fetching a new auxtrace_buffer 2631 * if need be. 2632 */ 2633 refetch: 2634 ret = cs_etm__get_data_block(etmq); 2635 if (ret < 0) 2636 goto out; 2637 2638 /* 2639 * No more auxtrace_buffers to process in this etmq, simply 2640 * move on to another entry in the auxtrace_heap. 
2641 */ 2642 if (!ret) 2643 continue; 2644 2645 ret = cs_etm__decode_data_block(etmq); 2646 if (ret) 2647 goto out; 2648 2649 cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id); 2650 2651 if (!cs_timestamp) { 2652 /* 2653 * Function cs_etm__decode_data_block() returns when 2654 * there is no more traces to decode in the current 2655 * auxtrace_buffer OR when a timestamp has been 2656 * encountered on any of the traceID queues. Since we 2657 * did not get a timestamp, there is no more traces to 2658 * process in this auxtrace_buffer. As such empty and 2659 * flush all traceID queues. 2660 */ 2661 cs_etm__clear_all_traceid_queues(etmq); 2662 2663 /* Fetch another auxtrace_buffer for this etmq */ 2664 goto refetch; 2665 } 2666 2667 /* 2668 * Add to the min heap the timestamp for packets that have 2669 * just been decoded. They will be processed and synthesized 2670 * during the next call to cs_etm__process_traceid_queue() for 2671 * this queue/traceID. 2672 */ 2673 cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id); 2674 ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp); 2675 } 2676 2677 out: 2678 return ret; 2679 } 2680 2681 static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm, 2682 union perf_event *event) 2683 { 2684 struct thread *th; 2685 2686 if (etm->timeless_decoding) 2687 return 0; 2688 2689 /* 2690 * Add the tid/pid to the log so that we can get a match when we get a 2691 * contextID from the decoder. Only track for the host: only kernel 2692 * trace is supported for guests which wouldn't need pids so this should 2693 * be fine. 2694 */ 2695 th = machine__findnew_thread(&etm->session->machines.host, 2696 event->itrace_start.pid, 2697 event->itrace_start.tid); 2698 if (!th) 2699 return -ENOMEM; 2700 2701 thread__put(th); 2702 2703 return 0; 2704 } 2705 2706 static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm, 2707 union perf_event *event) 2708 { 2709 struct thread *th; 2710 bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; 2711 2712 /* 2713 * Context switch in per-thread mode are irrelevant since perf 2714 * will start/stop tracing as the process is scheduled. 2715 */ 2716 if (etm->timeless_decoding) 2717 return 0; 2718 2719 /* 2720 * SWITCH_IN events carry the next process to be switched out while 2721 * SWITCH_OUT events carry the process to be switched in. As such 2722 * we don't care about IN events. 2723 */ 2724 if (!out) 2725 return 0; 2726 2727 /* 2728 * Add the tid/pid to the log so that we can get a match when we get a 2729 * contextID from the decoder. Only track for the host: only kernel 2730 * trace is supported for guests which wouldn't need pids so this should 2731 * be fine. 
2732 */ 2733 th = machine__findnew_thread(&etm->session->machines.host, 2734 event->context_switch.next_prev_pid, 2735 event->context_switch.next_prev_tid); 2736 if (!th) 2737 return -ENOMEM; 2738 2739 thread__put(th); 2740 2741 return 0; 2742 } 2743 2744 static int cs_etm__process_event(struct perf_session *session, 2745 union perf_event *event, 2746 struct perf_sample *sample, 2747 struct perf_tool *tool) 2748 { 2749 struct cs_etm_auxtrace *etm = container_of(session->auxtrace, 2750 struct cs_etm_auxtrace, 2751 auxtrace); 2752 2753 if (dump_trace) 2754 return 0; 2755 2756 if (!tool->ordered_events) { 2757 pr_err("CoreSight ETM Trace requires ordered events\n"); 2758 return -EINVAL; 2759 } 2760 2761 switch (event->header.type) { 2762 case PERF_RECORD_EXIT: 2763 /* 2764 * Don't need to wait for cs_etm__flush_events() in per-thread mode to 2765 * start the decode because we know there will be no more trace from 2766 * this thread. All this does is emit samples earlier than waiting for 2767 * the flush in other modes, but with timestamps it makes sense to wait 2768 * for flush so that events from different threads are interleaved 2769 * properly. 2770 */ 2771 if (etm->per_thread_decoding && etm->timeless_decoding) 2772 return cs_etm__process_timeless_queues(etm, 2773 event->fork.tid); 2774 break; 2775 2776 case PERF_RECORD_ITRACE_START: 2777 return cs_etm__process_itrace_start(etm, event); 2778 2779 case PERF_RECORD_SWITCH_CPU_WIDE: 2780 return cs_etm__process_switch_cpu_wide(etm, event); 2781 2782 case PERF_RECORD_AUX: 2783 /* 2784 * Record the latest kernel timestamp available in the header 2785 * for samples so that synthesised samples occur from this point 2786 * onwards. 2787 */ 2788 if (sample->time && (sample->time != (u64)-1)) 2789 etm->latest_kernel_timestamp = sample->time; 2790 break; 2791 2792 default: 2793 break; 2794 } 2795 2796 return 0; 2797 } 2798 2799 static void dump_queued_data(struct cs_etm_auxtrace *etm, 2800 struct perf_record_auxtrace *event) 2801 { 2802 struct auxtrace_buffer *buf; 2803 unsigned int i; 2804 /* 2805 * Find all buffers with same reference in the queues and dump them. 2806 * This is because the queues can contain multiple entries of the same 2807 * buffer that were split on aux records. 2808 */ 2809 for (i = 0; i < etm->queues.nr_queues; ++i) 2810 list_for_each_entry(buf, &etm->queues.queue_array[i].head, list) 2811 if (buf->reference == event->reference) 2812 cs_etm__dump_event(etm->queues.queue_array[i].priv, buf); 2813 } 2814 2815 static int cs_etm__process_auxtrace_event(struct perf_session *session, 2816 union perf_event *event, 2817 struct perf_tool *tool __maybe_unused) 2818 { 2819 struct cs_etm_auxtrace *etm = container_of(session->auxtrace, 2820 struct cs_etm_auxtrace, 2821 auxtrace); 2822 if (!etm->data_queued) { 2823 struct auxtrace_buffer *buffer; 2824 off_t data_offset; 2825 int fd = perf_data__fd(session->data); 2826 bool is_pipe = perf_data__is_pipe(session->data); 2827 int err; 2828 int idx = event->auxtrace.idx; 2829 2830 if (is_pipe) 2831 data_offset = 0; 2832 else { 2833 data_offset = lseek(fd, 0, SEEK_CUR); 2834 if (data_offset == -1) 2835 return -errno; 2836 } 2837 2838 err = auxtrace_queues__add_event(&etm->queues, session, 2839 event, data_offset, &buffer); 2840 if (err) 2841 return err; 2842 2843 /* 2844 * Knowing if the trace is formatted or not requires a lookup of 2845 * the aux record so only works in non-piped mode where data is 2846 * queued in cs_etm__queue_aux_records(). 
Always assume 2847 * formatted in piped mode (true). 2848 */ 2849 err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx], 2850 idx, true, -1); 2851 if (err) 2852 return err; 2853 2854 if (dump_trace) 2855 if (auxtrace_buffer__get_data(buffer, fd)) { 2856 cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer); 2857 auxtrace_buffer__put_data(buffer); 2858 } 2859 } else if (dump_trace) 2860 dump_queued_data(etm, &event->auxtrace); 2861 2862 return 0; 2863 } 2864 2865 static int cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace *etm) 2866 { 2867 struct evsel *evsel; 2868 struct evlist *evlist = etm->session->evlist; 2869 2870 /* Override timeless mode with user input from --itrace=Z */ 2871 if (etm->synth_opts.timeless_decoding) { 2872 etm->timeless_decoding = true; 2873 return 0; 2874 } 2875 2876 /* 2877 * Find the cs_etm evsel and look at what its timestamp setting was 2878 */ 2879 evlist__for_each_entry(evlist, evsel) 2880 if (cs_etm__evsel_is_auxtrace(etm->session, evsel)) { 2881 etm->timeless_decoding = 2882 !(evsel->core.attr.config & BIT(ETM_OPT_TS)); 2883 return 0; 2884 } 2885 2886 pr_err("CS ETM: Couldn't find ETM evsel\n"); 2887 return -EINVAL; 2888 } 2889 2890 /* 2891 * Read a single cpu parameter block from the auxtrace_info priv block. 2892 * 2893 * For version 1 there is a per cpu nr_params entry. If we are handling 2894 * version 1 file, then there may be less, the same, or more params 2895 * indicated by this value than the compile time number we understand. 2896 * 2897 * For a version 0 info block, there are a fixed number, and we need to 2898 * fill out the nr_param value in the metadata we create. 2899 */ 2900 static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset, 2901 int out_blk_size, int nr_params_v0) 2902 { 2903 u64 *metadata = NULL; 2904 int hdr_version; 2905 int nr_in_params, nr_out_params, nr_cmn_params; 2906 int i, k; 2907 2908 metadata = zalloc(sizeof(*metadata) * out_blk_size); 2909 if (!metadata) 2910 return NULL; 2911 2912 /* read block current index & version */ 2913 i = *buff_in_offset; 2914 hdr_version = buff_in[CS_HEADER_VERSION]; 2915 2916 if (!hdr_version) { 2917 /* read version 0 info block into a version 1 metadata block */ 2918 nr_in_params = nr_params_v0; 2919 metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC]; 2920 metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU]; 2921 metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params; 2922 /* remaining block params at offset +1 from source */ 2923 for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++) 2924 metadata[k + 1] = buff_in[i + k]; 2925 /* version 0 has 2 common params */ 2926 nr_cmn_params = 2; 2927 } else { 2928 /* read version 1 info block - input and output nr_params may differ */ 2929 /* version 1 has 3 common params */ 2930 nr_cmn_params = 3; 2931 nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS]; 2932 2933 /* if input has more params than output - skip excess */ 2934 nr_out_params = nr_in_params + nr_cmn_params; 2935 if (nr_out_params > out_blk_size) 2936 nr_out_params = out_blk_size; 2937 2938 for (k = CS_ETM_MAGIC; k < nr_out_params; k++) 2939 metadata[k] = buff_in[i + k]; 2940 2941 /* record the actual nr params we copied */ 2942 metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params; 2943 } 2944 2945 /* adjust in offset by number of in params used */ 2946 i += nr_in_params + nr_cmn_params; 2947 *buff_in_offset = i; 2948 return metadata; 2949 } 2950 2951 /** 2952 * Puts a fragment of an auxtrace buffer into the auxtrace queues based 2953 * on the bounds of 
aux_event, if it matches with the buffer that's at 2954 * file_offset. 2955 * 2956 * Normally, whole auxtrace buffers would be added to the queue. But we 2957 * want to reset the decoder for every PERF_RECORD_AUX event, and the decoder 2958 * is reset across each buffer, so splitting the buffers up in advance has 2959 * the same effect. 2960 */ 2961 static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz, 2962 struct perf_record_aux *aux_event, struct perf_sample *sample) 2963 { 2964 int err; 2965 char buf[PERF_SAMPLE_MAX_SIZE]; 2966 union perf_event *auxtrace_event_union; 2967 struct perf_record_auxtrace *auxtrace_event; 2968 union perf_event auxtrace_fragment; 2969 __u64 aux_offset, aux_size; 2970 __u32 idx; 2971 bool formatted; 2972 2973 struct cs_etm_auxtrace *etm = container_of(session->auxtrace, 2974 struct cs_etm_auxtrace, 2975 auxtrace); 2976 2977 /* 2978 * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got 2979 * from looping through the auxtrace index. 2980 */ 2981 err = perf_session__peek_event(session, file_offset, buf, 2982 PERF_SAMPLE_MAX_SIZE, &auxtrace_event_union, NULL); 2983 if (err) 2984 return err; 2985 auxtrace_event = &auxtrace_event_union->auxtrace; 2986 if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE) 2987 return -EINVAL; 2988 2989 if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) || 2990 auxtrace_event->header.size != sz) { 2991 return -EINVAL; 2992 } 2993 2994 /* 2995 * In per-thread mode, auxtrace CPU is set to -1, but TID will be set instead. See 2996 * auxtrace_mmap_params__set_idx(). However, the sample AUX event will contain a 2997 * CPU as we set this always for the AUX_OUTPUT_HW_ID event. 2998 * So now compare only TIDs if auxtrace CPU is -1, and CPUs if auxtrace CPU is not -1. 2999 * Return 'not found' if mismatch. 3000 */ 3001 if (auxtrace_event->cpu == (__u32) -1) { 3002 etm->per_thread_decoding = true; 3003 if (auxtrace_event->tid != sample->tid) 3004 return 1; 3005 } else if (auxtrace_event->cpu != sample->cpu) { 3006 if (etm->per_thread_decoding) { 3007 /* 3008 * Found a per-cpu buffer after a per-thread one was 3009 * already found 3010 */ 3011 pr_err("CS ETM: Inconsistent per-thread/per-cpu mode.\n"); 3012 return -EINVAL; 3013 } 3014 return 1; 3015 } 3016 3017 if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) { 3018 /* 3019 * Clamp size in snapshot mode. The buffer size is clamped in 3020 * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect 3021 * the buffer size. 3022 */ 3023 aux_size = min(aux_event->aux_size, auxtrace_event->size); 3024 3025 /* 3026 * In this mode, the head also points to the end of the buffer so aux_offset 3027 * needs to have the size subtracted so it points to the beginning as in normal mode 3028 */ 3029 aux_offset = aux_event->aux_offset - aux_size; 3030 } else { 3031 aux_size = aux_event->aux_size; 3032 aux_offset = aux_event->aux_offset; 3033 } 3034 3035 if (aux_offset >= auxtrace_event->offset && 3036 aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) { 3037 /* 3038 * If this AUX event was inside this buffer somewhere, create a new auxtrace event 3039 * based on the sizes of the aux event, and queue that fragment. 
3040 */ 3041 auxtrace_fragment.auxtrace = *auxtrace_event; 3042 auxtrace_fragment.auxtrace.size = aux_size; 3043 auxtrace_fragment.auxtrace.offset = aux_offset; 3044 file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size; 3045 3046 pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64 3047 " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu); 3048 err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment, 3049 file_offset, NULL); 3050 if (err) 3051 return err; 3052 3053 idx = auxtrace_event->idx; 3054 formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW); 3055 return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx], 3056 idx, formatted, sample->cpu); 3057 } 3058 3059 /* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */ 3060 return 1; 3061 } 3062 3063 static int cs_etm__process_aux_hw_id_cb(struct perf_session *session, union perf_event *event, 3064 u64 offset __maybe_unused, void *data __maybe_unused) 3065 { 3066 /* look to handle PERF_RECORD_AUX_OUTPUT_HW_ID early to ensure decoders can be set up */ 3067 if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) { 3068 (*(int *)data)++; /* increment found count */ 3069 return cs_etm__process_aux_output_hw_id(session, event); 3070 } 3071 return 0; 3072 } 3073 3074 static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event, 3075 u64 offset __maybe_unused, void *data __maybe_unused) 3076 { 3077 struct perf_sample sample; 3078 int ret; 3079 struct auxtrace_index_entry *ent; 3080 struct auxtrace_index *auxtrace_index; 3081 struct evsel *evsel; 3082 size_t i; 3083 3084 /* Don't care about any other events, we're only queuing buffers for AUX events */ 3085 if (event->header.type != PERF_RECORD_AUX) 3086 return 0; 3087 3088 if (event->header.size < sizeof(struct perf_record_aux)) 3089 return -EINVAL; 3090 3091 /* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */ 3092 if (!event->aux.aux_size) 3093 return 0; 3094 3095 /* 3096 * Parse the sample, we need the sample_id_all data that comes after the event so that the 3097 * CPU or PID can be matched to an AUXTRACE buffer's CPU or PID. 3098 */ 3099 evsel = evlist__event2evsel(session->evlist, event); 3100 if (!evsel) 3101 return -EINVAL; 3102 ret = evsel__parse_sample(evsel, event, &sample); 3103 if (ret) 3104 return ret; 3105 3106 /* 3107 * Loop through the auxtrace index to find the buffer that matches up with this aux event. 3108 */ 3109 list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) { 3110 for (i = 0; i < auxtrace_index->nr; i++) { 3111 ent = &auxtrace_index->entries[i]; 3112 ret = cs_etm__queue_aux_fragment(session, ent->file_offset, 3113 ent->sz, &event->aux, &sample); 3114 /* 3115 * Stop search on error or successful values. Continue search on 3116 * 1 ('not found') 3117 */ 3118 if (ret != 1) 3119 return ret; 3120 } 3121 } 3122 3123 /* 3124 * Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but 3125 * don't exit with an error because it will still be possible to decode other aux records. 
3126 */ 3127 pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64 3128 " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu); 3129 return 0; 3130 } 3131 3132 static int cs_etm__queue_aux_records(struct perf_session *session) 3133 { 3134 struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index, 3135 struct auxtrace_index, list); 3136 if (index && index->nr > 0) 3137 return perf_session__peek_events(session, session->header.data_offset, 3138 session->header.data_size, 3139 cs_etm__queue_aux_records_cb, NULL); 3140 3141 /* 3142 * We would get here if there are no entries in the index (either no auxtrace 3143 * buffers or no index at all). Fail silently as there is the possibility of 3144 * queueing them in cs_etm__process_auxtrace_event() if etm->data_queued is still 3145 * false. 3146 * 3147 * In that scenario, buffers will not be split by AUX records. 3148 */ 3149 return 0; 3150 } 3151 3152 #define HAS_PARAM(j, type, param) (metadata[(j)][CS_ETM_NR_TRC_PARAMS] <= \ 3153 (CS_##type##_##param - CS_ETM_COMMON_BLK_MAX_V1)) 3154 3155 /* 3156 * Loop through the ETMs and complain if we find at least one where ts_source != 1 (virtual 3157 * timestamps). 3158 */ 3159 static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu) 3160 { 3161 int j; 3162 3163 for (j = 0; j < num_cpu; j++) { 3164 switch (metadata[j][CS_ETM_MAGIC]) { 3165 case __perf_cs_etmv4_magic: 3166 if (HAS_PARAM(j, ETMV4, TS_SOURCE) || metadata[j][CS_ETMV4_TS_SOURCE] != 1) 3167 return false; 3168 break; 3169 case __perf_cs_ete_magic: 3170 if (HAS_PARAM(j, ETE, TS_SOURCE) || metadata[j][CS_ETE_TS_SOURCE] != 1) 3171 return false; 3172 break; 3173 default: 3174 /* Unknown / unsupported magic number. */ 3175 return false; 3176 } 3177 } 3178 return true; 3179 } 3180 3181 /* map trace ids to correct metadata block, from information in metadata */ 3182 static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata) 3183 { 3184 u64 cs_etm_magic; 3185 u8 trace_chan_id; 3186 int i, err; 3187 3188 for (i = 0; i < num_cpu; i++) { 3189 cs_etm_magic = metadata[i][CS_ETM_MAGIC]; 3190 switch (cs_etm_magic) { 3191 case __perf_cs_etmv3_magic: 3192 metadata[i][CS_ETM_ETMTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK; 3193 trace_chan_id = (u8)(metadata[i][CS_ETM_ETMTRACEIDR]); 3194 break; 3195 case __perf_cs_etmv4_magic: 3196 case __perf_cs_ete_magic: 3197 metadata[i][CS_ETMV4_TRCTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK; 3198 trace_chan_id = (u8)(metadata[i][CS_ETMV4_TRCTRACEIDR]); 3199 break; 3200 default: 3201 /* unknown magic number */ 3202 return -EINVAL; 3203 } 3204 err = cs_etm__map_trace_id(trace_chan_id, metadata[i]); 3205 if (err) 3206 return err; 3207 } 3208 return 0; 3209 } 3210 3211 /* 3212 * If we found AUX_HW_ID packets, then set any metadata marked as unused to the 3213 * unused value to reduce the number of unneeded decoders created. 
3214 */ 3215 static int cs_etm__clear_unused_trace_ids_metadata(int num_cpu, u64 **metadata) 3216 { 3217 u64 cs_etm_magic; 3218 int i; 3219 3220 for (i = 0; i < num_cpu; i++) { 3221 cs_etm_magic = metadata[i][CS_ETM_MAGIC]; 3222 switch (cs_etm_magic) { 3223 case __perf_cs_etmv3_magic: 3224 if (metadata[i][CS_ETM_ETMTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG) 3225 metadata[i][CS_ETM_ETMTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL; 3226 break; 3227 case __perf_cs_etmv4_magic: 3228 case __perf_cs_ete_magic: 3229 if (metadata[i][CS_ETMV4_TRCTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG) 3230 metadata[i][CS_ETMV4_TRCTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL; 3231 break; 3232 default: 3233 /* unknown magic number */ 3234 return -EINVAL; 3235 } 3236 } 3237 return 0; 3238 } 3239 3240 int cs_etm__process_auxtrace_info_full(union perf_event *event, 3241 struct perf_session *session) 3242 { 3243 struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; 3244 struct cs_etm_auxtrace *etm = NULL; 3245 struct perf_record_time_conv *tc = &session->time_conv; 3246 int event_header_size = sizeof(struct perf_event_header); 3247 int total_size = auxtrace_info->header.size; 3248 int priv_size = 0; 3249 int num_cpu; 3250 int err = 0; 3251 int aux_hw_id_found; 3252 int i, j; 3253 u64 *ptr = NULL; 3254 u64 **metadata = NULL; 3255 3256 /* 3257 * Create an RB tree for traceID-metadata tuple. Since the conversion 3258 * has to be made for each packet that gets decoded, optimizing access 3259 * in anything other than a sequential array is worth doing. 3260 */ 3261 traceid_list = intlist__new(NULL); 3262 if (!traceid_list) 3263 return -ENOMEM; 3264 3265 /* First the global part */ 3266 ptr = (u64 *) auxtrace_info->priv; 3267 num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff; 3268 metadata = zalloc(sizeof(*metadata) * num_cpu); 3269 if (!metadata) { 3270 err = -ENOMEM; 3271 goto err_free_traceid_list; 3272 } 3273 3274 /* Start parsing after the common part of the header */ 3275 i = CS_HEADER_VERSION_MAX; 3276 3277 /* 3278 * The metadata is stored in the auxtrace_info section and encodes 3279 * the configuration of the ARM embedded trace macrocell which is 3280 * required by the trace decoder to properly decode the trace due 3281 * to its highly compressed nature. 3282 */ 3283 for (j = 0; j < num_cpu; j++) { 3284 if (ptr[i] == __perf_cs_etmv3_magic) { 3285 metadata[j] = 3286 cs_etm__create_meta_blk(ptr, &i, 3287 CS_ETM_PRIV_MAX, 3288 CS_ETM_NR_TRC_PARAMS_V0); 3289 } else if (ptr[i] == __perf_cs_etmv4_magic) { 3290 metadata[j] = 3291 cs_etm__create_meta_blk(ptr, &i, 3292 CS_ETMV4_PRIV_MAX, 3293 CS_ETMV4_NR_TRC_PARAMS_V0); 3294 } else if (ptr[i] == __perf_cs_ete_magic) { 3295 metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1); 3296 } else { 3297 ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". File could be from a newer version of perf.\n", 3298 ptr[i]); 3299 err = -EINVAL; 3300 goto err_free_metadata; 3301 } 3302 3303 if (!metadata[j]) { 3304 err = -ENOMEM; 3305 goto err_free_metadata; 3306 } 3307 } 3308 3309 /* 3310 * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and 3311 * CS_ETMV4_PRIV_MAX mark how many double words are in the 3312 * global metadata, and each cpu's metadata respectively. 3313 * The following tests if the correct number of double words was 3314 * present in the auxtrace info section. 
3315 */
3316 priv_size = total_size - event_header_size - INFO_HEADER_SIZE;
3317 if (i * 8 != priv_size) {
3318 err = -EINVAL;
3319 goto err_free_metadata;
3320 }
3321
3322 etm = zalloc(sizeof(*etm));
3323
3324 if (!etm) {
3325 err = -ENOMEM;
3326 goto err_free_metadata;
3327 }
3328
3329 /*
3330 * As all the ETMs run at the same exception level, the system should
3331 * have the same PID format across CPUs. So cache the PID format
3332 * and reuse it for sequential decoding.
3333 */
3334 etm->pid_fmt = cs_etm__init_pid_fmt(metadata[0]);
3335
3336 err = auxtrace_queues__init(&etm->queues);
3337 if (err)
3338 goto err_free_etm;
3339
3340 if (session->itrace_synth_opts->set) {
3341 etm->synth_opts = *session->itrace_synth_opts;
3342 } else {
3343 itrace_synth_opts__set_default(&etm->synth_opts,
3344 session->itrace_synth_opts->default_no_sample);
3345 etm->synth_opts.callchain = false;
3346 }
3347
3348 etm->session = session;
3349
3350 etm->num_cpu = num_cpu;
3351 etm->pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> 32) & 0xffffffff);
3352 etm->snapshot_mode = (ptr[CS_ETM_SNAPSHOT] != 0);
3353 etm->metadata = metadata;
3354 etm->auxtrace_type = auxtrace_info->type;
3355
3356 if (etm->synth_opts.use_timestamp)
3357 /*
3358 * Prior to Armv8.4, Arm CPUs don't support the FEAT_TRF feature,
3359 * therefore the decoder cannot know whether the traced timestamp
3360 * is the same as the kernel time.
3361 *
3362 * A user who knows the platform can specify the itrace option
3363 * 'T' to tell the decoder to forcibly use the traced timestamp
3364 * as the kernel time.
3365 */
3366 etm->has_virtual_ts = true;
3367 else
3368 /* Use virtual timestamps if all ETMs report ts_source = 1 */
3369 etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu);
3370
3371 if (!etm->has_virtual_ts)
3372 ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n"
3373 "The time field of the samples will not be set accurately.\n"
3374 "For Arm CPUs prior to Armv8.4 or without FEAT_TRF support,\n"
3375 "you can specify the itrace option 'T' for timestamp decoding\n"
3376 "if the CoreSight timestamp on the platform is the same as the kernel time.\n\n");
3377
3378 etm->auxtrace.process_event = cs_etm__process_event;
3379 etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
3380 etm->auxtrace.flush_events = cs_etm__flush_events;
3381 etm->auxtrace.free_events = cs_etm__free_events;
3382 etm->auxtrace.free = cs_etm__free;
3383 etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace;
3384 session->auxtrace = &etm->auxtrace;
3385
3386 err = cs_etm__setup_timeless_decoding(etm);
3387 if (err)
3388 return err;
3389
3390 etm->tc.time_shift = tc->time_shift;
3391 etm->tc.time_mult = tc->time_mult;
3392 etm->tc.time_zero = tc->time_zero;
3393 if (event_contains(*tc, time_cycles)) {
3394 etm->tc.time_cycles = tc->time_cycles;
3395 etm->tc.time_mask = tc->time_mask;
3396 etm->tc.cap_user_time_zero = tc->cap_user_time_zero;
3397 etm->tc.cap_user_time_short = tc->cap_user_time_short;
3398 }
3399 err = cs_etm__synth_events(etm, session);
3400 if (err)
3401 goto err_free_queues;
3402
3403 /*
3404 * Map Trace ID values to CPU metadata.
3405 *
3406 * Trace metadata will always contain Trace ID values from the legacy algorithm. If the
3407 * file has been recorded by a "new" perf updated to handle AUX_HW_ID then the metadata
3408 * ID value will also have the CORESIGHT_TRACE_ID_UNUSED_FLAG set.
3409 *
3410 * The updated kernel drivers that use AUX_HW_ID to send Trace IDs will attempt to use
3411 * the same IDs as the old algorithm as far as is possible, unless there are clashes,
3412 * in which case a different value will be used. This means an older perf may still
3413 * be able to record and read files generated on a newer system.
3414 *
3415 * For a perf able to interpret AUX_HW_ID packets we first check for the presence of
3416 * those packets. If they are there then the values will be mapped and plugged into
3417 * the metadata. We then set any remaining metadata values that still carry the unused
3418 * flag to the value CORESIGHT_TRACE_ID_UNUSED_VAL - which indicates no decoder is required.
3419 *
3420 * If no AUX_HW_ID packets are present - which means a file recorded on an old kernel -
3421 * then we map Trace ID values to CPUs directly from the metadata, clearing any unused
3422 * flags if present.
3423 */
3424
3425 /* first scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
3426 aux_hw_id_found = 0;
3427 err = perf_session__peek_events(session, session->header.data_offset,
3428 session->header.data_size,
3429 cs_etm__process_aux_hw_id_cb, &aux_hw_id_found);
3430 if (err)
3431 goto err_free_queues;
3432
3433 /* if HW ID found then clear any unused metadata ID values */
3434 if (aux_hw_id_found)
3435 err = cs_etm__clear_unused_trace_ids_metadata(num_cpu, metadata);
3436 /* otherwise, this is a file with metadata values only, map from metadata */
3437 else
3438 err = cs_etm__map_trace_ids_metadata(num_cpu, metadata);
3439
3440 if (err)
3441 goto err_free_queues;
3442
3443 err = cs_etm__queue_aux_records(session);
3444 if (err)
3445 goto err_free_queues;
3446
3447 etm->data_queued = etm->queues.populated;
3448 return 0;
3449
3450 err_free_queues:
3451 auxtrace_queues__free(&etm->queues);
3452 session->auxtrace = NULL;
3453 err_free_etm:
3454 zfree(&etm);
3455 err_free_metadata:
3456 /* No need to check @metadata[j], free(NULL) is supported */
3457 for (j = 0; j < num_cpu; j++)
3458 zfree(&metadata[j]);
3459 zfree(&metadata);
3460 err_free_traceid_list:
3461 intlist__delete(traceid_list);
3462 return err;
3463 }
3464