1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright(C) 2015-2018 Linaro Limited. 4 * 5 * Author: Tor Jeremiassen <tor@ti.com> 6 * Author: Mathieu Poirier <mathieu.poirier@linaro.org> 7 */ 8 9 #include <linux/bitops.h> 10 #include <linux/err.h> 11 #include <linux/kernel.h> 12 #include <linux/log2.h> 13 #include <linux/types.h> 14 #include <linux/zalloc.h> 15 16 #include <opencsd/ocsd_if_types.h> 17 #include <stdlib.h> 18 19 #include "auxtrace.h" 20 #include "color.h" 21 #include "cs-etm.h" 22 #include "cs-etm-decoder/cs-etm-decoder.h" 23 #include "debug.h" 24 #include "evlist.h" 25 #include "intlist.h" 26 #include "machine.h" 27 #include "map.h" 28 #include "perf.h" 29 #include "symbol.h" 30 #include "thread.h" 31 #include "thread_map.h" 32 #include "thread-stack.h" 33 #include <tools/libc_compat.h> 34 #include "util.h" 35 36 #define MAX_TIMESTAMP (~0ULL) 37 38 struct cs_etm_auxtrace { 39 struct auxtrace auxtrace; 40 struct auxtrace_queues queues; 41 struct auxtrace_heap heap; 42 struct itrace_synth_opts synth_opts; 43 struct perf_session *session; 44 struct machine *machine; 45 struct thread *unknown_thread; 46 47 u8 timeless_decoding; 48 u8 snapshot_mode; 49 u8 data_queued; 50 u8 sample_branches; 51 u8 sample_instructions; 52 53 int num_cpu; 54 u32 auxtrace_type; 55 u64 branches_sample_type; 56 u64 branches_id; 57 u64 instructions_sample_type; 58 u64 instructions_sample_period; 59 u64 instructions_id; 60 u64 **metadata; 61 u64 kernel_start; 62 unsigned int pmu_type; 63 }; 64 65 struct cs_etm_traceid_queue { 66 u8 trace_chan_id; 67 pid_t pid, tid; 68 u64 period_instructions; 69 size_t last_branch_pos; 70 union perf_event *event_buf; 71 struct thread *thread; 72 struct branch_stack *last_branch; 73 struct branch_stack *last_branch_rb; 74 struct cs_etm_packet *prev_packet; 75 struct cs_etm_packet *packet; 76 struct cs_etm_packet_queue packet_queue; 77 }; 78 79 struct cs_etm_queue { 80 struct cs_etm_auxtrace *etm; 81 struct cs_etm_decoder *decoder; 82 struct auxtrace_buffer *buffer; 83 unsigned int queue_nr; 84 u8 pending_timestamp; 85 u64 offset; 86 const unsigned char *buf; 87 size_t buf_len, buf_used; 88 /* Conversion between traceID and index in traceid_queues array */ 89 struct intlist *traceid_queues_list; 90 struct cs_etm_traceid_queue **traceid_queues; 91 }; 92 93 static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); 94 static int cs_etm__process_queues(struct cs_etm_auxtrace *etm); 95 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, 96 pid_t tid); 97 static int cs_etm__get_data_block(struct cs_etm_queue *etmq); 98 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq); 99 100 /* PTMs ETMIDR [11:8] set to b0011 */ 101 #define ETMIDR_PTM_VERSION 0x00000300 102 103 /* 104 * A struct auxtrace_heap_item only has a queue_nr and a timestamp to 105 * work with. One option is to modify to auxtrace_heap_XYZ() API or simply 106 * encode the etm queue number as the upper 16 bit and the channel as 107 * the lower 16 bit. 
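 * For example, TO_CS_QUEUE_NR(3, 0x10) yields cs_queue_nr 0x30010;
 * TO_QUEUE_NR() then recovers queue number 3 and TO_TRACE_CHAN_ID()
 * recovers trace channel ID 0x10.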
108  */
109 #define TO_CS_QUEUE_NR(queue_nr, trace_chan_id)	\
110	(queue_nr << 16 | trace_chan_id)
111 #define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16)
112 #define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff)
113
114 static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
115 {
116	etmidr &= ETMIDR_PTM_VERSION;
117
118	if (etmidr == ETMIDR_PTM_VERSION)
119		return CS_ETM_PROTO_PTM;
120
121	return CS_ETM_PROTO_ETMV3;
122 }
123
124 static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic)
125 {
126	struct int_node *inode;
127	u64 *metadata;
128
129	inode = intlist__find(traceid_list, trace_chan_id);
130	if (!inode)
131		return -EINVAL;
132
133	metadata = inode->priv;
134	*magic = metadata[CS_ETM_MAGIC];
135	return 0;
136 }
137
138 int cs_etm__get_cpu(u8 trace_chan_id, int *cpu)
139 {
140	struct int_node *inode;
141	u64 *metadata;
142
143	inode = intlist__find(traceid_list, trace_chan_id);
144	if (!inode)
145		return -EINVAL;
146
147	metadata = inode->priv;
148	*cpu = (int)metadata[CS_ETM_CPU];
149	return 0;
150 }
151
152 void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
153					      u8 trace_chan_id)
154 {
155	/*
156	 * When a timestamp packet is encountered the backend code
157	 * is stopped so that the front end has time to process packets
158	 * that were accumulated in the traceID queue. Since there can
159	 * be more than one channel per cs_etm_queue, we need to specify
160	 * what traceID queue needs servicing.
161	 */
162	etmq->pending_timestamp = trace_chan_id;
163 }
164
165 static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
166				      u8 *trace_chan_id)
167 {
168	struct cs_etm_packet_queue *packet_queue;
169
170	if (!etmq->pending_timestamp)
171		return 0;
172
173	if (trace_chan_id)
174		*trace_chan_id = etmq->pending_timestamp;
175
176	packet_queue = cs_etm__etmq_get_packet_queue(etmq,
177						     etmq->pending_timestamp);
178	if (!packet_queue)
179		return 0;
180
181	/* Acknowledge pending status */
182	etmq->pending_timestamp = 0;
183
184	/* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
185	return packet_queue->timestamp;
186 }
187
188 static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
189 {
190	int i;
191
192	queue->head = 0;
193	queue->tail = 0;
194	queue->packet_count = 0;
195	for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) {
196		queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
197		queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
198		queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
199		queue->packet_buffer[i].instr_count = 0;
200		queue->packet_buffer[i].last_instr_taken_branch = false;
201		queue->packet_buffer[i].last_instr_size = 0;
202		queue->packet_buffer[i].last_instr_type = 0;
203		queue->packet_buffer[i].last_instr_subtype = 0;
204		queue->packet_buffer[i].last_instr_cond = 0;
205		queue->packet_buffer[i].flags = 0;
206		queue->packet_buffer[i].exception_number = UINT32_MAX;
207		queue->packet_buffer[i].trace_chan_id = UINT8_MAX;
208		queue->packet_buffer[i].cpu = INT_MIN;
209	}
210 }
211
212 static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq)
213 {
214	int idx;
215	struct int_node *inode;
216	struct cs_etm_traceid_queue *tidq;
217	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
218
219	intlist__for_each_entry(inode, traceid_queues_list) {
220		idx = (int)(intptr_t)inode->priv;
221		tidq = etmq->traceid_queues[idx];
222		cs_etm__clear_packet_queue(&tidq->packet_queue);
223	}
224 }
225
226 static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
227				      struct cs_etm_traceid_queue *tidq,
228				      u8 trace_chan_id)
229 {
230	int rc = -ENOMEM;
231	struct auxtrace_queue *queue;
232	struct cs_etm_auxtrace *etm = etmq->etm;
233
234	cs_etm__clear_packet_queue(&tidq->packet_queue);
235
236	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
237	tidq->tid = queue->tid;
238	tidq->pid = -1;
239	tidq->trace_chan_id = trace_chan_id;
240
241	tidq->packet = zalloc(sizeof(struct cs_etm_packet));
242	if (!tidq->packet)
243		goto out;
244
245	tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet));
246	if (!tidq->prev_packet)
247		goto out_free;
248
249	if (etm->synth_opts.last_branch) {
250		size_t sz = sizeof(struct branch_stack);
251
252		sz += etm->synth_opts.last_branch_sz *
253		      sizeof(struct branch_entry);
254		tidq->last_branch = zalloc(sz);
255		if (!tidq->last_branch)
256			goto out_free;
257		tidq->last_branch_rb = zalloc(sz);
258		if (!tidq->last_branch_rb)
259			goto out_free;
260	}
261
262	tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
263	if (!tidq->event_buf)
264		goto out_free;
265
266	return 0;
267
268 out_free:
269	zfree(&tidq->last_branch_rb);
270	zfree(&tidq->last_branch);
271	zfree(&tidq->prev_packet);
272	zfree(&tidq->packet);
273 out:
274	return rc;
275 }
276
277 static struct cs_etm_traceid_queue
278 *cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
279 {
280	int idx;
281	struct int_node *inode;
282	struct intlist *traceid_queues_list;
283	struct cs_etm_traceid_queue *tidq, **traceid_queues;
284	struct cs_etm_auxtrace *etm = etmq->etm;
285
286	if (etm->timeless_decoding)
287		trace_chan_id = CS_ETM_PER_THREAD_TRACEID;
288
289	traceid_queues_list = etmq->traceid_queues_list;
290
291	/*
292	 * Check if the traceid_queue exists for this traceID by looking
293	 * in the queue list.
294	 */
295	inode = intlist__find(traceid_queues_list, trace_chan_id);
296	if (inode) {
297		idx = (int)(intptr_t)inode->priv;
298		return etmq->traceid_queues[idx];
299	}
300
301	/* We couldn't find a traceid_queue for this traceID, allocate one */
302	tidq = malloc(sizeof(*tidq));
303	if (!tidq)
304		return NULL;
305
306	memset(tidq, 0, sizeof(*tidq));
307
308	/* Get a valid index for the new traceid_queue */
309	idx = intlist__nr_entries(traceid_queues_list);
310	/* Memory for the inode is freed in cs_etm__free_traceid_queues() */
311	inode = intlist__findnew(traceid_queues_list, trace_chan_id);
312	if (!inode)
313		goto out_free;
314
315	/* Associate this traceID with this index */
316	inode->priv = (void *)(intptr_t)idx;
317
318	if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id))
319		goto out_free;
320
321	/* Grow the traceid_queues array by one unit */
322	traceid_queues = etmq->traceid_queues;
323	traceid_queues = reallocarray(traceid_queues,
324				      idx + 1,
325				      sizeof(*traceid_queues));
326
327	/*
328	 * On failure reallocarray() returns NULL and the original block of
329	 * memory is left untouched.
330	 */
331	if (!traceid_queues)
332		goto out_free;
333
334	traceid_queues[idx] = tidq;
335	etmq->traceid_queues = traceid_queues;
336
337	return etmq->traceid_queues[idx];
338
339 out_free:
340	/*
341	 * Function intlist__remove() removes the inode from the list
342	 * and deletes the memory associated with it.
343 */ 344 intlist__remove(traceid_queues_list, inode); 345 free(tidq); 346 347 return NULL; 348 } 349 350 struct cs_etm_packet_queue 351 *cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id) 352 { 353 struct cs_etm_traceid_queue *tidq; 354 355 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); 356 if (tidq) 357 return &tidq->packet_queue; 358 359 return NULL; 360 } 361 362 static void cs_etm__packet_dump(const char *pkt_string) 363 { 364 const char *color = PERF_COLOR_BLUE; 365 int len = strlen(pkt_string); 366 367 if (len && (pkt_string[len-1] == '\n')) 368 color_fprintf(stdout, color, " %s", pkt_string); 369 else 370 color_fprintf(stdout, color, " %s\n", pkt_string); 371 372 fflush(stdout); 373 } 374 375 static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params, 376 struct cs_etm_auxtrace *etm, int idx, 377 u32 etmidr) 378 { 379 u64 **metadata = etm->metadata; 380 381 t_params[idx].protocol = cs_etm__get_v7_protocol_version(etmidr); 382 t_params[idx].etmv3.reg_ctrl = metadata[idx][CS_ETM_ETMCR]; 383 t_params[idx].etmv3.reg_trc_id = metadata[idx][CS_ETM_ETMTRACEIDR]; 384 } 385 386 static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params, 387 struct cs_etm_auxtrace *etm, int idx) 388 { 389 u64 **metadata = etm->metadata; 390 391 t_params[idx].protocol = CS_ETM_PROTO_ETMV4i; 392 t_params[idx].etmv4.reg_idr0 = metadata[idx][CS_ETMV4_TRCIDR0]; 393 t_params[idx].etmv4.reg_idr1 = metadata[idx][CS_ETMV4_TRCIDR1]; 394 t_params[idx].etmv4.reg_idr2 = metadata[idx][CS_ETMV4_TRCIDR2]; 395 t_params[idx].etmv4.reg_idr8 = metadata[idx][CS_ETMV4_TRCIDR8]; 396 t_params[idx].etmv4.reg_configr = metadata[idx][CS_ETMV4_TRCCONFIGR]; 397 t_params[idx].etmv4.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR]; 398 } 399 400 static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params, 401 struct cs_etm_auxtrace *etm) 402 { 403 int i; 404 u32 etmidr; 405 u64 architecture; 406 407 for (i = 0; i < etm->num_cpu; i++) { 408 architecture = etm->metadata[i][CS_ETM_MAGIC]; 409 410 switch (architecture) { 411 case __perf_cs_etmv3_magic: 412 etmidr = etm->metadata[i][CS_ETM_ETMIDR]; 413 cs_etm__set_trace_param_etmv3(t_params, etm, i, etmidr); 414 break; 415 case __perf_cs_etmv4_magic: 416 cs_etm__set_trace_param_etmv4(t_params, etm, i); 417 break; 418 default: 419 return -EINVAL; 420 } 421 } 422 423 return 0; 424 } 425 426 static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, 427 struct cs_etm_queue *etmq, 428 enum cs_etm_decoder_operation mode) 429 { 430 int ret = -EINVAL; 431 432 if (!(mode < CS_ETM_OPERATION_MAX)) 433 goto out; 434 435 d_params->packet_printer = cs_etm__packet_dump; 436 d_params->operation = mode; 437 d_params->data = etmq; 438 d_params->formatted = true; 439 d_params->fsyncs = false; 440 d_params->hsyncs = false; 441 d_params->frame_aligned = true; 442 443 ret = 0; 444 out: 445 return ret; 446 } 447 448 static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, 449 struct auxtrace_buffer *buffer) 450 { 451 int ret; 452 const char *color = PERF_COLOR_BLUE; 453 struct cs_etm_decoder_params d_params; 454 struct cs_etm_trace_params *t_params; 455 struct cs_etm_decoder *decoder; 456 size_t buffer_used = 0; 457 458 fprintf(stdout, "\n"); 459 color_fprintf(stdout, color, 460 ". ... 
CoreSight ETM Trace data: size %zu bytes\n", 461 buffer->size); 462 463 /* Use metadata to fill in trace parameters for trace decoder */ 464 t_params = zalloc(sizeof(*t_params) * etm->num_cpu); 465 466 if (!t_params) 467 return; 468 469 if (cs_etm__init_trace_params(t_params, etm)) 470 goto out_free; 471 472 /* Set decoder parameters to simply print the trace packets */ 473 if (cs_etm__init_decoder_params(&d_params, NULL, 474 CS_ETM_OPERATION_PRINT)) 475 goto out_free; 476 477 decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); 478 479 if (!decoder) 480 goto out_free; 481 do { 482 size_t consumed; 483 484 ret = cs_etm_decoder__process_data_block( 485 decoder, buffer->offset, 486 &((u8 *)buffer->data)[buffer_used], 487 buffer->size - buffer_used, &consumed); 488 if (ret) 489 break; 490 491 buffer_used += consumed; 492 } while (buffer_used < buffer->size); 493 494 cs_etm_decoder__free(decoder); 495 496 out_free: 497 zfree(&t_params); 498 } 499 500 static int cs_etm__flush_events(struct perf_session *session, 501 struct perf_tool *tool) 502 { 503 int ret; 504 struct cs_etm_auxtrace *etm = container_of(session->auxtrace, 505 struct cs_etm_auxtrace, 506 auxtrace); 507 if (dump_trace) 508 return 0; 509 510 if (!tool->ordered_events) 511 return -EINVAL; 512 513 ret = cs_etm__update_queues(etm); 514 515 if (ret < 0) 516 return ret; 517 518 if (etm->timeless_decoding) 519 return cs_etm__process_timeless_queues(etm, -1); 520 521 return cs_etm__process_queues(etm); 522 } 523 524 static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq) 525 { 526 int idx; 527 uintptr_t priv; 528 struct int_node *inode, *tmp; 529 struct cs_etm_traceid_queue *tidq; 530 struct intlist *traceid_queues_list = etmq->traceid_queues_list; 531 532 intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) { 533 priv = (uintptr_t)inode->priv; 534 idx = priv; 535 536 /* Free this traceid_queue from the array */ 537 tidq = etmq->traceid_queues[idx]; 538 thread__zput(tidq->thread); 539 zfree(&tidq->event_buf); 540 zfree(&tidq->last_branch); 541 zfree(&tidq->last_branch_rb); 542 zfree(&tidq->prev_packet); 543 zfree(&tidq->packet); 544 zfree(&tidq); 545 546 /* 547 * Function intlist__remove() removes the inode from the list 548 * and delete the memory associated to it. 
549 */ 550 intlist__remove(traceid_queues_list, inode); 551 } 552 553 /* Then the RB tree itself */ 554 intlist__delete(traceid_queues_list); 555 etmq->traceid_queues_list = NULL; 556 557 /* finally free the traceid_queues array */ 558 zfree(&etmq->traceid_queues); 559 } 560 561 static void cs_etm__free_queue(void *priv) 562 { 563 struct cs_etm_queue *etmq = priv; 564 565 if (!etmq) 566 return; 567 568 cs_etm_decoder__free(etmq->decoder); 569 cs_etm__free_traceid_queues(etmq); 570 free(etmq); 571 } 572 573 static void cs_etm__free_events(struct perf_session *session) 574 { 575 unsigned int i; 576 struct cs_etm_auxtrace *aux = container_of(session->auxtrace, 577 struct cs_etm_auxtrace, 578 auxtrace); 579 struct auxtrace_queues *queues = &aux->queues; 580 581 for (i = 0; i < queues->nr_queues; i++) { 582 cs_etm__free_queue(queues->queue_array[i].priv); 583 queues->queue_array[i].priv = NULL; 584 } 585 586 auxtrace_queues__free(queues); 587 } 588 589 static void cs_etm__free(struct perf_session *session) 590 { 591 int i; 592 struct int_node *inode, *tmp; 593 struct cs_etm_auxtrace *aux = container_of(session->auxtrace, 594 struct cs_etm_auxtrace, 595 auxtrace); 596 cs_etm__free_events(session); 597 session->auxtrace = NULL; 598 599 /* First remove all traceID/metadata nodes for the RB tree */ 600 intlist__for_each_entry_safe(inode, tmp, traceid_list) 601 intlist__remove(traceid_list, inode); 602 /* Then the RB tree itself */ 603 intlist__delete(traceid_list); 604 605 for (i = 0; i < aux->num_cpu; i++) 606 zfree(&aux->metadata[i]); 607 608 thread__zput(aux->unknown_thread); 609 zfree(&aux->metadata); 610 zfree(&aux); 611 } 612 613 static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address) 614 { 615 struct machine *machine; 616 617 machine = etmq->etm->machine; 618 619 if (address >= etmq->etm->kernel_start) { 620 if (machine__is_host(machine)) 621 return PERF_RECORD_MISC_KERNEL; 622 else 623 return PERF_RECORD_MISC_GUEST_KERNEL; 624 } else { 625 if (machine__is_host(machine)) 626 return PERF_RECORD_MISC_USER; 627 else if (perf_guest) 628 return PERF_RECORD_MISC_GUEST_USER; 629 else 630 return PERF_RECORD_MISC_HYPERVISOR; 631 } 632 } 633 634 static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id, 635 u64 address, size_t size, u8 *buffer) 636 { 637 u8 cpumode; 638 u64 offset; 639 int len; 640 struct thread *thread; 641 struct machine *machine; 642 struct addr_location al; 643 struct cs_etm_traceid_queue *tidq; 644 645 if (!etmq) 646 return 0; 647 648 machine = etmq->etm->machine; 649 cpumode = cs_etm__cpu_mode(etmq, address); 650 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); 651 if (!tidq) 652 return 0; 653 654 thread = tidq->thread; 655 if (!thread) { 656 if (cpumode != PERF_RECORD_MISC_KERNEL) 657 return 0; 658 thread = etmq->etm->unknown_thread; 659 } 660 661 if (!thread__find_map(thread, cpumode, address, &al) || !al.map->dso) 662 return 0; 663 664 if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR && 665 dso__data_status_seen(al.map->dso, DSO_DATA_STATUS_SEEN_ITRACE)) 666 return 0; 667 668 offset = al.map->map_ip(al.map, address); 669 670 map__load(al.map); 671 672 len = dso__data_read_offset(al.map->dso, machine, offset, buffer, size); 673 674 if (len <= 0) 675 return 0; 676 677 return len; 678 } 679 680 static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) 681 { 682 struct cs_etm_decoder_params d_params; 683 struct cs_etm_trace_params *t_params = NULL; 684 struct cs_etm_queue *etmq; 685 686 etmq = zalloc(sizeof(*etmq)); 
687 if (!etmq) 688 return NULL; 689 690 etmq->traceid_queues_list = intlist__new(NULL); 691 if (!etmq->traceid_queues_list) 692 goto out_free; 693 694 /* Use metadata to fill in trace parameters for trace decoder */ 695 t_params = zalloc(sizeof(*t_params) * etm->num_cpu); 696 697 if (!t_params) 698 goto out_free; 699 700 if (cs_etm__init_trace_params(t_params, etm)) 701 goto out_free; 702 703 /* Set decoder parameters to decode trace packets */ 704 if (cs_etm__init_decoder_params(&d_params, etmq, 705 CS_ETM_OPERATION_DECODE)) 706 goto out_free; 707 708 etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); 709 710 if (!etmq->decoder) 711 goto out_free; 712 713 /* 714 * Register a function to handle all memory accesses required by 715 * the trace decoder library. 716 */ 717 if (cs_etm_decoder__add_mem_access_cb(etmq->decoder, 718 0x0L, ((u64) -1L), 719 cs_etm__mem_access)) 720 goto out_free_decoder; 721 722 zfree(&t_params); 723 return etmq; 724 725 out_free_decoder: 726 cs_etm_decoder__free(etmq->decoder); 727 out_free: 728 intlist__delete(etmq->traceid_queues_list); 729 free(etmq); 730 731 return NULL; 732 } 733 734 static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, 735 struct auxtrace_queue *queue, 736 unsigned int queue_nr) 737 { 738 int ret = 0; 739 unsigned int cs_queue_nr; 740 u8 trace_chan_id; 741 u64 timestamp; 742 struct cs_etm_queue *etmq = queue->priv; 743 744 if (list_empty(&queue->head) || etmq) 745 goto out; 746 747 etmq = cs_etm__alloc_queue(etm); 748 749 if (!etmq) { 750 ret = -ENOMEM; 751 goto out; 752 } 753 754 queue->priv = etmq; 755 etmq->etm = etm; 756 etmq->queue_nr = queue_nr; 757 etmq->offset = 0; 758 759 if (etm->timeless_decoding) 760 goto out; 761 762 /* 763 * We are under a CPU-wide trace scenario. As such we need to know 764 * when the code that generated the traces started to execute so that 765 * it can be correlated with execution on other CPUs. So we get a 766 * handle on the beginning of traces and decode until we find a 767 * timestamp. The timestamp is then added to the auxtrace min heap 768 * in order to know what nibble (of all the etmqs) to decode first. 769 */ 770 while (1) { 771 /* 772 * Fetch an aux_buffer from this etmq. Bail if no more 773 * blocks or an error has been encountered. 774 */ 775 ret = cs_etm__get_data_block(etmq); 776 if (ret <= 0) 777 goto out; 778 779 /* 780 * Run decoder on the trace block. The decoder will stop when 781 * encountering a timestamp, a full packet queue or the end of 782 * trace for that block. 783 */ 784 ret = cs_etm__decode_data_block(etmq); 785 if (ret) 786 goto out; 787 788 /* 789 * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all 790 * the timestamp calculation for us. 791 */ 792 timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id); 793 794 /* We found a timestamp, no need to continue. */ 795 if (timestamp) 796 break; 797 798 /* 799 * We didn't find a timestamp so empty all the traceid packet 800 * queues before looking for another timestamp packet, either 801 * in the current data block or a new one. Packets that were 802 * just decoded are useless since no timestamp has been 803 * associated with them. As such simply discard them. 804 */ 805 cs_etm__clear_all_packet_queues(etmq); 806 } 807 808 /* 809 * We have a timestamp. Add it to the min heap to reflect when 810 * instructions conveyed by the range packets of this traceID queue 811 * started to execute. 
Once the same has been done for all the traceID
812	 * queues of each etmq, rendering and decoding can start in
813	 * chronological order.
814	 *
815	 * Note that packets decoded above are still in the traceID's packet
816	 * queue and will be processed in cs_etm__process_queues().
817	 */
818	cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
819	ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, timestamp);
820 out:
821	return ret;
822 }
823
824 static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm)
825 {
826	unsigned int i;
827	int ret;
828
829	if (!etm->kernel_start)
830		etm->kernel_start = machine__kernel_start(etm->machine);
831
832	for (i = 0; i < etm->queues.nr_queues; i++) {
833		ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i);
834		if (ret)
835			return ret;
836	}
837
838	return 0;
839 }
840
841 static int cs_etm__update_queues(struct cs_etm_auxtrace *etm)
842 {
843	if (etm->queues.new_data) {
844		etm->queues.new_data = false;
845		return cs_etm__setup_queues(etm);
846	}
847
848	return 0;
849 }
850
851 static inline
852 void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
853				 struct cs_etm_traceid_queue *tidq)
854 {
855	struct branch_stack *bs_src = tidq->last_branch_rb;
856	struct branch_stack *bs_dst = tidq->last_branch;
857	size_t nr = 0;
858
859	/*
860	 * Set the number of records before early exit: ->nr is used to
861	 * determine how many branches to copy from ->entries.
862	 */
863	bs_dst->nr = bs_src->nr;
864
865	/*
866	 * Early exit when there is nothing to copy.
867	 */
868	if (!bs_src->nr)
869		return;
870
871	/*
872	 * As bs_src->entries is a circular buffer, we need to copy from it in
873	 * two steps. First, copy the branches from the most recently inserted
874	 * branch ->last_branch_pos until the end of bs_src->entries buffer.
875	 */
876	nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos;
877	memcpy(&bs_dst->entries[0],
878	       &bs_src->entries[tidq->last_branch_pos],
879	       sizeof(struct branch_entry) * nr);
880
881	/*
882	 * If we wrapped around at least once, the branches from the beginning
883	 * of the bs_src->entries buffer and until the ->last_branch_pos element
884	 * are older valid branches: copy them over. The total number of
885	 * branches copied over will be equal to the number of branches asked by
886	 * the user in last_branch_sz.
887	 */
888	if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
889		memcpy(&bs_dst->entries[nr],
890		       &bs_src->entries[0],
891		       sizeof(struct branch_entry) * tidq->last_branch_pos);
892	}
893 }
894
895 static inline
896 void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq)
897 {
898	tidq->last_branch_pos = 0;
899	tidq->last_branch_rb->nr = 0;
900 }
901
902 static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
903					 u8 trace_chan_id, u64 addr)
904 {
905	u8 instrBytes[2];
906
907	cs_etm__mem_access(etmq, trace_chan_id, addr,
908			   ARRAY_SIZE(instrBytes), instrBytes);
909	/*
910	 * T32 instruction size is indicated by bits[15:11] of the first
911	 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
912	 * denote a 32-bit instruction.
913	 */
914	return ((instrBytes[1] & 0xF8) >= 0xE8) ?
4 : 2; 915 } 916 917 static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet) 918 { 919 /* Returns 0 for the CS_ETM_DISCONTINUITY packet */ 920 if (packet->sample_type == CS_ETM_DISCONTINUITY) 921 return 0; 922 923 return packet->start_addr; 924 } 925 926 static inline 927 u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet) 928 { 929 /* Returns 0 for the CS_ETM_DISCONTINUITY packet */ 930 if (packet->sample_type == CS_ETM_DISCONTINUITY) 931 return 0; 932 933 return packet->end_addr - packet->last_instr_size; 934 } 935 936 static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq, 937 u64 trace_chan_id, 938 const struct cs_etm_packet *packet, 939 u64 offset) 940 { 941 if (packet->isa == CS_ETM_ISA_T32) { 942 u64 addr = packet->start_addr; 943 944 while (offset > 0) { 945 addr += cs_etm__t32_instr_size(etmq, 946 trace_chan_id, addr); 947 offset--; 948 } 949 return addr; 950 } 951 952 /* Assume a 4 byte instruction size (A32/A64) */ 953 return packet->start_addr + offset * 4; 954 } 955 956 static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq, 957 struct cs_etm_traceid_queue *tidq) 958 { 959 struct branch_stack *bs = tidq->last_branch_rb; 960 struct branch_entry *be; 961 962 /* 963 * The branches are recorded in a circular buffer in reverse 964 * chronological order: we start recording from the last element of the 965 * buffer down. After writing the first element of the stack, move the 966 * insert position back to the end of the buffer. 967 */ 968 if (!tidq->last_branch_pos) 969 tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz; 970 971 tidq->last_branch_pos -= 1; 972 973 be = &bs->entries[tidq->last_branch_pos]; 974 be->from = cs_etm__last_executed_instr(tidq->prev_packet); 975 be->to = cs_etm__first_executed_instr(tidq->packet); 976 /* No support for mispredict */ 977 be->flags.mispred = 0; 978 be->flags.predicted = 1; 979 980 /* 981 * Increment bs->nr until reaching the number of last branches asked by 982 * the user on the command line. 
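	 * For example, with last_branch_sz of 4 the insert position walks
	 * 3, 2, 1, 0 and then wraps back to 3, while bs->nr stops counting
	 * once it reaches 4 (the buffer is full from that point on).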
983 */ 984 if (bs->nr < etmq->etm->synth_opts.last_branch_sz) 985 bs->nr += 1; 986 } 987 988 static int cs_etm__inject_event(union perf_event *event, 989 struct perf_sample *sample, u64 type) 990 { 991 event->header.size = perf_event__sample_event_size(sample, type, 0); 992 return perf_event__synthesize_sample(event, type, 0, sample); 993 } 994 995 996 static int 997 cs_etm__get_trace(struct cs_etm_queue *etmq) 998 { 999 struct auxtrace_buffer *aux_buffer = etmq->buffer; 1000 struct auxtrace_buffer *old_buffer = aux_buffer; 1001 struct auxtrace_queue *queue; 1002 1003 queue = &etmq->etm->queues.queue_array[etmq->queue_nr]; 1004 1005 aux_buffer = auxtrace_buffer__next(queue, aux_buffer); 1006 1007 /* If no more data, drop the previous auxtrace_buffer and return */ 1008 if (!aux_buffer) { 1009 if (old_buffer) 1010 auxtrace_buffer__drop_data(old_buffer); 1011 etmq->buf_len = 0; 1012 return 0; 1013 } 1014 1015 etmq->buffer = aux_buffer; 1016 1017 /* If the aux_buffer doesn't have data associated, try to load it */ 1018 if (!aux_buffer->data) { 1019 /* get the file desc associated with the perf data file */ 1020 int fd = perf_data__fd(etmq->etm->session->data); 1021 1022 aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd); 1023 if (!aux_buffer->data) 1024 return -ENOMEM; 1025 } 1026 1027 /* If valid, drop the previous buffer */ 1028 if (old_buffer) 1029 auxtrace_buffer__drop_data(old_buffer); 1030 1031 etmq->buf_used = 0; 1032 etmq->buf_len = aux_buffer->size; 1033 etmq->buf = aux_buffer->data; 1034 1035 return etmq->buf_len; 1036 } 1037 1038 static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, 1039 struct cs_etm_traceid_queue *tidq) 1040 { 1041 if ((!tidq->thread) && (tidq->tid != -1)) 1042 tidq->thread = machine__find_thread(etm->machine, -1, 1043 tidq->tid); 1044 1045 if (tidq->thread) 1046 tidq->pid = tidq->thread->pid_; 1047 } 1048 1049 int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq, 1050 pid_t tid, u8 trace_chan_id) 1051 { 1052 int cpu, err = -EINVAL; 1053 struct cs_etm_auxtrace *etm = etmq->etm; 1054 struct cs_etm_traceid_queue *tidq; 1055 1056 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); 1057 if (!tidq) 1058 return err; 1059 1060 if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0) 1061 return err; 1062 1063 err = machine__set_current_tid(etm->machine, cpu, tid, tid); 1064 if (err) 1065 return err; 1066 1067 tidq->tid = tid; 1068 thread__zput(tidq->thread); 1069 1070 cs_etm__set_pid_tid_cpu(etm, tidq); 1071 return 0; 1072 } 1073 1074 bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq) 1075 { 1076 return !!etmq->etm->timeless_decoding; 1077 } 1078 1079 static void cs_etm__copy_insn(struct cs_etm_queue *etmq, 1080 u64 trace_chan_id, 1081 const struct cs_etm_packet *packet, 1082 struct perf_sample *sample) 1083 { 1084 /* 1085 * It's pointless to read instructions for the CS_ETM_DISCONTINUITY 1086 * packet, so directly bail out with 'insn_len' = 0. 1087 */ 1088 if (packet->sample_type == CS_ETM_DISCONTINUITY) { 1089 sample->insn_len = 0; 1090 return; 1091 } 1092 1093 /* 1094 * T32 instruction size might be 32-bit or 16-bit, decide by calling 1095 * cs_etm__t32_instr_size(). 1096 */ 1097 if (packet->isa == CS_ETM_ISA_T32) 1098 sample->insn_len = cs_etm__t32_instr_size(etmq, trace_chan_id, 1099 sample->ip); 1100 /* Otherwise, A64 and A32 instruction size are always 32-bit. 
*/ 1101 else 1102 sample->insn_len = 4; 1103 1104 cs_etm__mem_access(etmq, trace_chan_id, sample->ip, 1105 sample->insn_len, (void *)sample->insn); 1106 } 1107 1108 static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, 1109 struct cs_etm_traceid_queue *tidq, 1110 u64 addr, u64 period) 1111 { 1112 int ret = 0; 1113 struct cs_etm_auxtrace *etm = etmq->etm; 1114 union perf_event *event = tidq->event_buf; 1115 struct perf_sample sample = {.ip = 0,}; 1116 1117 event->sample.header.type = PERF_RECORD_SAMPLE; 1118 event->sample.header.misc = cs_etm__cpu_mode(etmq, addr); 1119 event->sample.header.size = sizeof(struct perf_event_header); 1120 1121 sample.ip = addr; 1122 sample.pid = tidq->pid; 1123 sample.tid = tidq->tid; 1124 sample.id = etmq->etm->instructions_id; 1125 sample.stream_id = etmq->etm->instructions_id; 1126 sample.period = period; 1127 sample.cpu = tidq->packet->cpu; 1128 sample.flags = tidq->prev_packet->flags; 1129 sample.cpumode = event->sample.header.misc; 1130 1131 cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample); 1132 1133 if (etm->synth_opts.last_branch) { 1134 cs_etm__copy_last_branch_rb(etmq, tidq); 1135 sample.branch_stack = tidq->last_branch; 1136 } 1137 1138 if (etm->synth_opts.inject) { 1139 ret = cs_etm__inject_event(event, &sample, 1140 etm->instructions_sample_type); 1141 if (ret) 1142 return ret; 1143 } 1144 1145 ret = perf_session__deliver_synth_event(etm->session, event, &sample); 1146 1147 if (ret) 1148 pr_err( 1149 "CS ETM Trace: failed to deliver instruction event, error %d\n", 1150 ret); 1151 1152 if (etm->synth_opts.last_branch) 1153 cs_etm__reset_last_branch_rb(tidq); 1154 1155 return ret; 1156 } 1157 1158 /* 1159 * The cs etm packet encodes an instruction range between a branch target 1160 * and the next taken branch. Generate sample accordingly. 
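 * The sample 'ip' is therefore the last executed instruction of the
 * previous range packet (the branch source) and the sample 'addr' is
 * the first instruction of the current packet (the branch target).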
1161 */ 1162 static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, 1163 struct cs_etm_traceid_queue *tidq) 1164 { 1165 int ret = 0; 1166 struct cs_etm_auxtrace *etm = etmq->etm; 1167 struct perf_sample sample = {.ip = 0,}; 1168 union perf_event *event = tidq->event_buf; 1169 struct dummy_branch_stack { 1170 u64 nr; 1171 struct branch_entry entries; 1172 } dummy_bs; 1173 u64 ip; 1174 1175 ip = cs_etm__last_executed_instr(tidq->prev_packet); 1176 1177 event->sample.header.type = PERF_RECORD_SAMPLE; 1178 event->sample.header.misc = cs_etm__cpu_mode(etmq, ip); 1179 event->sample.header.size = sizeof(struct perf_event_header); 1180 1181 sample.ip = ip; 1182 sample.pid = tidq->pid; 1183 sample.tid = tidq->tid; 1184 sample.addr = cs_etm__first_executed_instr(tidq->packet); 1185 sample.id = etmq->etm->branches_id; 1186 sample.stream_id = etmq->etm->branches_id; 1187 sample.period = 1; 1188 sample.cpu = tidq->packet->cpu; 1189 sample.flags = tidq->prev_packet->flags; 1190 sample.cpumode = event->sample.header.misc; 1191 1192 cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->prev_packet, 1193 &sample); 1194 1195 /* 1196 * perf report cannot handle events without a branch stack 1197 */ 1198 if (etm->synth_opts.last_branch) { 1199 dummy_bs = (struct dummy_branch_stack){ 1200 .nr = 1, 1201 .entries = { 1202 .from = sample.ip, 1203 .to = sample.addr, 1204 }, 1205 }; 1206 sample.branch_stack = (struct branch_stack *)&dummy_bs; 1207 } 1208 1209 if (etm->synth_opts.inject) { 1210 ret = cs_etm__inject_event(event, &sample, 1211 etm->branches_sample_type); 1212 if (ret) 1213 return ret; 1214 } 1215 1216 ret = perf_session__deliver_synth_event(etm->session, event, &sample); 1217 1218 if (ret) 1219 pr_err( 1220 "CS ETM Trace: failed to deliver instruction event, error %d\n", 1221 ret); 1222 1223 return ret; 1224 } 1225 1226 struct cs_etm_synth { 1227 struct perf_tool dummy_tool; 1228 struct perf_session *session; 1229 }; 1230 1231 static int cs_etm__event_synth(struct perf_tool *tool, 1232 union perf_event *event, 1233 struct perf_sample *sample __maybe_unused, 1234 struct machine *machine __maybe_unused) 1235 { 1236 struct cs_etm_synth *cs_etm_synth = 1237 container_of(tool, struct cs_etm_synth, dummy_tool); 1238 1239 return perf_session__deliver_synth_event(cs_etm_synth->session, 1240 event, NULL); 1241 } 1242 1243 static int cs_etm__synth_event(struct perf_session *session, 1244 struct perf_event_attr *attr, u64 id) 1245 { 1246 struct cs_etm_synth cs_etm_synth; 1247 1248 memset(&cs_etm_synth, 0, sizeof(struct cs_etm_synth)); 1249 cs_etm_synth.session = session; 1250 1251 return perf_event__synthesize_attr(&cs_etm_synth.dummy_tool, attr, 1, 1252 &id, cs_etm__event_synth); 1253 } 1254 1255 static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, 1256 struct perf_session *session) 1257 { 1258 struct evlist *evlist = session->evlist; 1259 struct evsel *evsel; 1260 struct perf_event_attr attr; 1261 bool found = false; 1262 u64 id; 1263 int err; 1264 1265 evlist__for_each_entry(evlist, evsel) { 1266 if (evsel->core.attr.type == etm->pmu_type) { 1267 found = true; 1268 break; 1269 } 1270 } 1271 1272 if (!found) { 1273 pr_debug("No selected events with CoreSight Trace data\n"); 1274 return 0; 1275 } 1276 1277 memset(&attr, 0, sizeof(struct perf_event_attr)); 1278 attr.size = sizeof(struct perf_event_attr); 1279 attr.type = PERF_TYPE_HARDWARE; 1280 attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK; 1281 attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | 1282 PERF_SAMPLE_PERIOD; 
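	/*
	 * Timeless (per-thread) traces carry no timestamps, so only ask
	 * for PERF_SAMPLE_TIME when decoding timed (per-CPU) traces.
	 */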
1283 if (etm->timeless_decoding) 1284 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; 1285 else 1286 attr.sample_type |= PERF_SAMPLE_TIME; 1287 1288 attr.exclude_user = evsel->core.attr.exclude_user; 1289 attr.exclude_kernel = evsel->core.attr.exclude_kernel; 1290 attr.exclude_hv = evsel->core.attr.exclude_hv; 1291 attr.exclude_host = evsel->core.attr.exclude_host; 1292 attr.exclude_guest = evsel->core.attr.exclude_guest; 1293 attr.sample_id_all = evsel->core.attr.sample_id_all; 1294 attr.read_format = evsel->core.attr.read_format; 1295 1296 /* create new id val to be a fixed offset from evsel id */ 1297 id = evsel->id[0] + 1000000000; 1298 1299 if (!id) 1300 id = 1; 1301 1302 if (etm->synth_opts.branches) { 1303 attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; 1304 attr.sample_period = 1; 1305 attr.sample_type |= PERF_SAMPLE_ADDR; 1306 err = cs_etm__synth_event(session, &attr, id); 1307 if (err) 1308 return err; 1309 etm->sample_branches = true; 1310 etm->branches_sample_type = attr.sample_type; 1311 etm->branches_id = id; 1312 id += 1; 1313 attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR; 1314 } 1315 1316 if (etm->synth_opts.last_branch) 1317 attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; 1318 1319 if (etm->synth_opts.instructions) { 1320 attr.config = PERF_COUNT_HW_INSTRUCTIONS; 1321 attr.sample_period = etm->synth_opts.period; 1322 etm->instructions_sample_period = attr.sample_period; 1323 err = cs_etm__synth_event(session, &attr, id); 1324 if (err) 1325 return err; 1326 etm->sample_instructions = true; 1327 etm->instructions_sample_type = attr.sample_type; 1328 etm->instructions_id = id; 1329 id += 1; 1330 } 1331 1332 return 0; 1333 } 1334 1335 static int cs_etm__sample(struct cs_etm_queue *etmq, 1336 struct cs_etm_traceid_queue *tidq) 1337 { 1338 struct cs_etm_auxtrace *etm = etmq->etm; 1339 struct cs_etm_packet *tmp; 1340 int ret; 1341 u8 trace_chan_id = tidq->trace_chan_id; 1342 u64 instrs_executed = tidq->packet->instr_count; 1343 1344 tidq->period_instructions += instrs_executed; 1345 1346 /* 1347 * Record a branch when the last instruction in 1348 * PREV_PACKET is a branch. 
1349 */ 1350 if (etm->synth_opts.last_branch && 1351 tidq->prev_packet->sample_type == CS_ETM_RANGE && 1352 tidq->prev_packet->last_instr_taken_branch) 1353 cs_etm__update_last_branch_rb(etmq, tidq); 1354 1355 if (etm->sample_instructions && 1356 tidq->period_instructions >= etm->instructions_sample_period) { 1357 /* 1358 * Emit instruction sample periodically 1359 * TODO: allow period to be defined in cycles and clock time 1360 */ 1361 1362 /* Get number of instructions executed after the sample point */ 1363 u64 instrs_over = tidq->period_instructions - 1364 etm->instructions_sample_period; 1365 1366 /* 1367 * Calculate the address of the sampled instruction (-1 as 1368 * sample is reported as though instruction has just been 1369 * executed, but PC has not advanced to next instruction) 1370 */ 1371 u64 offset = (instrs_executed - instrs_over - 1); 1372 u64 addr = cs_etm__instr_addr(etmq, trace_chan_id, 1373 tidq->packet, offset); 1374 1375 ret = cs_etm__synth_instruction_sample( 1376 etmq, tidq, addr, etm->instructions_sample_period); 1377 if (ret) 1378 return ret; 1379 1380 /* Carry remaining instructions into next sample period */ 1381 tidq->period_instructions = instrs_over; 1382 } 1383 1384 if (etm->sample_branches) { 1385 bool generate_sample = false; 1386 1387 /* Generate sample for tracing on packet */ 1388 if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY) 1389 generate_sample = true; 1390 1391 /* Generate sample for branch taken packet */ 1392 if (tidq->prev_packet->sample_type == CS_ETM_RANGE && 1393 tidq->prev_packet->last_instr_taken_branch) 1394 generate_sample = true; 1395 1396 if (generate_sample) { 1397 ret = cs_etm__synth_branch_sample(etmq, tidq); 1398 if (ret) 1399 return ret; 1400 } 1401 } 1402 1403 if (etm->sample_branches || etm->synth_opts.last_branch) { 1404 /* 1405 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for 1406 * the next incoming packet. 1407 */ 1408 tmp = tidq->packet; 1409 tidq->packet = tidq->prev_packet; 1410 tidq->prev_packet = tmp; 1411 } 1412 1413 return 0; 1414 } 1415 1416 static int cs_etm__exception(struct cs_etm_traceid_queue *tidq) 1417 { 1418 /* 1419 * When the exception packet is inserted, whether the last instruction 1420 * in previous range packet is taken branch or not, we need to force 1421 * to set 'prev_packet->last_instr_taken_branch' to true. This ensures 1422 * to generate branch sample for the instruction range before the 1423 * exception is trapped to kernel or before the exception returning. 1424 * 1425 * The exception packet includes the dummy address values, so don't 1426 * swap PACKET with PREV_PACKET. This keeps PREV_PACKET to be useful 1427 * for generating instruction and branch samples. 1428 */ 1429 if (tidq->prev_packet->sample_type == CS_ETM_RANGE) 1430 tidq->prev_packet->last_instr_taken_branch = true; 1431 1432 return 0; 1433 } 1434 1435 static int cs_etm__flush(struct cs_etm_queue *etmq, 1436 struct cs_etm_traceid_queue *tidq) 1437 { 1438 int err = 0; 1439 struct cs_etm_auxtrace *etm = etmq->etm; 1440 struct cs_etm_packet *tmp; 1441 1442 /* Handle start tracing packet */ 1443 if (tidq->prev_packet->sample_type == CS_ETM_EMPTY) 1444 goto swap_packet; 1445 1446 if (etmq->etm->synth_opts.last_branch && 1447 tidq->prev_packet->sample_type == CS_ETM_RANGE) { 1448 /* 1449 * Generate a last branch event for the branches left in the 1450 * circular buffer at the end of the trace. 
1451		 *
1452		 * Use the address of the end of the last reported execution
1453		 * range
1454		 */
1455		u64 addr = cs_etm__last_executed_instr(tidq->prev_packet);
1456
1457		err = cs_etm__synth_instruction_sample(
1458			etmq, tidq, addr,
1459			tidq->period_instructions);
1460		if (err)
1461			return err;
1462
1463		tidq->period_instructions = 0;
1464
1465	}
1466
1467	if (etm->sample_branches &&
1468	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1469		err = cs_etm__synth_branch_sample(etmq, tidq);
1470		if (err)
1471			return err;
1472	}
1473
1474 swap_packet:
1475	if (etm->sample_branches || etm->synth_opts.last_branch) {
1476		/*
1477		 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
1478		 * the next incoming packet.
1479		 */
1480		tmp = tidq->packet;
1481		tidq->packet = tidq->prev_packet;
1482		tidq->prev_packet = tmp;
1483	}
1484
1485	return err;
1486 }
1487
1488 static int cs_etm__end_block(struct cs_etm_queue *etmq,
1489			     struct cs_etm_traceid_queue *tidq)
1490 {
1491	int err;
1492
1493	/*
1494	 * No new packets are coming and 'tidq->packet' still contains the
1495	 * stale packet left over from the previous packet swap, so skip
1496	 * generating a branch sample to avoid using a stale packet.
1497	 *
1498	 * In this case, only flush the branch stack and generate a last
1499	 * branch event for the branches left in the circular buffer at
1500	 * the end of the trace.
1501	 */
1502	if (etmq->etm->synth_opts.last_branch &&
1503	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1504		/*
1505		 * Use the address of the end of the last reported execution
1506		 * range.
1507		 */
1508		u64 addr = cs_etm__last_executed_instr(tidq->prev_packet);
1509
1510		err = cs_etm__synth_instruction_sample(
1511			etmq, tidq, addr,
1512			tidq->period_instructions);
1513		if (err)
1514			return err;
1515
1516		tidq->period_instructions = 0;
1517	}
1518
1519	return 0;
1520 }
1521 /*
1522  * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue
1523  *			   if need be.
1524  * Returns:	< 0	if error
1525  *		= 0	if no more auxtrace_buffer to read
1526  *		> 0	if the current buffer isn't empty yet
1527  */
1528 static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
1529 {
1530	int ret;
1531
1532	if (!etmq->buf_len) {
1533		ret = cs_etm__get_trace(etmq);
1534		if (ret <= 0)
1535			return ret;
1536		/*
1537		 * We cannot assume consecutive blocks in the data file
1538		 * are contiguous, reset the decoder to force re-sync.
1539		 */
1540		ret = cs_etm_decoder__reset(etmq->decoder);
1541		if (ret)
1542			return ret;
1543	}
1544
1545	return etmq->buf_len;
1546 }
1547
1548 static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id,
1549				 struct cs_etm_packet *packet,
1550				 u64 end_addr)
1551 {
1552	/* Initialise to keep compiler happy */
1553	u16 instr16 = 0;
1554	u32 instr32 = 0;
1555	u64 addr;
1556
1557	switch (packet->isa) {
1558	case CS_ETM_ISA_T32:
1559		/*
1560		 * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247:
1561		 *
1562		 * b'15                 b'8
1563		 * +-----------------+--------+
1564		 * | 1 1 0 1 1 1 1 1 |  imm8  |
1565		 * +-----------------+--------+
1566		 *
1567		 * According to the specification, SVC is only defined as a
1568		 * 16-bit T32 instruction and has no 32-bit encoding, so
1569		 * only read 2 bytes below as the T32 instruction size.
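		 * For example, "SVC #1" assembles to 0xdf01, which satisfies
		 * the (instr16 & 0xFF00) == 0xDF00 check below.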
1570 */ 1571 addr = end_addr - 2; 1572 cs_etm__mem_access(etmq, trace_chan_id, addr, 1573 sizeof(instr16), (u8 *)&instr16); 1574 if ((instr16 & 0xFF00) == 0xDF00) 1575 return true; 1576 1577 break; 1578 case CS_ETM_ISA_A32: 1579 /* 1580 * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247: 1581 * 1582 * b'31 b'28 b'27 b'24 1583 * +---------+---------+-------------------------+ 1584 * | !1111 | 1 1 1 1 | imm24 | 1585 * +---------+---------+-------------------------+ 1586 */ 1587 addr = end_addr - 4; 1588 cs_etm__mem_access(etmq, trace_chan_id, addr, 1589 sizeof(instr32), (u8 *)&instr32); 1590 if ((instr32 & 0x0F000000) == 0x0F000000 && 1591 (instr32 & 0xF0000000) != 0xF0000000) 1592 return true; 1593 1594 break; 1595 case CS_ETM_ISA_A64: 1596 /* 1597 * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294: 1598 * 1599 * b'31 b'21 b'4 b'0 1600 * +-----------------------+---------+-----------+ 1601 * | 1 1 0 1 0 1 0 0 0 0 0 | imm16 | 0 0 0 0 1 | 1602 * +-----------------------+---------+-----------+ 1603 */ 1604 addr = end_addr - 4; 1605 cs_etm__mem_access(etmq, trace_chan_id, addr, 1606 sizeof(instr32), (u8 *)&instr32); 1607 if ((instr32 & 0xFFE0001F) == 0xd4000001) 1608 return true; 1609 1610 break; 1611 case CS_ETM_ISA_UNKNOWN: 1612 default: 1613 break; 1614 } 1615 1616 return false; 1617 } 1618 1619 static bool cs_etm__is_syscall(struct cs_etm_queue *etmq, 1620 struct cs_etm_traceid_queue *tidq, u64 magic) 1621 { 1622 u8 trace_chan_id = tidq->trace_chan_id; 1623 struct cs_etm_packet *packet = tidq->packet; 1624 struct cs_etm_packet *prev_packet = tidq->prev_packet; 1625 1626 if (magic == __perf_cs_etmv3_magic) 1627 if (packet->exception_number == CS_ETMV3_EXC_SVC) 1628 return true; 1629 1630 /* 1631 * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and 1632 * HVC cases; need to check if it's SVC instruction based on 1633 * packet address. 
1634 */ 1635 if (magic == __perf_cs_etmv4_magic) { 1636 if (packet->exception_number == CS_ETMV4_EXC_CALL && 1637 cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet, 1638 prev_packet->end_addr)) 1639 return true; 1640 } 1641 1642 return false; 1643 } 1644 1645 static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq, 1646 u64 magic) 1647 { 1648 struct cs_etm_packet *packet = tidq->packet; 1649 1650 if (magic == __perf_cs_etmv3_magic) 1651 if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT || 1652 packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT || 1653 packet->exception_number == CS_ETMV3_EXC_PE_RESET || 1654 packet->exception_number == CS_ETMV3_EXC_IRQ || 1655 packet->exception_number == CS_ETMV3_EXC_FIQ) 1656 return true; 1657 1658 if (magic == __perf_cs_etmv4_magic) 1659 if (packet->exception_number == CS_ETMV4_EXC_RESET || 1660 packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT || 1661 packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR || 1662 packet->exception_number == CS_ETMV4_EXC_INST_DEBUG || 1663 packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG || 1664 packet->exception_number == CS_ETMV4_EXC_IRQ || 1665 packet->exception_number == CS_ETMV4_EXC_FIQ) 1666 return true; 1667 1668 return false; 1669 } 1670 1671 static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, 1672 struct cs_etm_traceid_queue *tidq, 1673 u64 magic) 1674 { 1675 u8 trace_chan_id = tidq->trace_chan_id; 1676 struct cs_etm_packet *packet = tidq->packet; 1677 struct cs_etm_packet *prev_packet = tidq->prev_packet; 1678 1679 if (magic == __perf_cs_etmv3_magic) 1680 if (packet->exception_number == CS_ETMV3_EXC_SMC || 1681 packet->exception_number == CS_ETMV3_EXC_HYP || 1682 packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE || 1683 packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR || 1684 packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT || 1685 packet->exception_number == CS_ETMV3_EXC_DATA_FAULT || 1686 packet->exception_number == CS_ETMV3_EXC_GENERIC) 1687 return true; 1688 1689 if (magic == __perf_cs_etmv4_magic) { 1690 if (packet->exception_number == CS_ETMV4_EXC_TRAP || 1691 packet->exception_number == CS_ETMV4_EXC_ALIGNMENT || 1692 packet->exception_number == CS_ETMV4_EXC_INST_FAULT || 1693 packet->exception_number == CS_ETMV4_EXC_DATA_FAULT) 1694 return true; 1695 1696 /* 1697 * For CS_ETMV4_EXC_CALL, except SVC other instructions 1698 * (SMC, HVC) are taken as sync exceptions. 1699 */ 1700 if (packet->exception_number == CS_ETMV4_EXC_CALL && 1701 !cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet, 1702 prev_packet->end_addr)) 1703 return true; 1704 1705 /* 1706 * ETMv4 has 5 bits for exception number; if the numbers 1707 * are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ] 1708 * they are implementation defined exceptions. 1709 * 1710 * For this case, simply take it as sync exception. 
1711		 */
1712		if (packet->exception_number > CS_ETMV4_EXC_FIQ &&
1713		    packet->exception_number <= CS_ETMV4_EXC_END)
1714			return true;
1715	}
1716
1717	return false;
1718 }
1719
1720 static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
1721				    struct cs_etm_traceid_queue *tidq)
1722 {
1723	struct cs_etm_packet *packet = tidq->packet;
1724	struct cs_etm_packet *prev_packet = tidq->prev_packet;
1725	u8 trace_chan_id = tidq->trace_chan_id;
1726	u64 magic;
1727	int ret;
1728
1729	switch (packet->sample_type) {
1730	case CS_ETM_RANGE:
1731		/*
1732		 * An immediate branch instruction with neither link nor
1733		 * return flag is a normal branch instruction within
1734		 * the function.
1735		 */
1736		if (packet->last_instr_type == OCSD_INSTR_BR &&
1737		    packet->last_instr_subtype == OCSD_S_INSTR_NONE) {
1738			packet->flags = PERF_IP_FLAG_BRANCH;
1739
1740			if (packet->last_instr_cond)
1741				packet->flags |= PERF_IP_FLAG_CONDITIONAL;
1742		}
1743
1744		/*
1745		 * An immediate branch instruction with link (e.g. BL) is a
1746		 * branch instruction for a function call.
1747		 */
1748		if (packet->last_instr_type == OCSD_INSTR_BR &&
1749		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
1750			packet->flags = PERF_IP_FLAG_BRANCH |
1751					PERF_IP_FLAG_CALL;
1752
1753		/*
1754		 * An indirect branch instruction with link (e.g. BLR) is a
1755		 * branch instruction for a function call.
1756		 */
1757		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
1758		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
1759			packet->flags = PERF_IP_FLAG_BRANCH |
1760					PERF_IP_FLAG_CALL;
1761
1762		/*
1763		 * An indirect branch instruction with subtype
1764		 * OCSD_S_INSTR_V7_IMPLIED_RET is an explicit hint for a
1765		 * function return on A32/T32.
1766		 */
1767		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
1768		    packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET)
1769			packet->flags = PERF_IP_FLAG_BRANCH |
1770					PERF_IP_FLAG_RETURN;
1771
1772		/*
1773		 * An indirect branch instruction without link (e.g. BR) is
1774		 * usually a function return, especially for functions in
1775		 * dynamically linked libraries.
1776		 */
1777		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
1778		    packet->last_instr_subtype == OCSD_S_INSTR_NONE)
1779			packet->flags = PERF_IP_FLAG_BRANCH |
1780					PERF_IP_FLAG_RETURN;
1781
1782		/* Return instruction for function return. */
1783		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
1784		    packet->last_instr_subtype == OCSD_S_INSTR_V8_RET)
1785			packet->flags = PERF_IP_FLAG_BRANCH |
1786					PERF_IP_FLAG_RETURN;
1787
1788		/*
1789		 * The decoder might insert a discontinuity in the middle of
1790		 * instruction packets; fix up prev_packet with the flag
1791		 * PERF_IP_FLAG_TRACE_BEGIN to indicate that trace restarted.
1792		 */
1793		if (prev_packet->sample_type == CS_ETM_DISCONTINUITY)
1794			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
1795					      PERF_IP_FLAG_TRACE_BEGIN;
1796
1797		/*
1798		 * If the previous packet is an exception return packet
1799		 * and the return address immediately follows an SVC
1800		 * instruction, calibrate the previous packet's sample flags
1801		 * to PERF_IP_FLAG_SYSCALLRET.
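		 *
		 * For example, on A64 the return address of a system call is
		 * the instruction right after "SVC #0" (0xd4000001), so
		 * reading the four bytes just before packet->start_addr via
		 * cs_etm__is_svc_instr() identifies the SVC and the flags
		 * are corrected below.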
1802		 */
1803		if (prev_packet->flags == (PERF_IP_FLAG_BRANCH |
1804					   PERF_IP_FLAG_RETURN |
1805					   PERF_IP_FLAG_INTERRUPT) &&
1806		    cs_etm__is_svc_instr(etmq, trace_chan_id,
1807					 packet, packet->start_addr))
1808			prev_packet->flags = PERF_IP_FLAG_BRANCH |
1809					     PERF_IP_FLAG_RETURN |
1810					     PERF_IP_FLAG_SYSCALLRET;
1811		break;
1812	case CS_ETM_DISCONTINUITY:
1813		/*
1814		 * The trace is discontinuous; if the previous packet is an
1815		 * instruction range packet, set the PERF_IP_FLAG_TRACE_END
1816		 * flag on it.
1817		 */
1818		if (prev_packet->sample_type == CS_ETM_RANGE)
1819			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
1820					      PERF_IP_FLAG_TRACE_END;
1821		break;
1822	case CS_ETM_EXCEPTION:
1823		ret = cs_etm__get_magic(packet->trace_chan_id, &magic);
1824		if (ret)
1825			return ret;
1826
1827		/* The exception is for a system call. */
1828		if (cs_etm__is_syscall(etmq, tidq, magic))
1829			packet->flags = PERF_IP_FLAG_BRANCH |
1830					PERF_IP_FLAG_CALL |
1831					PERF_IP_FLAG_SYSCALLRET;
1832		/*
1833		 * These exceptions are triggered by external signals from the
1834		 * bus, interrupt controller, debug module, PE reset or halt.
1835		 */
1836		else if (cs_etm__is_async_exception(tidq, magic))
1837			packet->flags = PERF_IP_FLAG_BRANCH |
1838					PERF_IP_FLAG_CALL |
1839					PERF_IP_FLAG_ASYNC |
1840					PERF_IP_FLAG_INTERRUPT;
1841		/*
1842		 * Otherwise, the exception is caused by a trap, an instruction
1843		 * or data fault, or an alignment error.
1844		 */
1845		else if (cs_etm__is_sync_exception(etmq, tidq, magic))
1846			packet->flags = PERF_IP_FLAG_BRANCH |
1847					PERF_IP_FLAG_CALL |
1848					PERF_IP_FLAG_INTERRUPT;
1849
1850		/*
1851		 * When the exception packet is inserted, note that an
1852		 * exception packet is never used on its own to generate
1853		 * samples; it is affiliated with the previous instruction
1854		 * range packet. So set the previous range packet's flags to
1855		 * tell perf that it is an exception taken branch.
1856		 */
1857		if (prev_packet->sample_type == CS_ETM_RANGE)
1858			prev_packet->flags = packet->flags;
1859		break;
1860	case CS_ETM_EXCEPTION_RET:
1861		/*
1862		 * Likewise, when the exception return packet is inserted, the
1863		 * exception return packet is never used on its own to generate
1864		 * samples; it is affiliated with the previous instruction range
1865		 * packet, so set the previous range packet's flags to tell perf
1866		 * it is an exception return branch.
1867		 *
1868		 * The exception return can be for either a system call or some
1869		 * other exception type; unfortunately the packet doesn't carry
1870		 * any exception type information, so we cannot decide the
1871		 * exception type purely from the exception return packet.
1872		 * Recording the exception number from the exception packet and
1873		 * reusing it for the exception return packet is not reliable
1874		 * either: the trace can be discontinuous or the interrupt can
1875		 * be nested, and in those two cases the recorded exception
1876		 * number cannot be used for the exception return packet.
1877		 *
1878		 * For the exception return packet we only need to distinguish
1879		 * whether it is for a system call or for another exception
1880		 * type. That decision can be deferred until the next packet,
1881		 * which contains the return address; based on that address we
1882		 * can read back the previous instruction, check whether it is
1883		 * a system call instruction and then calibrate the sample
1884		 * flags as needed.
1885 */ 1886 if (prev_packet->sample_type == CS_ETM_RANGE) 1887 prev_packet->flags = PERF_IP_FLAG_BRANCH | 1888 PERF_IP_FLAG_RETURN | 1889 PERF_IP_FLAG_INTERRUPT; 1890 break; 1891 case CS_ETM_EMPTY: 1892 default: 1893 break; 1894 } 1895 1896 return 0; 1897 } 1898 1899 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq) 1900 { 1901 int ret = 0; 1902 size_t processed = 0; 1903 1904 /* 1905 * Packets are decoded and added to the decoder's packet queue 1906 * until the decoder packet processing callback has requested that 1907 * processing stops or there is nothing left in the buffer. Normal 1908 * operations that stop processing are a timestamp packet or a full 1909 * decoder buffer queue. 1910 */ 1911 ret = cs_etm_decoder__process_data_block(etmq->decoder, 1912 etmq->offset, 1913 &etmq->buf[etmq->buf_used], 1914 etmq->buf_len, 1915 &processed); 1916 if (ret) 1917 goto out; 1918 1919 etmq->offset += processed; 1920 etmq->buf_used += processed; 1921 etmq->buf_len -= processed; 1922 1923 out: 1924 return ret; 1925 } 1926 1927 static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq, 1928 struct cs_etm_traceid_queue *tidq) 1929 { 1930 int ret; 1931 struct cs_etm_packet_queue *packet_queue; 1932 1933 packet_queue = &tidq->packet_queue; 1934 1935 /* Process each packet in this chunk */ 1936 while (1) { 1937 ret = cs_etm_decoder__get_packet(packet_queue, 1938 tidq->packet); 1939 if (ret <= 0) 1940 /* 1941 * Stop processing this chunk on 1942 * end of data or error 1943 */ 1944 break; 1945 1946 /* 1947 * Since packet addresses are swapped in packet 1948 * handling within below switch() statements, 1949 * thus setting sample flags must be called 1950 * prior to switch() statement to use address 1951 * information before packets swapping. 1952 */ 1953 ret = cs_etm__set_sample_flags(etmq, tidq); 1954 if (ret < 0) 1955 break; 1956 1957 switch (tidq->packet->sample_type) { 1958 case CS_ETM_RANGE: 1959 /* 1960 * If the packet contains an instruction 1961 * range, generate instruction sequence 1962 * events. 1963 */ 1964 cs_etm__sample(etmq, tidq); 1965 break; 1966 case CS_ETM_EXCEPTION: 1967 case CS_ETM_EXCEPTION_RET: 1968 /* 1969 * If the exception packet is coming, 1970 * make sure the previous instruction 1971 * range packet to be handled properly. 1972 */ 1973 cs_etm__exception(tidq); 1974 break; 1975 case CS_ETM_DISCONTINUITY: 1976 /* 1977 * Discontinuity in trace, flush 1978 * previous branch stack 1979 */ 1980 cs_etm__flush(etmq, tidq); 1981 break; 1982 case CS_ETM_EMPTY: 1983 /* 1984 * Should not receive empty packet, 1985 * report error. 1986 */ 1987 pr_err("CS ETM Trace: empty packet\n"); 1988 return -EINVAL; 1989 default: 1990 break; 1991 } 1992 } 1993 1994 return ret; 1995 } 1996 1997 static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq) 1998 { 1999 int idx; 2000 struct int_node *inode; 2001 struct cs_etm_traceid_queue *tidq; 2002 struct intlist *traceid_queues_list = etmq->traceid_queues_list; 2003 2004 intlist__for_each_entry(inode, traceid_queues_list) { 2005 idx = (int)(intptr_t)inode->priv; 2006 tidq = etmq->traceid_queues[idx]; 2007 2008 /* Ignore return value */ 2009 cs_etm__process_traceid_queue(etmq, tidq); 2010 2011 /* 2012 * Generate an instruction sample with the remaining 2013 * branchstack entries. 
static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq)
{
	int idx;
	struct int_node *inode;
	struct cs_etm_traceid_queue *tidq;
	struct intlist *traceid_queues_list = etmq->traceid_queues_list;

	intlist__for_each_entry(inode, traceid_queues_list) {
		idx = (int)(intptr_t)inode->priv;
		tidq = etmq->traceid_queues[idx];

		/* Ignore return value */
		cs_etm__process_traceid_queue(etmq, tidq);

		/*
		 * Generate an instruction sample with the remaining
		 * branch stack entries.
		 */
		cs_etm__flush(etmq, tidq);
	}
}

static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
{
	int err = 0;
	struct cs_etm_traceid_queue *tidq;

	tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID);
	if (!tidq)
		return -EINVAL;

	/* Go through each buffer in the queue and decode them one by one */
	while (1) {
		err = cs_etm__get_data_block(etmq);
		if (err <= 0)
			return err;

		/* Run trace decoder until buffer consumed or end of trace */
		do {
			err = cs_etm__decode_data_block(etmq);
			if (err)
				return err;

			/*
			 * Process each packet in this chunk, nothing to do if
			 * an error occurs other than hoping the next one will
			 * be better.
			 */
			err = cs_etm__process_traceid_queue(etmq, tidq);

		} while (etmq->buf_len);

		if (err == 0)
			/* Flush any remaining branch stack entries */
			err = cs_etm__end_block(etmq, tidq);
	}

	return err;
}

static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
					   pid_t tid)
{
	unsigned int i;
	struct auxtrace_queues *queues = &etm->queues;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &etm->queues.queue_array[i];
		struct cs_etm_queue *etmq = queue->priv;
		struct cs_etm_traceid_queue *tidq;

		if (!etmq)
			continue;

		tidq = cs_etm__etmq_get_traceid_queue(etmq,
						CS_ETM_PER_THREAD_TRACEID);

		if (!tidq)
			continue;

		if ((tid == -1) || (tidq->tid == tid)) {
			cs_etm__set_pid_tid_cpu(etm, tidq);
			cs_etm__run_decoder(etmq);
		}
	}

	return 0;
}

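/*
 * Decode in timestamp order: repeatedly take the queue/traceID with the
 * lowest timestamp from the min heap, process its pending packets, decode
 * more trace for it and re-insert it into the heap with its next timestamp.
 */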
static int cs_etm__process_queues(struct cs_etm_auxtrace *etm)
{
	int ret = 0;
	unsigned int cs_queue_nr, queue_nr;
	u8 trace_chan_id;
	u64 timestamp;
	struct auxtrace_queue *queue;
	struct cs_etm_queue *etmq;
	struct cs_etm_traceid_queue *tidq;

	while (1) {
		if (!etm->heap.heap_cnt)
			goto out;

		/* Take the entry at the top of the min heap */
		cs_queue_nr = etm->heap.heap_array[0].queue_nr;
		queue_nr = TO_QUEUE_NR(cs_queue_nr);
		trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr);
		queue = &etm->queues.queue_array[queue_nr];
		etmq = queue->priv;

		/*
		 * Remove the top entry from the heap since we are about
		 * to process it.
		 */
		auxtrace_heap__pop(&etm->heap);

		tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
		if (!tidq) {
			/*
			 * No traceID queue has been allocated for this
			 * traceID, which means something somewhere went very
			 * wrong.  There is no choice but to bail out.
			 */
			ret = -EINVAL;
			goto out;
		}

		/*
		 * Packets associated with this timestamp are already in
		 * the etmq's traceID queue, so process them.
		 */
		ret = cs_etm__process_traceid_queue(etmq, tidq);
		if (ret < 0)
			goto out;

		/*
		 * Packets for this timestamp have been processed, time to
		 * move on to the next timestamp, fetching a new
		 * auxtrace_buffer if need be.
		 */
refetch:
		ret = cs_etm__get_data_block(etmq);
		if (ret < 0)
			goto out;

		/*
		 * No more auxtrace_buffers to process in this etmq, simply
		 * move on to another entry in the auxtrace_heap.
		 */
		if (!ret)
			continue;

		ret = cs_etm__decode_data_block(etmq);
		if (ret)
			goto out;

		timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);

		if (!timestamp) {
			/*
			 * Function cs_etm__decode_data_block() returns when
			 * there are no more traces to decode in the current
			 * auxtrace_buffer OR when a timestamp has been
			 * encountered on any of the traceID queues.  Since we
			 * did not get a timestamp, there are no more traces to
			 * process in this auxtrace_buffer.  As such, empty and
			 * flush all traceID queues.
			 */
			cs_etm__clear_all_traceid_queues(etmq);

			/* Fetch another auxtrace_buffer for this etmq */
			goto refetch;
		}

		/*
		 * Add to the min heap the timestamp for packets that have
		 * just been decoded.  They will be processed and synthesized
		 * during the next call to cs_etm__process_traceid_queue() for
		 * this queue/traceID.
		 */
		cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
		ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, timestamp);
	}

out:
	return ret;
}

static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm,
					union perf_event *event)
{
	struct thread *th;

	if (etm->timeless_decoding)
		return 0;

	/*
	 * Add the tid/pid to the log so that we can get a match when
	 * we get a contextID from the decoder.
	 */
	th = machine__findnew_thread(etm->machine,
				     event->itrace_start.pid,
				     event->itrace_start.tid);
	if (!th)
		return -ENOMEM;

	thread__put(th);

	return 0;
}

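/*
 * On a CPU-wide context switch, record the pid/tid carried by the SWITCH_OUT
 * event so that a later contextID packet from the decoder can be matched to
 * a thread.
 */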
static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm,
					   union perf_event *event)
{
	struct thread *th;
	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;

	/*
	 * Context switches in per-thread mode are irrelevant since perf
	 * will start/stop tracing as the process is scheduled.
	 */
	if (etm->timeless_decoding)
		return 0;

	/*
	 * SWITCH_IN events carry the next process to be switched out while
	 * SWITCH_OUT events carry the process to be switched in.  As such
	 * we don't care about IN events.
	 */
	if (!out)
		return 0;

	/*
	 * Add the tid/pid to the log so that we can get a match when
	 * we get a contextID from the decoder.
	 */
	th = machine__findnew_thread(etm->machine,
				     event->context_switch.next_prev_pid,
				     event->context_switch.next_prev_tid);
	if (!th)
		return -ENOMEM;

	thread__put(th);

	return 0;
}

static int cs_etm__process_event(struct perf_session *session,
				 union perf_event *event,
				 struct perf_sample *sample,
				 struct perf_tool *tool)
{
	int err = 0;
	u64 timestamp;
	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("CoreSight ETM Trace requires ordered events\n");
		return -EINVAL;
	}

	if (sample->time && (sample->time != (u64) -1))
		timestamp = sample->time;
	else
		timestamp = 0;

	if (timestamp || etm->timeless_decoding) {
		err = cs_etm__update_queues(etm);
		if (err)
			return err;
	}

	if (etm->timeless_decoding &&
	    event->header.type == PERF_RECORD_EXIT)
		return cs_etm__process_timeless_queues(etm,
						       event->fork.tid);

	if (event->header.type == PERF_RECORD_ITRACE_START)
		return cs_etm__process_itrace_start(etm, event);
	else if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
		return cs_etm__process_switch_cpu_wide(etm, event);

	if (!etm->timeless_decoding &&
	    event->header.type == PERF_RECORD_AUX)
		return cs_etm__process_queues(etm);

	return 0;
}

static int cs_etm__process_auxtrace_event(struct perf_session *session,
					  union perf_event *event,
					  struct perf_tool *tool __maybe_unused)
{
	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	if (!etm->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t data_offset;
		int fd = perf_data__fd(session->data);
		bool is_pipe = perf_data__is_pipe(session->data);
		int err;

		if (is_pipe)
			data_offset = 0;
		else {
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&etm->queues, session,
						 event, data_offset, &buffer);
		if (err)
			return err;

		if (dump_trace)
			if (auxtrace_buffer__get_data(buffer, fd)) {
				cs_etm__dump_event(etm, buffer);
				auxtrace_buffer__put_data(buffer);
			}
	}

	return 0;
}

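/*
 * Decoding is "timeless" when none of the events in the session carry a
 * timestamp, i.e. no event has PERF_SAMPLE_TIME set.
 */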
static bool cs_etm__is_timeless_decoding(struct cs_etm_auxtrace *etm)
{
	struct evsel *evsel;
	struct evlist *evlist = etm->session->evlist;
	bool timeless_decoding = true;

	/*
	 * Cycle through the list of events; if one of them has the time
	 * bit set, decoding is not timeless.
	 */
	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
			timeless_decoding = false;
	}

	return timeless_decoding;
}

static const char * const cs_etm_global_header_fmts[] = {
	[CS_HEADER_VERSION_0]	= "	Header version		%llx\n",
	[CS_PMU_TYPE_CPUS]	= "	PMU type/num cpus	%llx\n",
	[CS_ETM_SNAPSHOT]	= "	Snapshot		%llx\n",
};

static const char * const cs_etm_priv_fmts[] = {
	[CS_ETM_MAGIC]		= "	Magic number		%llx\n",
	[CS_ETM_CPU]		= "	CPU			%lld\n",
	[CS_ETM_ETMCR]		= "	ETMCR			%llx\n",
	[CS_ETM_ETMTRACEIDR]	= "	ETMTRACEIDR		%llx\n",
	[CS_ETM_ETMCCER]	= "	ETMCCER			%llx\n",
	[CS_ETM_ETMIDR]		= "	ETMIDR			%llx\n",
};

static const char * const cs_etmv4_priv_fmts[] = {
	[CS_ETM_MAGIC]		= "	Magic number		%llx\n",
	[CS_ETM_CPU]		= "	CPU			%lld\n",
	[CS_ETMV4_TRCCONFIGR]	= "	TRCCONFIGR		%llx\n",
	[CS_ETMV4_TRCTRACEIDR]	= "	TRCTRACEIDR		%llx\n",
	[CS_ETMV4_TRCIDR0]	= "	TRCIDR0			%llx\n",
	[CS_ETMV4_TRCIDR1]	= "	TRCIDR1			%llx\n",
	[CS_ETMV4_TRCIDR2]	= "	TRCIDR2			%llx\n",
	[CS_ETMV4_TRCIDR8]	= "	TRCIDR8			%llx\n",
	[CS_ETMV4_TRCAUTHSTATUS] = "	TRCAUTHSTATUS		%llx\n",
};

static void cs_etm__print_auxtrace_info(__u64 *val, int num)
{
	int i, j, cpu = 0;

	for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++)
		fprintf(stdout, cs_etm_global_header_fmts[i], val[i]);

	for (i = CS_HEADER_VERSION_0_MAX; cpu < num; cpu++) {
		if (val[i] == __perf_cs_etmv3_magic)
			for (j = 0; j < CS_ETM_PRIV_MAX; j++, i++)
				fprintf(stdout, cs_etm_priv_fmts[j], val[i]);
		else if (val[i] == __perf_cs_etmv4_magic)
			for (j = 0; j < CS_ETMV4_PRIV_MAX; j++, i++)
				fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]);
		else
			/* failure - return */
			return;
	}
}

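/*
 * Entry point for PERF_RECORD_AUXTRACE_INFO events: parse the global header
 * and per-CPU metadata, build the traceID to metadata mapping and register
 * the CoreSight ETM auxtrace callbacks with the session.
 */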
int cs_etm__process_auxtrace_info(union perf_event *event,
				  struct perf_session *session)
{
	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
	struct cs_etm_auxtrace *etm = NULL;
	struct int_node *inode;
	unsigned int pmu_type;
	int event_header_size = sizeof(struct perf_event_header);
	int info_header_size;
	int total_size = auxtrace_info->header.size;
	int priv_size = 0;
	int num_cpu;
	int err = 0, idx = -1;
	int i, j, k;
	u64 *ptr, *hdr = NULL;
	u64 **metadata = NULL;

	/*
	 * sizeof(auxtrace_info_event::type) +
	 * sizeof(auxtrace_info_event::reserved) == 8
	 */
	info_header_size = 8;

	if (total_size < (event_header_size + info_header_size))
		return -EINVAL;

	priv_size = total_size - event_header_size - info_header_size;

	/* First the global part */
	ptr = (u64 *) auxtrace_info->priv;

	/* Look for version '0' of the header */
	if (ptr[0] != 0)
		return -EINVAL;

	hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_0_MAX);
	if (!hdr)
		return -ENOMEM;

	/* Extract header information - see cs-etm.h for format */
	for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++)
		hdr[i] = ptr[i];
	num_cpu = hdr[CS_PMU_TYPE_CPUS] & 0xffffffff;
	pmu_type = (unsigned int) ((hdr[CS_PMU_TYPE_CPUS] >> 32) &
				   0xffffffff);

	/*
	 * Create an RB tree of traceID-to-metadata tuples.  The lookup has to
	 * be made for each packet that gets decoded, so optimising access
	 * with anything better than a sequential array is worth doing.
	 */
	traceid_list = intlist__new(NULL);
	if (!traceid_list) {
		err = -ENOMEM;
		goto err_free_hdr;
	}

	metadata = zalloc(sizeof(*metadata) * num_cpu);
	if (!metadata) {
		err = -ENOMEM;
		goto err_free_traceid_list;
	}

	/*
	 * The metadata is stored in the auxtrace_info section and encodes
	 * the configuration of the ARM embedded trace macrocell which is
	 * required by the trace decoder to properly decode the trace due
	 * to its highly compressed nature.
	 */
	for (j = 0; j < num_cpu; j++) {
		if (ptr[i] == __perf_cs_etmv3_magic) {
			metadata[j] = zalloc(sizeof(*metadata[j]) *
					     CS_ETM_PRIV_MAX);
			if (!metadata[j]) {
				err = -ENOMEM;
				goto err_free_metadata;
			}
			for (k = 0; k < CS_ETM_PRIV_MAX; k++)
				metadata[j][k] = ptr[i + k];

			/* The traceID is our handle */
			idx = metadata[j][CS_ETM_ETMTRACEIDR];
			i += CS_ETM_PRIV_MAX;
		} else if (ptr[i] == __perf_cs_etmv4_magic) {
			metadata[j] = zalloc(sizeof(*metadata[j]) *
					     CS_ETMV4_PRIV_MAX);
			if (!metadata[j]) {
				err = -ENOMEM;
				goto err_free_metadata;
			}
			for (k = 0; k < CS_ETMV4_PRIV_MAX; k++)
				metadata[j][k] = ptr[i + k];

			/* The traceID is our handle */
			idx = metadata[j][CS_ETMV4_TRCTRACEIDR];
			i += CS_ETMV4_PRIV_MAX;
		}

		/* Get an RB node for this CPU */
		inode = intlist__findnew(traceid_list, idx);

		/* Something went wrong, no need to continue */
		if (!inode) {
			err = -ENOMEM;
			goto err_free_metadata;
		}

		/*
		 * The node for this traceID should not have been taken
		 * already; back out if it has.
		 */
		if (inode->priv) {
			err = -EINVAL;
			goto err_free_metadata;
		}
		/* All good, associate the traceID with the metadata pointer */
		inode->priv = metadata[j];
	}

	/*
	 * CS_HEADER_VERSION_0_MAX, CS_ETM_PRIV_MAX and CS_ETMV4_PRIV_MAX give
	 * the number of double words in the global header and in each CPU's
	 * metadata respectively.  The following checks that the expected
	 * number of double words was present in the auxtrace info section.
	 */
	if (i * 8 != priv_size) {
		err = -EINVAL;
		goto err_free_metadata;
	}

	etm = zalloc(sizeof(*etm));

	if (!etm) {
		err = -ENOMEM;
		goto err_free_metadata;
	}

	err = auxtrace_queues__init(&etm->queues);
	if (err)
		goto err_free_etm;

	etm->session = session;
	etm->machine = &session->machines.host;

	etm->num_cpu = num_cpu;
	etm->pmu_type = pmu_type;
	etm->snapshot_mode = (hdr[CS_ETM_SNAPSHOT] != 0);
	etm->metadata = metadata;
	etm->auxtrace_type = auxtrace_info->type;
	etm->timeless_decoding = cs_etm__is_timeless_decoding(etm);

	etm->auxtrace.process_event = cs_etm__process_event;
	etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
	etm->auxtrace.flush_events = cs_etm__flush_events;
	etm->auxtrace.free_events = cs_etm__free_events;
	etm->auxtrace.free = cs_etm__free;
	session->auxtrace = &etm->auxtrace;

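	/*
	 * Set up a dummy "unknown" thread to fall back on when trace data
	 * cannot be attributed to a thread known to the session.
	 */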
	etm->unknown_thread = thread__new(999999999, 999999999);
	if (!etm->unknown_thread) {
		err = -ENOMEM;
		goto err_free_queues;
	}

	/*
	 * Initialize list node so that at thread__zput() we can avoid
	 * segmentation fault at list_del_init().
	 */
	INIT_LIST_HEAD(&etm->unknown_thread->node);

	err = thread__set_comm(etm->unknown_thread, "unknown", 0);
	if (err)
		goto err_delete_thread;

	if (thread__init_map_groups(etm->unknown_thread, etm->machine)) {
		err = -ENOMEM;
		goto err_delete_thread;
	}

	if (dump_trace) {
		cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu);
		return 0;
	}

	if (session->itrace_synth_opts->set) {
		etm->synth_opts = *session->itrace_synth_opts;
	} else {
		itrace_synth_opts__set_default(&etm->synth_opts,
				session->itrace_synth_opts->default_no_sample);
		etm->synth_opts.callchain = false;
	}

	err = cs_etm__synth_events(etm, session);
	if (err)
		goto err_delete_thread;

	err = auxtrace_queues__process_index(&etm->queues, session);
	if (err)
		goto err_delete_thread;

	etm->data_queued = etm->queues.populated;

	return 0;

err_delete_thread:
	thread__zput(etm->unknown_thread);
err_free_queues:
	auxtrace_queues__free(&etm->queues);
	session->auxtrace = NULL;
err_free_etm:
	zfree(&etm);
err_free_metadata:
	/* No need to check @metadata[j], free(NULL) is supported */
	for (j = 0; j < num_cpu; j++)
		zfree(&metadata[j]);
	zfree(&metadata);
err_free_traceid_list:
	intlist__delete(traceid_list);
err_free_hdr:
	zfree(&hdr);

	return err;
}