1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright(C) 2015-2018 Linaro Limited. 4 * 5 * Author: Tor Jeremiassen <tor@ti.com> 6 * Author: Mathieu Poirier <mathieu.poirier@linaro.org> 7 */ 8 9 #include <linux/bitops.h> 10 #include <linux/err.h> 11 #include <linux/kernel.h> 12 #include <linux/log2.h> 13 #include <linux/types.h> 14 15 #include <opencsd/ocsd_if_types.h> 16 #include <stdlib.h> 17 18 #include "auxtrace.h" 19 #include "color.h" 20 #include "cs-etm.h" 21 #include "cs-etm-decoder/cs-etm-decoder.h" 22 #include "debug.h" 23 #include "evlist.h" 24 #include "intlist.h" 25 #include "machine.h" 26 #include "map.h" 27 #include "perf.h" 28 #include "symbol.h" 29 #include "thread.h" 30 #include "thread_map.h" 31 #include "thread-stack.h" 32 #include "util.h" 33 34 #define MAX_TIMESTAMP (~0ULL) 35 36 struct cs_etm_auxtrace { 37 struct auxtrace auxtrace; 38 struct auxtrace_queues queues; 39 struct auxtrace_heap heap; 40 struct itrace_synth_opts synth_opts; 41 struct perf_session *session; 42 struct machine *machine; 43 struct thread *unknown_thread; 44 45 u8 timeless_decoding; 46 u8 snapshot_mode; 47 u8 data_queued; 48 u8 sample_branches; 49 u8 sample_instructions; 50 51 int num_cpu; 52 u32 auxtrace_type; 53 u64 branches_sample_type; 54 u64 branches_id; 55 u64 instructions_sample_type; 56 u64 instructions_sample_period; 57 u64 instructions_id; 58 u64 **metadata; 59 u64 kernel_start; 60 unsigned int pmu_type; 61 }; 62 63 struct cs_etm_queue { 64 struct cs_etm_auxtrace *etm; 65 struct thread *thread; 66 struct cs_etm_decoder *decoder; 67 struct auxtrace_buffer *buffer; 68 union perf_event *event_buf; 69 unsigned int queue_nr; 70 pid_t pid, tid; 71 int cpu; 72 u64 offset; 73 u64 period_instructions; 74 struct branch_stack *last_branch; 75 struct branch_stack *last_branch_rb; 76 size_t last_branch_pos; 77 struct cs_etm_packet *prev_packet; 78 struct cs_etm_packet *packet; 79 const unsigned char *buf; 80 size_t buf_len, buf_used; 81 }; 82 83 static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); 84 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, 85 pid_t tid); 86 87 /* PTMs ETMIDR [11:8] set to b0011 */ 88 #define ETMIDR_PTM_VERSION 0x00000300 89 90 static u32 cs_etm__get_v7_protocol_version(u32 etmidr) 91 { 92 etmidr &= ETMIDR_PTM_VERSION; 93 94 if (etmidr == ETMIDR_PTM_VERSION) 95 return CS_ETM_PROTO_PTM; 96 97 return CS_ETM_PROTO_ETMV3; 98 } 99 100 static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic) 101 { 102 struct int_node *inode; 103 u64 *metadata; 104 105 inode = intlist__find(traceid_list, trace_chan_id); 106 if (!inode) 107 return -EINVAL; 108 109 metadata = inode->priv; 110 *magic = metadata[CS_ETM_MAGIC]; 111 return 0; 112 } 113 114 int cs_etm__get_cpu(u8 trace_chan_id, int *cpu) 115 { 116 struct int_node *inode; 117 u64 *metadata; 118 119 inode = intlist__find(traceid_list, trace_chan_id); 120 if (!inode) 121 return -EINVAL; 122 123 metadata = inode->priv; 124 *cpu = (int)metadata[CS_ETM_CPU]; 125 return 0; 126 } 127 128 static void cs_etm__packet_dump(const char *pkt_string) 129 { 130 const char *color = PERF_COLOR_BLUE; 131 int len = strlen(pkt_string); 132 133 if (len && (pkt_string[len-1] == '\n')) 134 color_fprintf(stdout, color, " %s", pkt_string); 135 else 136 color_fprintf(stdout, color, " %s\n", pkt_string); 137 138 fflush(stdout); 139 } 140 141 static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params, 142 struct cs_etm_auxtrace *etm, int idx, 143 u32 etmidr) 144 { 145 u64 **metadata = etm->metadata; 146 147 t_params[idx].protocol = cs_etm__get_v7_protocol_version(etmidr); 148 t_params[idx].etmv3.reg_ctrl = metadata[idx][CS_ETM_ETMCR]; 149 t_params[idx].etmv3.reg_trc_id = metadata[idx][CS_ETM_ETMTRACEIDR]; 150 } 151 152 static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params, 153 struct cs_etm_auxtrace *etm, int idx) 154 { 155 u64 **metadata = etm->metadata; 156 157 t_params[idx].protocol = CS_ETM_PROTO_ETMV4i; 158 t_params[idx].etmv4.reg_idr0 = metadata[idx][CS_ETMV4_TRCIDR0]; 159 t_params[idx].etmv4.reg_idr1 = metadata[idx][CS_ETMV4_TRCIDR1]; 160 t_params[idx].etmv4.reg_idr2 = metadata[idx][CS_ETMV4_TRCIDR2]; 161 t_params[idx].etmv4.reg_idr8 = metadata[idx][CS_ETMV4_TRCIDR8]; 162 t_params[idx].etmv4.reg_configr = metadata[idx][CS_ETMV4_TRCCONFIGR]; 163 t_params[idx].etmv4.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR]; 164 } 165 166 static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params, 167 struct cs_etm_auxtrace *etm) 168 { 169 int i; 170 u32 etmidr; 171 u64 architecture; 172 173 for (i = 0; i < etm->num_cpu; i++) { 174 architecture = etm->metadata[i][CS_ETM_MAGIC]; 175 176 switch (architecture) { 177 case __perf_cs_etmv3_magic: 178 etmidr = etm->metadata[i][CS_ETM_ETMIDR]; 179 cs_etm__set_trace_param_etmv3(t_params, etm, i, etmidr); 180 break; 181 case __perf_cs_etmv4_magic: 182 cs_etm__set_trace_param_etmv4(t_params, etm, i); 183 break; 184 default: 185 return -EINVAL; 186 } 187 } 188 189 return 0; 190 } 191 192 static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, 193 struct cs_etm_queue *etmq, 194 enum cs_etm_decoder_operation mode) 195 { 196 int ret = -EINVAL; 197 198 if (!(mode < CS_ETM_OPERATION_MAX)) 199 goto out; 200 201 d_params->packet_printer = cs_etm__packet_dump; 202 d_params->operation = mode; 203 d_params->data = etmq; 204 d_params->formatted = true; 205 d_params->fsyncs = false; 206 d_params->hsyncs = false; 207 d_params->frame_aligned = true; 208 209 ret = 0; 210 out: 211 return ret; 212 } 213 214 static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, 215 struct auxtrace_buffer *buffer) 216 { 217 int ret; 218 const char *color = PERF_COLOR_BLUE; 219 struct cs_etm_decoder_params d_params; 220 struct cs_etm_trace_params *t_params; 221 struct cs_etm_decoder *decoder; 222 size_t buffer_used = 0; 223 224 fprintf(stdout, "\n"); 225 color_fprintf(stdout, color, 226 ". ... CoreSight ETM Trace data: size %zu bytes\n", 227 buffer->size); 228 229 /* Use metadata to fill in trace parameters for trace decoder */ 230 t_params = zalloc(sizeof(*t_params) * etm->num_cpu); 231 232 if (!t_params) 233 return; 234 235 if (cs_etm__init_trace_params(t_params, etm)) 236 goto out_free; 237 238 /* Set decoder parameters to simply print the trace packets */ 239 if (cs_etm__init_decoder_params(&d_params, NULL, 240 CS_ETM_OPERATION_PRINT)) 241 goto out_free; 242 243 decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); 244 245 if (!decoder) 246 goto out_free; 247 do { 248 size_t consumed; 249 250 ret = cs_etm_decoder__process_data_block( 251 decoder, buffer->offset, 252 &((u8 *)buffer->data)[buffer_used], 253 buffer->size - buffer_used, &consumed); 254 if (ret) 255 break; 256 257 buffer_used += consumed; 258 } while (buffer_used < buffer->size); 259 260 cs_etm_decoder__free(decoder); 261 262 out_free: 263 zfree(&t_params); 264 } 265 266 static int cs_etm__flush_events(struct perf_session *session, 267 struct perf_tool *tool) 268 { 269 int ret; 270 struct cs_etm_auxtrace *etm = container_of(session->auxtrace, 271 struct cs_etm_auxtrace, 272 auxtrace); 273 if (dump_trace) 274 return 0; 275 276 if (!tool->ordered_events) 277 return -EINVAL; 278 279 if (!etm->timeless_decoding) 280 return -EINVAL; 281 282 ret = cs_etm__update_queues(etm); 283 284 if (ret < 0) 285 return ret; 286 287 return cs_etm__process_timeless_queues(etm, -1); 288 } 289 290 static void cs_etm__free_queue(void *priv) 291 { 292 struct cs_etm_queue *etmq = priv; 293 294 if (!etmq) 295 return; 296 297 thread__zput(etmq->thread); 298 cs_etm_decoder__free(etmq->decoder); 299 zfree(&etmq->event_buf); 300 zfree(&etmq->last_branch); 301 zfree(&etmq->last_branch_rb); 302 zfree(&etmq->prev_packet); 303 zfree(&etmq->packet); 304 free(etmq); 305 } 306 307 static void cs_etm__free_events(struct perf_session *session) 308 { 309 unsigned int i; 310 struct cs_etm_auxtrace *aux = container_of(session->auxtrace, 311 struct cs_etm_auxtrace, 312 auxtrace); 313 struct auxtrace_queues *queues = &aux->queues; 314 315 for (i = 0; i < queues->nr_queues; i++) { 316 cs_etm__free_queue(queues->queue_array[i].priv); 317 queues->queue_array[i].priv = NULL; 318 } 319 320 auxtrace_queues__free(queues); 321 } 322 323 static void cs_etm__free(struct perf_session *session) 324 { 325 int i; 326 struct int_node *inode, *tmp; 327 struct cs_etm_auxtrace *aux = container_of(session->auxtrace, 328 struct cs_etm_auxtrace, 329 auxtrace); 330 cs_etm__free_events(session); 331 session->auxtrace = NULL; 332 333 /* First remove all traceID/metadata nodes for the RB tree */ 334 intlist__for_each_entry_safe(inode, tmp, traceid_list) 335 intlist__remove(traceid_list, inode); 336 /* Then the RB tree itself */ 337 intlist__delete(traceid_list); 338 339 for (i = 0; i < aux->num_cpu; i++) 340 zfree(&aux->metadata[i]); 341 342 thread__zput(aux->unknown_thread); 343 zfree(&aux->metadata); 344 zfree(&aux); 345 } 346 347 static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address) 348 { 349 struct machine *machine; 350 351 machine = etmq->etm->machine; 352 353 if (address >= etmq->etm->kernel_start) { 354 if (machine__is_host(machine)) 355 return PERF_RECORD_MISC_KERNEL; 356 else 357 return PERF_RECORD_MISC_GUEST_KERNEL; 358 } else { 359 if (machine__is_host(machine)) 360 return PERF_RECORD_MISC_USER; 361 else if (perf_guest) 362 return PERF_RECORD_MISC_GUEST_USER; 363 else 364 return PERF_RECORD_MISC_HYPERVISOR; 365 } 366 } 367 368 static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, 369 size_t size, u8 *buffer) 370 { 371 u8 cpumode; 372 u64 offset; 373 int len; 374 struct thread *thread; 375 struct machine *machine; 376 struct addr_location al; 377 378 if (!etmq) 379 return 0; 380 381 machine = etmq->etm->machine; 382 cpumode = cs_etm__cpu_mode(etmq, address); 383 384 thread = etmq->thread; 385 if (!thread) { 386 if (cpumode != PERF_RECORD_MISC_KERNEL) 387 return 0; 388 thread = etmq->etm->unknown_thread; 389 } 390 391 if (!thread__find_map(thread, cpumode, address, &al) || !al.map->dso) 392 return 0; 393 394 if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR && 395 dso__data_status_seen(al.map->dso, DSO_DATA_STATUS_SEEN_ITRACE)) 396 return 0; 397 398 offset = al.map->map_ip(al.map, address); 399 400 map__load(al.map); 401 402 len = dso__data_read_offset(al.map->dso, machine, offset, buffer, size); 403 404 if (len <= 0) 405 return 0; 406 407 return len; 408 } 409 410 static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) 411 { 412 struct cs_etm_decoder_params d_params; 413 struct cs_etm_trace_params *t_params = NULL; 414 struct cs_etm_queue *etmq; 415 size_t szp = sizeof(struct cs_etm_packet); 416 417 etmq = zalloc(sizeof(*etmq)); 418 if (!etmq) 419 return NULL; 420 421 etmq->packet = zalloc(szp); 422 if (!etmq->packet) 423 goto out_free; 424 425 if (etm->synth_opts.last_branch || etm->sample_branches) { 426 etmq->prev_packet = zalloc(szp); 427 if (!etmq->prev_packet) 428 goto out_free; 429 } 430 431 if (etm->synth_opts.last_branch) { 432 size_t sz = sizeof(struct branch_stack); 433 434 sz += etm->synth_opts.last_branch_sz * 435 sizeof(struct branch_entry); 436 etmq->last_branch = zalloc(sz); 437 if (!etmq->last_branch) 438 goto out_free; 439 etmq->last_branch_rb = zalloc(sz); 440 if (!etmq->last_branch_rb) 441 goto out_free; 442 } 443 444 etmq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); 445 if (!etmq->event_buf) 446 goto out_free; 447 448 /* Use metadata to fill in trace parameters for trace decoder */ 449 t_params = zalloc(sizeof(*t_params) * etm->num_cpu); 450 451 if (!t_params) 452 goto out_free; 453 454 if (cs_etm__init_trace_params(t_params, etm)) 455 goto out_free; 456 457 /* Set decoder parameters to decode trace packets */ 458 if (cs_etm__init_decoder_params(&d_params, etmq, 459 CS_ETM_OPERATION_DECODE)) 460 goto out_free; 461 462 etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); 463 464 if (!etmq->decoder) 465 goto out_free; 466 467 /* 468 * Register a function to handle all memory accesses required by 469 * the trace decoder library. 470 */ 471 if (cs_etm_decoder__add_mem_access_cb(etmq->decoder, 472 0x0L, ((u64) -1L), 473 cs_etm__mem_access)) 474 goto out_free_decoder; 475 476 zfree(&t_params); 477 return etmq; 478 479 out_free_decoder: 480 cs_etm_decoder__free(etmq->decoder); 481 out_free: 482 zfree(&t_params); 483 zfree(&etmq->event_buf); 484 zfree(&etmq->last_branch); 485 zfree(&etmq->last_branch_rb); 486 zfree(&etmq->prev_packet); 487 zfree(&etmq->packet); 488 free(etmq); 489 490 return NULL; 491 } 492 493 static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, 494 struct auxtrace_queue *queue, 495 unsigned int queue_nr) 496 { 497 int ret = 0; 498 struct cs_etm_queue *etmq = queue->priv; 499 500 if (list_empty(&queue->head) || etmq) 501 goto out; 502 503 etmq = cs_etm__alloc_queue(etm); 504 505 if (!etmq) { 506 ret = -ENOMEM; 507 goto out; 508 } 509 510 queue->priv = etmq; 511 etmq->etm = etm; 512 etmq->queue_nr = queue_nr; 513 etmq->cpu = queue->cpu; 514 etmq->tid = queue->tid; 515 etmq->pid = -1; 516 etmq->offset = 0; 517 etmq->period_instructions = 0; 518 519 out: 520 return ret; 521 } 522 523 static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm) 524 { 525 unsigned int i; 526 int ret; 527 528 if (!etm->kernel_start) 529 etm->kernel_start = machine__kernel_start(etm->machine); 530 531 for (i = 0; i < etm->queues.nr_queues; i++) { 532 ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i); 533 if (ret) 534 return ret; 535 } 536 537 return 0; 538 } 539 540 static int cs_etm__update_queues(struct cs_etm_auxtrace *etm) 541 { 542 if (etm->queues.new_data) { 543 etm->queues.new_data = false; 544 return cs_etm__setup_queues(etm); 545 } 546 547 return 0; 548 } 549 550 static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq) 551 { 552 struct branch_stack *bs_src = etmq->last_branch_rb; 553 struct branch_stack *bs_dst = etmq->last_branch; 554 size_t nr = 0; 555 556 /* 557 * Set the number of records before early exit: ->nr is used to 558 * determine how many branches to copy from ->entries. 559 */ 560 bs_dst->nr = bs_src->nr; 561 562 /* 563 * Early exit when there is nothing to copy. 564 */ 565 if (!bs_src->nr) 566 return; 567 568 /* 569 * As bs_src->entries is a circular buffer, we need to copy from it in 570 * two steps. First, copy the branches from the most recently inserted 571 * branch ->last_branch_pos until the end of bs_src->entries buffer. 572 */ 573 nr = etmq->etm->synth_opts.last_branch_sz - etmq->last_branch_pos; 574 memcpy(&bs_dst->entries[0], 575 &bs_src->entries[etmq->last_branch_pos], 576 sizeof(struct branch_entry) * nr); 577 578 /* 579 * If we wrapped around at least once, the branches from the beginning 580 * of the bs_src->entries buffer and until the ->last_branch_pos element 581 * are older valid branches: copy them over. The total number of 582 * branches copied over will be equal to the number of branches asked by 583 * the user in last_branch_sz. 584 */ 585 if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) { 586 memcpy(&bs_dst->entries[nr], 587 &bs_src->entries[0], 588 sizeof(struct branch_entry) * etmq->last_branch_pos); 589 } 590 } 591 592 static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq) 593 { 594 etmq->last_branch_pos = 0; 595 etmq->last_branch_rb->nr = 0; 596 } 597 598 static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq, 599 u64 addr) { 600 u8 instrBytes[2]; 601 602 cs_etm__mem_access(etmq, addr, ARRAY_SIZE(instrBytes), instrBytes); 603 /* 604 * T32 instruction size is indicated by bits[15:11] of the first 605 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111 606 * denote a 32-bit instruction. 607 */ 608 return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2; 609 } 610 611 static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet) 612 { 613 /* Returns 0 for the CS_ETM_DISCONTINUITY packet */ 614 if (packet->sample_type == CS_ETM_DISCONTINUITY) 615 return 0; 616 617 return packet->start_addr; 618 } 619 620 static inline 621 u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet) 622 { 623 /* Returns 0 for the CS_ETM_DISCONTINUITY packet */ 624 if (packet->sample_type == CS_ETM_DISCONTINUITY) 625 return 0; 626 627 return packet->end_addr - packet->last_instr_size; 628 } 629 630 static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq, 631 const struct cs_etm_packet *packet, 632 u64 offset) 633 { 634 if (packet->isa == CS_ETM_ISA_T32) { 635 u64 addr = packet->start_addr; 636 637 while (offset > 0) { 638 addr += cs_etm__t32_instr_size(etmq, addr); 639 offset--; 640 } 641 return addr; 642 } 643 644 /* Assume a 4 byte instruction size (A32/A64) */ 645 return packet->start_addr + offset * 4; 646 } 647 648 static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq) 649 { 650 struct branch_stack *bs = etmq->last_branch_rb; 651 struct branch_entry *be; 652 653 /* 654 * The branches are recorded in a circular buffer in reverse 655 * chronological order: we start recording from the last element of the 656 * buffer down. After writing the first element of the stack, move the 657 * insert position back to the end of the buffer. 658 */ 659 if (!etmq->last_branch_pos) 660 etmq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz; 661 662 etmq->last_branch_pos -= 1; 663 664 be = &bs->entries[etmq->last_branch_pos]; 665 be->from = cs_etm__last_executed_instr(etmq->prev_packet); 666 be->to = cs_etm__first_executed_instr(etmq->packet); 667 /* No support for mispredict */ 668 be->flags.mispred = 0; 669 be->flags.predicted = 1; 670 671 /* 672 * Increment bs->nr until reaching the number of last branches asked by 673 * the user on the command line. 674 */ 675 if (bs->nr < etmq->etm->synth_opts.last_branch_sz) 676 bs->nr += 1; 677 } 678 679 static int cs_etm__inject_event(union perf_event *event, 680 struct perf_sample *sample, u64 type) 681 { 682 event->header.size = perf_event__sample_event_size(sample, type, 0); 683 return perf_event__synthesize_sample(event, type, 0, sample); 684 } 685 686 687 static int 688 cs_etm__get_trace(struct cs_etm_queue *etmq) 689 { 690 struct auxtrace_buffer *aux_buffer = etmq->buffer; 691 struct auxtrace_buffer *old_buffer = aux_buffer; 692 struct auxtrace_queue *queue; 693 694 queue = &etmq->etm->queues.queue_array[etmq->queue_nr]; 695 696 aux_buffer = auxtrace_buffer__next(queue, aux_buffer); 697 698 /* If no more data, drop the previous auxtrace_buffer and return */ 699 if (!aux_buffer) { 700 if (old_buffer) 701 auxtrace_buffer__drop_data(old_buffer); 702 etmq->buf_len = 0; 703 return 0; 704 } 705 706 etmq->buffer = aux_buffer; 707 708 /* If the aux_buffer doesn't have data associated, try to load it */ 709 if (!aux_buffer->data) { 710 /* get the file desc associated with the perf data file */ 711 int fd = perf_data__fd(etmq->etm->session->data); 712 713 aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd); 714 if (!aux_buffer->data) 715 return -ENOMEM; 716 } 717 718 /* If valid, drop the previous buffer */ 719 if (old_buffer) 720 auxtrace_buffer__drop_data(old_buffer); 721 722 etmq->buf_used = 0; 723 etmq->buf_len = aux_buffer->size; 724 etmq->buf = aux_buffer->data; 725 726 return etmq->buf_len; 727 } 728 729 static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, 730 struct auxtrace_queue *queue) 731 { 732 struct cs_etm_queue *etmq = queue->priv; 733 734 /* CPU-wide tracing isn't supported yet */ 735 if (queue->tid == -1) 736 return; 737 738 if ((!etmq->thread) && (etmq->tid != -1)) 739 etmq->thread = machine__find_thread(etm->machine, -1, 740 etmq->tid); 741 742 if (etmq->thread) { 743 etmq->pid = etmq->thread->pid_; 744 if (queue->cpu == -1) 745 etmq->cpu = etmq->thread->cpu; 746 } 747 } 748 749 static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, 750 u64 addr, u64 period) 751 { 752 int ret = 0; 753 struct cs_etm_auxtrace *etm = etmq->etm; 754 union perf_event *event = etmq->event_buf; 755 struct perf_sample sample = {.ip = 0,}; 756 757 event->sample.header.type = PERF_RECORD_SAMPLE; 758 event->sample.header.misc = cs_etm__cpu_mode(etmq, addr); 759 event->sample.header.size = sizeof(struct perf_event_header); 760 761 sample.ip = addr; 762 sample.pid = etmq->pid; 763 sample.tid = etmq->tid; 764 sample.id = etmq->etm->instructions_id; 765 sample.stream_id = etmq->etm->instructions_id; 766 sample.period = period; 767 sample.cpu = etmq->packet->cpu; 768 sample.flags = etmq->prev_packet->flags; 769 sample.insn_len = 1; 770 sample.cpumode = event->sample.header.misc; 771 772 if (etm->synth_opts.last_branch) { 773 cs_etm__copy_last_branch_rb(etmq); 774 sample.branch_stack = etmq->last_branch; 775 } 776 777 if (etm->synth_opts.inject) { 778 ret = cs_etm__inject_event(event, &sample, 779 etm->instructions_sample_type); 780 if (ret) 781 return ret; 782 } 783 784 ret = perf_session__deliver_synth_event(etm->session, event, &sample); 785 786 if (ret) 787 pr_err( 788 "CS ETM Trace: failed to deliver instruction event, error %d\n", 789 ret); 790 791 if (etm->synth_opts.last_branch) 792 cs_etm__reset_last_branch_rb(etmq); 793 794 return ret; 795 } 796 797 /* 798 * The cs etm packet encodes an instruction range between a branch target 799 * and the next taken branch. Generate sample accordingly. 800 */ 801 static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq) 802 { 803 int ret = 0; 804 struct cs_etm_auxtrace *etm = etmq->etm; 805 struct perf_sample sample = {.ip = 0,}; 806 union perf_event *event = etmq->event_buf; 807 struct dummy_branch_stack { 808 u64 nr; 809 struct branch_entry entries; 810 } dummy_bs; 811 u64 ip; 812 813 ip = cs_etm__last_executed_instr(etmq->prev_packet); 814 815 event->sample.header.type = PERF_RECORD_SAMPLE; 816 event->sample.header.misc = cs_etm__cpu_mode(etmq, ip); 817 event->sample.header.size = sizeof(struct perf_event_header); 818 819 sample.ip = ip; 820 sample.pid = etmq->pid; 821 sample.tid = etmq->tid; 822 sample.addr = cs_etm__first_executed_instr(etmq->packet); 823 sample.id = etmq->etm->branches_id; 824 sample.stream_id = etmq->etm->branches_id; 825 sample.period = 1; 826 sample.cpu = etmq->packet->cpu; 827 sample.flags = etmq->prev_packet->flags; 828 sample.cpumode = event->sample.header.misc; 829 830 /* 831 * perf report cannot handle events without a branch stack 832 */ 833 if (etm->synth_opts.last_branch) { 834 dummy_bs = (struct dummy_branch_stack){ 835 .nr = 1, 836 .entries = { 837 .from = sample.ip, 838 .to = sample.addr, 839 }, 840 }; 841 sample.branch_stack = (struct branch_stack *)&dummy_bs; 842 } 843 844 if (etm->synth_opts.inject) { 845 ret = cs_etm__inject_event(event, &sample, 846 etm->branches_sample_type); 847 if (ret) 848 return ret; 849 } 850 851 ret = perf_session__deliver_synth_event(etm->session, event, &sample); 852 853 if (ret) 854 pr_err( 855 "CS ETM Trace: failed to deliver instruction event, error %d\n", 856 ret); 857 858 return ret; 859 } 860 861 struct cs_etm_synth { 862 struct perf_tool dummy_tool; 863 struct perf_session *session; 864 }; 865 866 static int cs_etm__event_synth(struct perf_tool *tool, 867 union perf_event *event, 868 struct perf_sample *sample __maybe_unused, 869 struct machine *machine __maybe_unused) 870 { 871 struct cs_etm_synth *cs_etm_synth = 872 container_of(tool, struct cs_etm_synth, dummy_tool); 873 874 return perf_session__deliver_synth_event(cs_etm_synth->session, 875 event, NULL); 876 } 877 878 static int cs_etm__synth_event(struct perf_session *session, 879 struct perf_event_attr *attr, u64 id) 880 { 881 struct cs_etm_synth cs_etm_synth; 882 883 memset(&cs_etm_synth, 0, sizeof(struct cs_etm_synth)); 884 cs_etm_synth.session = session; 885 886 return perf_event__synthesize_attr(&cs_etm_synth.dummy_tool, attr, 1, 887 &id, cs_etm__event_synth); 888 } 889 890 static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, 891 struct perf_session *session) 892 { 893 struct perf_evlist *evlist = session->evlist; 894 struct perf_evsel *evsel; 895 struct perf_event_attr attr; 896 bool found = false; 897 u64 id; 898 int err; 899 900 evlist__for_each_entry(evlist, evsel) { 901 if (evsel->attr.type == etm->pmu_type) { 902 found = true; 903 break; 904 } 905 } 906 907 if (!found) { 908 pr_debug("No selected events with CoreSight Trace data\n"); 909 return 0; 910 } 911 912 memset(&attr, 0, sizeof(struct perf_event_attr)); 913 attr.size = sizeof(struct perf_event_attr); 914 attr.type = PERF_TYPE_HARDWARE; 915 attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK; 916 attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | 917 PERF_SAMPLE_PERIOD; 918 if (etm->timeless_decoding) 919 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; 920 else 921 attr.sample_type |= PERF_SAMPLE_TIME; 922 923 attr.exclude_user = evsel->attr.exclude_user; 924 attr.exclude_kernel = evsel->attr.exclude_kernel; 925 attr.exclude_hv = evsel->attr.exclude_hv; 926 attr.exclude_host = evsel->attr.exclude_host; 927 attr.exclude_guest = evsel->attr.exclude_guest; 928 attr.sample_id_all = evsel->attr.sample_id_all; 929 attr.read_format = evsel->attr.read_format; 930 931 /* create new id val to be a fixed offset from evsel id */ 932 id = evsel->id[0] + 1000000000; 933 934 if (!id) 935 id = 1; 936 937 if (etm->synth_opts.branches) { 938 attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; 939 attr.sample_period = 1; 940 attr.sample_type |= PERF_SAMPLE_ADDR; 941 err = cs_etm__synth_event(session, &attr, id); 942 if (err) 943 return err; 944 etm->sample_branches = true; 945 etm->branches_sample_type = attr.sample_type; 946 etm->branches_id = id; 947 id += 1; 948 attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR; 949 } 950 951 if (etm->synth_opts.last_branch) 952 attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; 953 954 if (etm->synth_opts.instructions) { 955 attr.config = PERF_COUNT_HW_INSTRUCTIONS; 956 attr.sample_period = etm->synth_opts.period; 957 etm->instructions_sample_period = attr.sample_period; 958 err = cs_etm__synth_event(session, &attr, id); 959 if (err) 960 return err; 961 etm->sample_instructions = true; 962 etm->instructions_sample_type = attr.sample_type; 963 etm->instructions_id = id; 964 id += 1; 965 } 966 967 return 0; 968 } 969 970 static int cs_etm__sample(struct cs_etm_queue *etmq) 971 { 972 struct cs_etm_auxtrace *etm = etmq->etm; 973 struct cs_etm_packet *tmp; 974 int ret; 975 u64 instrs_executed = etmq->packet->instr_count; 976 977 etmq->period_instructions += instrs_executed; 978 979 /* 980 * Record a branch when the last instruction in 981 * PREV_PACKET is a branch. 982 */ 983 if (etm->synth_opts.last_branch && 984 etmq->prev_packet && 985 etmq->prev_packet->sample_type == CS_ETM_RANGE && 986 etmq->prev_packet->last_instr_taken_branch) 987 cs_etm__update_last_branch_rb(etmq); 988 989 if (etm->sample_instructions && 990 etmq->period_instructions >= etm->instructions_sample_period) { 991 /* 992 * Emit instruction sample periodically 993 * TODO: allow period to be defined in cycles and clock time 994 */ 995 996 /* Get number of instructions executed after the sample point */ 997 u64 instrs_over = etmq->period_instructions - 998 etm->instructions_sample_period; 999 1000 /* 1001 * Calculate the address of the sampled instruction (-1 as 1002 * sample is reported as though instruction has just been 1003 * executed, but PC has not advanced to next instruction) 1004 */ 1005 u64 offset = (instrs_executed - instrs_over - 1); 1006 u64 addr = cs_etm__instr_addr(etmq, etmq->packet, offset); 1007 1008 ret = cs_etm__synth_instruction_sample( 1009 etmq, addr, etm->instructions_sample_period); 1010 if (ret) 1011 return ret; 1012 1013 /* Carry remaining instructions into next sample period */ 1014 etmq->period_instructions = instrs_over; 1015 } 1016 1017 if (etm->sample_branches && etmq->prev_packet) { 1018 bool generate_sample = false; 1019 1020 /* Generate sample for tracing on packet */ 1021 if (etmq->prev_packet->sample_type == CS_ETM_DISCONTINUITY) 1022 generate_sample = true; 1023 1024 /* Generate sample for branch taken packet */ 1025 if (etmq->prev_packet->sample_type == CS_ETM_RANGE && 1026 etmq->prev_packet->last_instr_taken_branch) 1027 generate_sample = true; 1028 1029 if (generate_sample) { 1030 ret = cs_etm__synth_branch_sample(etmq); 1031 if (ret) 1032 return ret; 1033 } 1034 } 1035 1036 if (etm->sample_branches || etm->synth_opts.last_branch) { 1037 /* 1038 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for 1039 * the next incoming packet. 1040 */ 1041 tmp = etmq->packet; 1042 etmq->packet = etmq->prev_packet; 1043 etmq->prev_packet = tmp; 1044 } 1045 1046 return 0; 1047 } 1048 1049 static int cs_etm__exception(struct cs_etm_queue *etmq) 1050 { 1051 /* 1052 * When the exception packet is inserted, whether the last instruction 1053 * in previous range packet is taken branch or not, we need to force 1054 * to set 'prev_packet->last_instr_taken_branch' to true. This ensures 1055 * to generate branch sample for the instruction range before the 1056 * exception is trapped to kernel or before the exception returning. 1057 * 1058 * The exception packet includes the dummy address values, so don't 1059 * swap PACKET with PREV_PACKET. This keeps PREV_PACKET to be useful 1060 * for generating instruction and branch samples. 1061 */ 1062 if (etmq->prev_packet->sample_type == CS_ETM_RANGE) 1063 etmq->prev_packet->last_instr_taken_branch = true; 1064 1065 return 0; 1066 } 1067 1068 static int cs_etm__flush(struct cs_etm_queue *etmq) 1069 { 1070 int err = 0; 1071 struct cs_etm_auxtrace *etm = etmq->etm; 1072 struct cs_etm_packet *tmp; 1073 1074 if (!etmq->prev_packet) 1075 return 0; 1076 1077 /* Handle start tracing packet */ 1078 if (etmq->prev_packet->sample_type == CS_ETM_EMPTY) 1079 goto swap_packet; 1080 1081 if (etmq->etm->synth_opts.last_branch && 1082 etmq->prev_packet->sample_type == CS_ETM_RANGE) { 1083 /* 1084 * Generate a last branch event for the branches left in the 1085 * circular buffer at the end of the trace. 1086 * 1087 * Use the address of the end of the last reported execution 1088 * range 1089 */ 1090 u64 addr = cs_etm__last_executed_instr(etmq->prev_packet); 1091 1092 err = cs_etm__synth_instruction_sample( 1093 etmq, addr, 1094 etmq->period_instructions); 1095 if (err) 1096 return err; 1097 1098 etmq->period_instructions = 0; 1099 1100 } 1101 1102 if (etm->sample_branches && 1103 etmq->prev_packet->sample_type == CS_ETM_RANGE) { 1104 err = cs_etm__synth_branch_sample(etmq); 1105 if (err) 1106 return err; 1107 } 1108 1109 swap_packet: 1110 if (etm->sample_branches || etm->synth_opts.last_branch) { 1111 /* 1112 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for 1113 * the next incoming packet. 1114 */ 1115 tmp = etmq->packet; 1116 etmq->packet = etmq->prev_packet; 1117 etmq->prev_packet = tmp; 1118 } 1119 1120 return err; 1121 } 1122 1123 static int cs_etm__end_block(struct cs_etm_queue *etmq) 1124 { 1125 int err; 1126 1127 /* 1128 * It has no new packet coming and 'etmq->packet' contains the stale 1129 * packet which was set at the previous time with packets swapping; 1130 * so skip to generate branch sample to avoid stale packet. 1131 * 1132 * For this case only flush branch stack and generate a last branch 1133 * event for the branches left in the circular buffer at the end of 1134 * the trace. 1135 */ 1136 if (etmq->etm->synth_opts.last_branch && 1137 etmq->prev_packet->sample_type == CS_ETM_RANGE) { 1138 /* 1139 * Use the address of the end of the last reported execution 1140 * range. 1141 */ 1142 u64 addr = cs_etm__last_executed_instr(etmq->prev_packet); 1143 1144 err = cs_etm__synth_instruction_sample( 1145 etmq, addr, 1146 etmq->period_instructions); 1147 if (err) 1148 return err; 1149 1150 etmq->period_instructions = 0; 1151 } 1152 1153 return 0; 1154 } 1155 /* 1156 * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue 1157 * if need be. 1158 * Returns: < 0 if error 1159 * = 0 if no more auxtrace_buffer to read 1160 * > 0 if the current buffer isn't empty yet 1161 */ 1162 static int cs_etm__get_data_block(struct cs_etm_queue *etmq) 1163 { 1164 int ret; 1165 1166 if (!etmq->buf_len) { 1167 ret = cs_etm__get_trace(etmq); 1168 if (ret <= 0) 1169 return ret; 1170 /* 1171 * We cannot assume consecutive blocks in the data file 1172 * are contiguous, reset the decoder to force re-sync. 1173 */ 1174 ret = cs_etm_decoder__reset(etmq->decoder); 1175 if (ret) 1176 return ret; 1177 } 1178 1179 return etmq->buf_len; 1180 } 1181 1182 static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, 1183 struct cs_etm_packet *packet, 1184 u64 end_addr) 1185 { 1186 u16 instr16; 1187 u32 instr32; 1188 u64 addr; 1189 1190 switch (packet->isa) { 1191 case CS_ETM_ISA_T32: 1192 /* 1193 * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247: 1194 * 1195 * b'15 b'8 1196 * +-----------------+--------+ 1197 * | 1 1 0 1 1 1 1 1 | imm8 | 1198 * +-----------------+--------+ 1199 * 1200 * According to the specifiction, it only defines SVC for T32 1201 * with 16 bits instruction and has no definition for 32bits; 1202 * so below only read 2 bytes as instruction size for T32. 1203 */ 1204 addr = end_addr - 2; 1205 cs_etm__mem_access(etmq, addr, sizeof(instr16), (u8 *)&instr16); 1206 if ((instr16 & 0xFF00) == 0xDF00) 1207 return true; 1208 1209 break; 1210 case CS_ETM_ISA_A32: 1211 /* 1212 * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247: 1213 * 1214 * b'31 b'28 b'27 b'24 1215 * +---------+---------+-------------------------+ 1216 * | !1111 | 1 1 1 1 | imm24 | 1217 * +---------+---------+-------------------------+ 1218 */ 1219 addr = end_addr - 4; 1220 cs_etm__mem_access(etmq, addr, sizeof(instr32), (u8 *)&instr32); 1221 if ((instr32 & 0x0F000000) == 0x0F000000 && 1222 (instr32 & 0xF0000000) != 0xF0000000) 1223 return true; 1224 1225 break; 1226 case CS_ETM_ISA_A64: 1227 /* 1228 * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294: 1229 * 1230 * b'31 b'21 b'4 b'0 1231 * +-----------------------+---------+-----------+ 1232 * | 1 1 0 1 0 1 0 0 0 0 0 | imm16 | 0 0 0 0 1 | 1233 * +-----------------------+---------+-----------+ 1234 */ 1235 addr = end_addr - 4; 1236 cs_etm__mem_access(etmq, addr, sizeof(instr32), (u8 *)&instr32); 1237 if ((instr32 & 0xFFE0001F) == 0xd4000001) 1238 return true; 1239 1240 break; 1241 case CS_ETM_ISA_UNKNOWN: 1242 default: 1243 break; 1244 } 1245 1246 return false; 1247 } 1248 1249 static bool cs_etm__is_syscall(struct cs_etm_queue *etmq, u64 magic) 1250 { 1251 struct cs_etm_packet *packet = etmq->packet; 1252 struct cs_etm_packet *prev_packet = etmq->prev_packet; 1253 1254 if (magic == __perf_cs_etmv3_magic) 1255 if (packet->exception_number == CS_ETMV3_EXC_SVC) 1256 return true; 1257 1258 /* 1259 * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and 1260 * HVC cases; need to check if it's SVC instruction based on 1261 * packet address. 1262 */ 1263 if (magic == __perf_cs_etmv4_magic) { 1264 if (packet->exception_number == CS_ETMV4_EXC_CALL && 1265 cs_etm__is_svc_instr(etmq, prev_packet, 1266 prev_packet->end_addr)) 1267 return true; 1268 } 1269 1270 return false; 1271 } 1272 1273 static bool cs_etm__is_async_exception(struct cs_etm_queue *etmq, u64 magic) 1274 { 1275 struct cs_etm_packet *packet = etmq->packet; 1276 1277 if (magic == __perf_cs_etmv3_magic) 1278 if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT || 1279 packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT || 1280 packet->exception_number == CS_ETMV3_EXC_PE_RESET || 1281 packet->exception_number == CS_ETMV3_EXC_IRQ || 1282 packet->exception_number == CS_ETMV3_EXC_FIQ) 1283 return true; 1284 1285 if (magic == __perf_cs_etmv4_magic) 1286 if (packet->exception_number == CS_ETMV4_EXC_RESET || 1287 packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT || 1288 packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR || 1289 packet->exception_number == CS_ETMV4_EXC_INST_DEBUG || 1290 packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG || 1291 packet->exception_number == CS_ETMV4_EXC_IRQ || 1292 packet->exception_number == CS_ETMV4_EXC_FIQ) 1293 return true; 1294 1295 return false; 1296 } 1297 1298 static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, u64 magic) 1299 { 1300 struct cs_etm_packet *packet = etmq->packet; 1301 struct cs_etm_packet *prev_packet = etmq->prev_packet; 1302 1303 if (magic == __perf_cs_etmv3_magic) 1304 if (packet->exception_number == CS_ETMV3_EXC_SMC || 1305 packet->exception_number == CS_ETMV3_EXC_HYP || 1306 packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE || 1307 packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR || 1308 packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT || 1309 packet->exception_number == CS_ETMV3_EXC_DATA_FAULT || 1310 packet->exception_number == CS_ETMV3_EXC_GENERIC) 1311 return true; 1312 1313 if (magic == __perf_cs_etmv4_magic) { 1314 if (packet->exception_number == CS_ETMV4_EXC_TRAP || 1315 packet->exception_number == CS_ETMV4_EXC_ALIGNMENT || 1316 packet->exception_number == CS_ETMV4_EXC_INST_FAULT || 1317 packet->exception_number == CS_ETMV4_EXC_DATA_FAULT) 1318 return true; 1319 1320 /* 1321 * For CS_ETMV4_EXC_CALL, except SVC other instructions 1322 * (SMC, HVC) are taken as sync exceptions. 1323 */ 1324 if (packet->exception_number == CS_ETMV4_EXC_CALL && 1325 !cs_etm__is_svc_instr(etmq, prev_packet, 1326 prev_packet->end_addr)) 1327 return true; 1328 1329 /* 1330 * ETMv4 has 5 bits for exception number; if the numbers 1331 * are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ] 1332 * they are implementation defined exceptions. 1333 * 1334 * For this case, simply take it as sync exception. 1335 */ 1336 if (packet->exception_number > CS_ETMV4_EXC_FIQ && 1337 packet->exception_number <= CS_ETMV4_EXC_END) 1338 return true; 1339 } 1340 1341 return false; 1342 } 1343 1344 static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq) 1345 { 1346 struct cs_etm_packet *packet = etmq->packet; 1347 struct cs_etm_packet *prev_packet = etmq->prev_packet; 1348 u64 magic; 1349 int ret; 1350 1351 switch (packet->sample_type) { 1352 case CS_ETM_RANGE: 1353 /* 1354 * Immediate branch instruction without neither link nor 1355 * return flag, it's normal branch instruction within 1356 * the function. 1357 */ 1358 if (packet->last_instr_type == OCSD_INSTR_BR && 1359 packet->last_instr_subtype == OCSD_S_INSTR_NONE) { 1360 packet->flags = PERF_IP_FLAG_BRANCH; 1361 1362 if (packet->last_instr_cond) 1363 packet->flags |= PERF_IP_FLAG_CONDITIONAL; 1364 } 1365 1366 /* 1367 * Immediate branch instruction with link (e.g. BL), this is 1368 * branch instruction for function call. 1369 */ 1370 if (packet->last_instr_type == OCSD_INSTR_BR && 1371 packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK) 1372 packet->flags = PERF_IP_FLAG_BRANCH | 1373 PERF_IP_FLAG_CALL; 1374 1375 /* 1376 * Indirect branch instruction with link (e.g. BLR), this is 1377 * branch instruction for function call. 1378 */ 1379 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && 1380 packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK) 1381 packet->flags = PERF_IP_FLAG_BRANCH | 1382 PERF_IP_FLAG_CALL; 1383 1384 /* 1385 * Indirect branch instruction with subtype of 1386 * OCSD_S_INSTR_V7_IMPLIED_RET, this is explicit hint for 1387 * function return for A32/T32. 1388 */ 1389 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && 1390 packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET) 1391 packet->flags = PERF_IP_FLAG_BRANCH | 1392 PERF_IP_FLAG_RETURN; 1393 1394 /* 1395 * Indirect branch instruction without link (e.g. BR), usually 1396 * this is used for function return, especially for functions 1397 * within dynamic link lib. 1398 */ 1399 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && 1400 packet->last_instr_subtype == OCSD_S_INSTR_NONE) 1401 packet->flags = PERF_IP_FLAG_BRANCH | 1402 PERF_IP_FLAG_RETURN; 1403 1404 /* Return instruction for function return. */ 1405 if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && 1406 packet->last_instr_subtype == OCSD_S_INSTR_V8_RET) 1407 packet->flags = PERF_IP_FLAG_BRANCH | 1408 PERF_IP_FLAG_RETURN; 1409 1410 /* 1411 * Decoder might insert a discontinuity in the middle of 1412 * instruction packets, fixup prev_packet with flag 1413 * PERF_IP_FLAG_TRACE_BEGIN to indicate restarting trace. 1414 */ 1415 if (prev_packet->sample_type == CS_ETM_DISCONTINUITY) 1416 prev_packet->flags |= PERF_IP_FLAG_BRANCH | 1417 PERF_IP_FLAG_TRACE_BEGIN; 1418 1419 /* 1420 * If the previous packet is an exception return packet 1421 * and the return address just follows SVC instuction, 1422 * it needs to calibrate the previous packet sample flags 1423 * as PERF_IP_FLAG_SYSCALLRET. 1424 */ 1425 if (prev_packet->flags == (PERF_IP_FLAG_BRANCH | 1426 PERF_IP_FLAG_RETURN | 1427 PERF_IP_FLAG_INTERRUPT) && 1428 cs_etm__is_svc_instr(etmq, packet, packet->start_addr)) 1429 prev_packet->flags = PERF_IP_FLAG_BRANCH | 1430 PERF_IP_FLAG_RETURN | 1431 PERF_IP_FLAG_SYSCALLRET; 1432 break; 1433 case CS_ETM_DISCONTINUITY: 1434 /* 1435 * The trace is discontinuous, if the previous packet is 1436 * instruction packet, set flag PERF_IP_FLAG_TRACE_END 1437 * for previous packet. 1438 */ 1439 if (prev_packet->sample_type == CS_ETM_RANGE) 1440 prev_packet->flags |= PERF_IP_FLAG_BRANCH | 1441 PERF_IP_FLAG_TRACE_END; 1442 break; 1443 case CS_ETM_EXCEPTION: 1444 ret = cs_etm__get_magic(packet->trace_chan_id, &magic); 1445 if (ret) 1446 return ret; 1447 1448 /* The exception is for system call. */ 1449 if (cs_etm__is_syscall(etmq, magic)) 1450 packet->flags = PERF_IP_FLAG_BRANCH | 1451 PERF_IP_FLAG_CALL | 1452 PERF_IP_FLAG_SYSCALLRET; 1453 /* 1454 * The exceptions are triggered by external signals from bus, 1455 * interrupt controller, debug module, PE reset or halt. 1456 */ 1457 else if (cs_etm__is_async_exception(etmq, magic)) 1458 packet->flags = PERF_IP_FLAG_BRANCH | 1459 PERF_IP_FLAG_CALL | 1460 PERF_IP_FLAG_ASYNC | 1461 PERF_IP_FLAG_INTERRUPT; 1462 /* 1463 * Otherwise, exception is caused by trap, instruction & 1464 * data fault, or alignment errors. 1465 */ 1466 else if (cs_etm__is_sync_exception(etmq, magic)) 1467 packet->flags = PERF_IP_FLAG_BRANCH | 1468 PERF_IP_FLAG_CALL | 1469 PERF_IP_FLAG_INTERRUPT; 1470 1471 /* 1472 * When the exception packet is inserted, since exception 1473 * packet is not used standalone for generating samples 1474 * and it's affiliation to the previous instruction range 1475 * packet; so set previous range packet flags to tell perf 1476 * it is an exception taken branch. 1477 */ 1478 if (prev_packet->sample_type == CS_ETM_RANGE) 1479 prev_packet->flags = packet->flags; 1480 break; 1481 case CS_ETM_EXCEPTION_RET: 1482 /* 1483 * When the exception return packet is inserted, since 1484 * exception return packet is not used standalone for 1485 * generating samples and it's affiliation to the previous 1486 * instruction range packet; so set previous range packet 1487 * flags to tell perf it is an exception return branch. 1488 * 1489 * The exception return can be for either system call or 1490 * other exception types; unfortunately the packet doesn't 1491 * contain exception type related info so we cannot decide 1492 * the exception type purely based on exception return packet. 1493 * If we record the exception number from exception packet and 1494 * reuse it for excpetion return packet, this is not reliable 1495 * due the trace can be discontinuity or the interrupt can 1496 * be nested, thus the recorded exception number cannot be 1497 * used for exception return packet for these two cases. 1498 * 1499 * For exception return packet, we only need to distinguish the 1500 * packet is for system call or for other types. Thus the 1501 * decision can be deferred when receive the next packet which 1502 * contains the return address, based on the return address we 1503 * can read out the previous instruction and check if it's a 1504 * system call instruction and then calibrate the sample flag 1505 * as needed. 1506 */ 1507 if (prev_packet->sample_type == CS_ETM_RANGE) 1508 prev_packet->flags = PERF_IP_FLAG_BRANCH | 1509 PERF_IP_FLAG_RETURN | 1510 PERF_IP_FLAG_INTERRUPT; 1511 break; 1512 case CS_ETM_EMPTY: 1513 default: 1514 break; 1515 } 1516 1517 return 0; 1518 } 1519 1520 static int cs_etm__decode_data_block(struct cs_etm_queue *etmq) 1521 { 1522 int ret = 0; 1523 size_t processed = 0; 1524 1525 /* 1526 * Packets are decoded and added to the decoder's packet queue 1527 * until the decoder packet processing callback has requested that 1528 * processing stops or there is nothing left in the buffer. Normal 1529 * operations that stop processing are a timestamp packet or a full 1530 * decoder buffer queue. 1531 */ 1532 ret = cs_etm_decoder__process_data_block(etmq->decoder, 1533 etmq->offset, 1534 &etmq->buf[etmq->buf_used], 1535 etmq->buf_len, 1536 &processed); 1537 if (ret) 1538 goto out; 1539 1540 etmq->offset += processed; 1541 etmq->buf_used += processed; 1542 etmq->buf_len -= processed; 1543 1544 out: 1545 return ret; 1546 } 1547 1548 static int cs_etm__process_decoder_queue(struct cs_etm_queue *etmq) 1549 { 1550 int ret; 1551 1552 /* Process each packet in this chunk */ 1553 while (1) { 1554 ret = cs_etm_decoder__get_packet(etmq->decoder, 1555 etmq->packet); 1556 if (ret <= 0) 1557 /* 1558 * Stop processing this chunk on 1559 * end of data or error 1560 */ 1561 break; 1562 1563 /* 1564 * Since packet addresses are swapped in packet 1565 * handling within below switch() statements, 1566 * thus setting sample flags must be called 1567 * prior to switch() statement to use address 1568 * information before packets swapping. 1569 */ 1570 ret = cs_etm__set_sample_flags(etmq); 1571 if (ret < 0) 1572 break; 1573 1574 switch (etmq->packet->sample_type) { 1575 case CS_ETM_RANGE: 1576 /* 1577 * If the packet contains an instruction 1578 * range, generate instruction sequence 1579 * events. 1580 */ 1581 cs_etm__sample(etmq); 1582 break; 1583 case CS_ETM_EXCEPTION: 1584 case CS_ETM_EXCEPTION_RET: 1585 /* 1586 * If the exception packet is coming, 1587 * make sure the previous instruction 1588 * range packet to be handled properly. 1589 */ 1590 cs_etm__exception(etmq); 1591 break; 1592 case CS_ETM_DISCONTINUITY: 1593 /* 1594 * Discontinuity in trace, flush 1595 * previous branch stack 1596 */ 1597 cs_etm__flush(etmq); 1598 break; 1599 case CS_ETM_EMPTY: 1600 /* 1601 * Should not receive empty packet, 1602 * report error. 1603 */ 1604 pr_err("CS ETM Trace: empty packet\n"); 1605 return -EINVAL; 1606 default: 1607 break; 1608 } 1609 } 1610 1611 return ret; 1612 } 1613 1614 static int cs_etm__run_decoder(struct cs_etm_queue *etmq) 1615 { 1616 int err = 0; 1617 1618 /* Go through each buffer in the queue and decode them one by one */ 1619 while (1) { 1620 err = cs_etm__get_data_block(etmq); 1621 if (err <= 0) 1622 return err; 1623 1624 /* Run trace decoder until buffer consumed or end of trace */ 1625 do { 1626 err = cs_etm__decode_data_block(etmq); 1627 if (err) 1628 return err; 1629 1630 /* 1631 * Process each packet in this chunk, nothing to do if 1632 * an error occurs other than hoping the next one will 1633 * be better. 1634 */ 1635 err = cs_etm__process_decoder_queue(etmq); 1636 1637 } while (etmq->buf_len); 1638 1639 if (err == 0) 1640 /* Flush any remaining branch stack entries */ 1641 err = cs_etm__end_block(etmq); 1642 } 1643 1644 return err; 1645 } 1646 1647 static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, 1648 pid_t tid) 1649 { 1650 unsigned int i; 1651 struct auxtrace_queues *queues = &etm->queues; 1652 1653 for (i = 0; i < queues->nr_queues; i++) { 1654 struct auxtrace_queue *queue = &etm->queues.queue_array[i]; 1655 struct cs_etm_queue *etmq = queue->priv; 1656 1657 if (etmq && ((tid == -1) || (etmq->tid == tid))) { 1658 cs_etm__set_pid_tid_cpu(etm, queue); 1659 cs_etm__run_decoder(etmq); 1660 } 1661 } 1662 1663 return 0; 1664 } 1665 1666 static int cs_etm__process_event(struct perf_session *session, 1667 union perf_event *event, 1668 struct perf_sample *sample, 1669 struct perf_tool *tool) 1670 { 1671 int err = 0; 1672 u64 timestamp; 1673 struct cs_etm_auxtrace *etm = container_of(session->auxtrace, 1674 struct cs_etm_auxtrace, 1675 auxtrace); 1676 1677 if (dump_trace) 1678 return 0; 1679 1680 if (!tool->ordered_events) { 1681 pr_err("CoreSight ETM Trace requires ordered events\n"); 1682 return -EINVAL; 1683 } 1684 1685 if (!etm->timeless_decoding) 1686 return -EINVAL; 1687 1688 if (sample->time && (sample->time != (u64) -1)) 1689 timestamp = sample->time; 1690 else 1691 timestamp = 0; 1692 1693 if (timestamp || etm->timeless_decoding) { 1694 err = cs_etm__update_queues(etm); 1695 if (err) 1696 return err; 1697 } 1698 1699 if (event->header.type == PERF_RECORD_EXIT) 1700 return cs_etm__process_timeless_queues(etm, 1701 event->fork.tid); 1702 1703 return 0; 1704 } 1705 1706 static int cs_etm__process_auxtrace_event(struct perf_session *session, 1707 union perf_event *event, 1708 struct perf_tool *tool __maybe_unused) 1709 { 1710 struct cs_etm_auxtrace *etm = container_of(session->auxtrace, 1711 struct cs_etm_auxtrace, 1712 auxtrace); 1713 if (!etm->data_queued) { 1714 struct auxtrace_buffer *buffer; 1715 off_t data_offset; 1716 int fd = perf_data__fd(session->data); 1717 bool is_pipe = perf_data__is_pipe(session->data); 1718 int err; 1719 1720 if (is_pipe) 1721 data_offset = 0; 1722 else { 1723 data_offset = lseek(fd, 0, SEEK_CUR); 1724 if (data_offset == -1) 1725 return -errno; 1726 } 1727 1728 err = auxtrace_queues__add_event(&etm->queues, session, 1729 event, data_offset, &buffer); 1730 if (err) 1731 return err; 1732 1733 if (dump_trace) 1734 if (auxtrace_buffer__get_data(buffer, fd)) { 1735 cs_etm__dump_event(etm, buffer); 1736 auxtrace_buffer__put_data(buffer); 1737 } 1738 } 1739 1740 return 0; 1741 } 1742 1743 static bool cs_etm__is_timeless_decoding(struct cs_etm_auxtrace *etm) 1744 { 1745 struct perf_evsel *evsel; 1746 struct perf_evlist *evlist = etm->session->evlist; 1747 bool timeless_decoding = true; 1748 1749 /* 1750 * Circle through the list of event and complain if we find one 1751 * with the time bit set. 1752 */ 1753 evlist__for_each_entry(evlist, evsel) { 1754 if ((evsel->attr.sample_type & PERF_SAMPLE_TIME)) 1755 timeless_decoding = false; 1756 } 1757 1758 return timeless_decoding; 1759 } 1760 1761 static const char * const cs_etm_global_header_fmts[] = { 1762 [CS_HEADER_VERSION_0] = " Header version %llx\n", 1763 [CS_PMU_TYPE_CPUS] = " PMU type/num cpus %llx\n", 1764 [CS_ETM_SNAPSHOT] = " Snapshot %llx\n", 1765 }; 1766 1767 static const char * const cs_etm_priv_fmts[] = { 1768 [CS_ETM_MAGIC] = " Magic number %llx\n", 1769 [CS_ETM_CPU] = " CPU %lld\n", 1770 [CS_ETM_ETMCR] = " ETMCR %llx\n", 1771 [CS_ETM_ETMTRACEIDR] = " ETMTRACEIDR %llx\n", 1772 [CS_ETM_ETMCCER] = " ETMCCER %llx\n", 1773 [CS_ETM_ETMIDR] = " ETMIDR %llx\n", 1774 }; 1775 1776 static const char * const cs_etmv4_priv_fmts[] = { 1777 [CS_ETM_MAGIC] = " Magic number %llx\n", 1778 [CS_ETM_CPU] = " CPU %lld\n", 1779 [CS_ETMV4_TRCCONFIGR] = " TRCCONFIGR %llx\n", 1780 [CS_ETMV4_TRCTRACEIDR] = " TRCTRACEIDR %llx\n", 1781 [CS_ETMV4_TRCIDR0] = " TRCIDR0 %llx\n", 1782 [CS_ETMV4_TRCIDR1] = " TRCIDR1 %llx\n", 1783 [CS_ETMV4_TRCIDR2] = " TRCIDR2 %llx\n", 1784 [CS_ETMV4_TRCIDR8] = " TRCIDR8 %llx\n", 1785 [CS_ETMV4_TRCAUTHSTATUS] = " TRCAUTHSTATUS %llx\n", 1786 }; 1787 1788 static void cs_etm__print_auxtrace_info(u64 *val, int num) 1789 { 1790 int i, j, cpu = 0; 1791 1792 for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++) 1793 fprintf(stdout, cs_etm_global_header_fmts[i], val[i]); 1794 1795 for (i = CS_HEADER_VERSION_0_MAX; cpu < num; cpu++) { 1796 if (val[i] == __perf_cs_etmv3_magic) 1797 for (j = 0; j < CS_ETM_PRIV_MAX; j++, i++) 1798 fprintf(stdout, cs_etm_priv_fmts[j], val[i]); 1799 else if (val[i] == __perf_cs_etmv4_magic) 1800 for (j = 0; j < CS_ETMV4_PRIV_MAX; j++, i++) 1801 fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]); 1802 else 1803 /* failure.. return */ 1804 return; 1805 } 1806 } 1807 1808 int cs_etm__process_auxtrace_info(union perf_event *event, 1809 struct perf_session *session) 1810 { 1811 struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info; 1812 struct cs_etm_auxtrace *etm = NULL; 1813 struct int_node *inode; 1814 unsigned int pmu_type; 1815 int event_header_size = sizeof(struct perf_event_header); 1816 int info_header_size; 1817 int total_size = auxtrace_info->header.size; 1818 int priv_size = 0; 1819 int num_cpu; 1820 int err = 0, idx = -1; 1821 int i, j, k; 1822 u64 *ptr, *hdr = NULL; 1823 u64 **metadata = NULL; 1824 1825 /* 1826 * sizeof(auxtrace_info_event::type) + 1827 * sizeof(auxtrace_info_event::reserved) == 8 1828 */ 1829 info_header_size = 8; 1830 1831 if (total_size < (event_header_size + info_header_size)) 1832 return -EINVAL; 1833 1834 priv_size = total_size - event_header_size - info_header_size; 1835 1836 /* First the global part */ 1837 ptr = (u64 *) auxtrace_info->priv; 1838 1839 /* Look for version '0' of the header */ 1840 if (ptr[0] != 0) 1841 return -EINVAL; 1842 1843 hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_0_MAX); 1844 if (!hdr) 1845 return -ENOMEM; 1846 1847 /* Extract header information - see cs-etm.h for format */ 1848 for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++) 1849 hdr[i] = ptr[i]; 1850 num_cpu = hdr[CS_PMU_TYPE_CPUS] & 0xffffffff; 1851 pmu_type = (unsigned int) ((hdr[CS_PMU_TYPE_CPUS] >> 32) & 1852 0xffffffff); 1853 1854 /* 1855 * Create an RB tree for traceID-metadata tuple. Since the conversion 1856 * has to be made for each packet that gets decoded, optimizing access 1857 * in anything other than a sequential array is worth doing. 1858 */ 1859 traceid_list = intlist__new(NULL); 1860 if (!traceid_list) { 1861 err = -ENOMEM; 1862 goto err_free_hdr; 1863 } 1864 1865 metadata = zalloc(sizeof(*metadata) * num_cpu); 1866 if (!metadata) { 1867 err = -ENOMEM; 1868 goto err_free_traceid_list; 1869 } 1870 1871 /* 1872 * The metadata is stored in the auxtrace_info section and encodes 1873 * the configuration of the ARM embedded trace macrocell which is 1874 * required by the trace decoder to properly decode the trace due 1875 * to its highly compressed nature. 1876 */ 1877 for (j = 0; j < num_cpu; j++) { 1878 if (ptr[i] == __perf_cs_etmv3_magic) { 1879 metadata[j] = zalloc(sizeof(*metadata[j]) * 1880 CS_ETM_PRIV_MAX); 1881 if (!metadata[j]) { 1882 err = -ENOMEM; 1883 goto err_free_metadata; 1884 } 1885 for (k = 0; k < CS_ETM_PRIV_MAX; k++) 1886 metadata[j][k] = ptr[i + k]; 1887 1888 /* The traceID is our handle */ 1889 idx = metadata[j][CS_ETM_ETMTRACEIDR]; 1890 i += CS_ETM_PRIV_MAX; 1891 } else if (ptr[i] == __perf_cs_etmv4_magic) { 1892 metadata[j] = zalloc(sizeof(*metadata[j]) * 1893 CS_ETMV4_PRIV_MAX); 1894 if (!metadata[j]) { 1895 err = -ENOMEM; 1896 goto err_free_metadata; 1897 } 1898 for (k = 0; k < CS_ETMV4_PRIV_MAX; k++) 1899 metadata[j][k] = ptr[i + k]; 1900 1901 /* The traceID is our handle */ 1902 idx = metadata[j][CS_ETMV4_TRCTRACEIDR]; 1903 i += CS_ETMV4_PRIV_MAX; 1904 } 1905 1906 /* Get an RB node for this CPU */ 1907 inode = intlist__findnew(traceid_list, idx); 1908 1909 /* Something went wrong, no need to continue */ 1910 if (!inode) { 1911 err = PTR_ERR(inode); 1912 goto err_free_metadata; 1913 } 1914 1915 /* 1916 * The node for that CPU should not be taken. 1917 * Back out if that's the case. 1918 */ 1919 if (inode->priv) { 1920 err = -EINVAL; 1921 goto err_free_metadata; 1922 } 1923 /* All good, associate the traceID with the metadata pointer */ 1924 inode->priv = metadata[j]; 1925 } 1926 1927 /* 1928 * Each of CS_HEADER_VERSION_0_MAX, CS_ETM_PRIV_MAX and 1929 * CS_ETMV4_PRIV_MAX mark how many double words are in the 1930 * global metadata, and each cpu's metadata respectively. 1931 * The following tests if the correct number of double words was 1932 * present in the auxtrace info section. 1933 */ 1934 if (i * 8 != priv_size) { 1935 err = -EINVAL; 1936 goto err_free_metadata; 1937 } 1938 1939 etm = zalloc(sizeof(*etm)); 1940 1941 if (!etm) { 1942 err = -ENOMEM; 1943 goto err_free_metadata; 1944 } 1945 1946 err = auxtrace_queues__init(&etm->queues); 1947 if (err) 1948 goto err_free_etm; 1949 1950 etm->session = session; 1951 etm->machine = &session->machines.host; 1952 1953 etm->num_cpu = num_cpu; 1954 etm->pmu_type = pmu_type; 1955 etm->snapshot_mode = (hdr[CS_ETM_SNAPSHOT] != 0); 1956 etm->metadata = metadata; 1957 etm->auxtrace_type = auxtrace_info->type; 1958 etm->timeless_decoding = cs_etm__is_timeless_decoding(etm); 1959 1960 etm->auxtrace.process_event = cs_etm__process_event; 1961 etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event; 1962 etm->auxtrace.flush_events = cs_etm__flush_events; 1963 etm->auxtrace.free_events = cs_etm__free_events; 1964 etm->auxtrace.free = cs_etm__free; 1965 session->auxtrace = &etm->auxtrace; 1966 1967 etm->unknown_thread = thread__new(999999999, 999999999); 1968 if (!etm->unknown_thread) 1969 goto err_free_queues; 1970 1971 /* 1972 * Initialize list node so that at thread__zput() we can avoid 1973 * segmentation fault at list_del_init(). 1974 */ 1975 INIT_LIST_HEAD(&etm->unknown_thread->node); 1976 1977 err = thread__set_comm(etm->unknown_thread, "unknown", 0); 1978 if (err) 1979 goto err_delete_thread; 1980 1981 if (thread__init_map_groups(etm->unknown_thread, etm->machine)) 1982 goto err_delete_thread; 1983 1984 if (dump_trace) { 1985 cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu); 1986 return 0; 1987 } 1988 1989 if (session->itrace_synth_opts && session->itrace_synth_opts->set) { 1990 etm->synth_opts = *session->itrace_synth_opts; 1991 } else { 1992 itrace_synth_opts__set_default(&etm->synth_opts, 1993 session->itrace_synth_opts->default_no_sample); 1994 etm->synth_opts.callchain = false; 1995 } 1996 1997 err = cs_etm__synth_events(etm, session); 1998 if (err) 1999 goto err_delete_thread; 2000 2001 err = auxtrace_queues__process_index(&etm->queues, session); 2002 if (err) 2003 goto err_delete_thread; 2004 2005 etm->data_queued = etm->queues.populated; 2006 2007 return 0; 2008 2009 err_delete_thread: 2010 thread__zput(etm->unknown_thread); 2011 err_free_queues: 2012 auxtrace_queues__free(&etm->queues); 2013 session->auxtrace = NULL; 2014 err_free_etm: 2015 zfree(&etm); 2016 err_free_metadata: 2017 /* No need to check @metadata[j], free(NULL) is supported */ 2018 for (j = 0; j < num_cpu; j++) 2019 free(metadata[j]); 2020 zfree(&metadata); 2021 err_free_traceid_list: 2022 intlist__delete(traceid_list); 2023 err_free_hdr: 2024 zfree(&hdr); 2025 2026 return -EINVAL; 2027 } 2028