// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright(C) 2015-2018 Linaro Limited.
 *
 * Author: Tor Jeremiassen <tor@ti.com>
 * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
 */

#include <linux/bitops.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/types.h>

#include <opencsd/ocsd_if_types.h>
#include <stdlib.h>

#include "auxtrace.h"
#include "color.h"
#include "cs-etm.h"
#include "cs-etm-decoder/cs-etm-decoder.h"
#include "debug.h"
#include "evlist.h"
#include "intlist.h"
#include "machine.h"
#include "map.h"
#include "perf.h"
#include "symbol.h"
#include "thread.h"
#include "thread_map.h"
#include "thread-stack.h"
#include "util.h"

#define MAX_TIMESTAMP (~0ULL)

struct cs_etm_auxtrace {
	struct auxtrace auxtrace;
	struct auxtrace_queues queues;
	struct auxtrace_heap heap;
	struct itrace_synth_opts synth_opts;
	struct perf_session *session;
	struct machine *machine;
	struct thread *unknown_thread;

	u8 timeless_decoding;
	u8 snapshot_mode;
	u8 data_queued;
	u8 sample_branches;
	u8 sample_instructions;

	int num_cpu;
	u32 auxtrace_type;
	u64 branches_sample_type;
	u64 branches_id;
	u64 instructions_sample_type;
	u64 instructions_sample_period;
	u64 instructions_id;
	u64 **metadata;
	u64 kernel_start;
	unsigned int pmu_type;
};

struct cs_etm_queue {
	struct cs_etm_auxtrace *etm;
	struct thread *thread;
	struct cs_etm_decoder *decoder;
	struct auxtrace_buffer *buffer;
	union perf_event *event_buf;
	unsigned int queue_nr;
	pid_t pid, tid;
	int cpu;
	u64 offset;
	u64 period_instructions;
	struct branch_stack *last_branch;
	struct branch_stack *last_branch_rb;
	size_t last_branch_pos;
	struct cs_etm_packet *prev_packet;
	struct cs_etm_packet *packet;
	const unsigned char *buf;
	size_t buf_len, buf_used;
};

static int cs_etm__update_queues(struct cs_etm_auxtrace *etm);
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
					   pid_t tid);

/* PTMs ETMIDR [11:8] set to b0011 */
#define ETMIDR_PTM_VERSION 0x00000300

static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
{
	etmidr &= ETMIDR_PTM_VERSION;

	if (etmidr == ETMIDR_PTM_VERSION)
		return CS_ETM_PROTO_PTM;

	return CS_ETM_PROTO_ETMV3;
}

static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic)
{
	struct int_node *inode;
	u64 *metadata;

	inode = intlist__find(traceid_list, trace_chan_id);
	if (!inode)
		return -EINVAL;

	metadata = inode->priv;
	*magic = metadata[CS_ETM_MAGIC];
	return 0;
}

int cs_etm__get_cpu(u8 trace_chan_id, int *cpu)
{
	struct int_node *inode;
	u64 *metadata;

	inode = intlist__find(traceid_list, trace_chan_id);
	if (!inode)
		return -EINVAL;

	metadata = inode->priv;
	*cpu = (int)metadata[CS_ETM_CPU];
	return 0;
}

static void cs_etm__packet_dump(const char *pkt_string)
{
	const char *color = PERF_COLOR_BLUE;
	int len = strlen(pkt_string);

	if (len && (pkt_string[len-1] == '\n'))
		color_fprintf(stdout, color, "	%s", pkt_string);
	else
		color_fprintf(stdout, color, "	%s\n", pkt_string);

	fflush(stdout);
}

static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params,
					  struct cs_etm_auxtrace *etm, int idx,
					  u32 etmidr)
{
	u64 **metadata = etm->metadata;

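	/*
	 * The per-CPU metadata words were recorded in the perf.data file at
	 * record time and are indexed by the CS_ETM_* constants from
	 * cs-etm.h; ETMIDR[11:8] selects between the PTM and ETMv3 packet
	 * protocols.
	 */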
	t_params[idx].protocol = cs_etm__get_v7_protocol_version(etmidr);
	t_params[idx].etmv3.reg_ctrl = metadata[idx][CS_ETM_ETMCR];
	t_params[idx].etmv3.reg_trc_id = metadata[idx][CS_ETM_ETMTRACEIDR];
}

static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
					  struct cs_etm_auxtrace *etm, int idx)
{
	u64 **metadata = etm->metadata;

	t_params[idx].protocol = CS_ETM_PROTO_ETMV4i;
	t_params[idx].etmv4.reg_idr0 = metadata[idx][CS_ETMV4_TRCIDR0];
	t_params[idx].etmv4.reg_idr1 = metadata[idx][CS_ETMV4_TRCIDR1];
	t_params[idx].etmv4.reg_idr2 = metadata[idx][CS_ETMV4_TRCIDR2];
	t_params[idx].etmv4.reg_idr8 = metadata[idx][CS_ETMV4_TRCIDR8];
	t_params[idx].etmv4.reg_configr = metadata[idx][CS_ETMV4_TRCCONFIGR];
	t_params[idx].etmv4.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR];
}

static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
				     struct cs_etm_auxtrace *etm)
{
	int i;
	u32 etmidr;
	u64 architecture;

	for (i = 0; i < etm->num_cpu; i++) {
		architecture = etm->metadata[i][CS_ETM_MAGIC];

		switch (architecture) {
		case __perf_cs_etmv3_magic:
			etmidr = etm->metadata[i][CS_ETM_ETMIDR];
			cs_etm__set_trace_param_etmv3(t_params, etm, i, etmidr);
			break;
		case __perf_cs_etmv4_magic:
			cs_etm__set_trace_param_etmv4(t_params, etm, i);
			break;
		default:
			return -EINVAL;
		}
	}

	return 0;
}

static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
				       struct cs_etm_queue *etmq,
				       enum cs_etm_decoder_operation mode)
{
	int ret = -EINVAL;

	if (!(mode < CS_ETM_OPERATION_MAX))
		goto out;

	d_params->packet_printer = cs_etm__packet_dump;
	d_params->operation = mode;
	d_params->data = etmq;
	d_params->formatted = true;
	d_params->fsyncs = false;
	d_params->hsyncs = false;
	d_params->frame_aligned = true;

	ret = 0;
out:
	return ret;
}

static void cs_etm__dump_event(struct cs_etm_auxtrace *etm,
			       struct auxtrace_buffer *buffer)
{
	int ret;
	const char *color = PERF_COLOR_BLUE;
	struct cs_etm_decoder_params d_params;
	struct cs_etm_trace_params *t_params;
	struct cs_etm_decoder *decoder;
	size_t buffer_used = 0;

	fprintf(stdout, "\n");
	color_fprintf(stdout, color,
		      ". ... CoreSight ETM Trace data: size %zu bytes\n",
		      buffer->size);

	/* Use metadata to fill in trace parameters for trace decoder */
	t_params = zalloc(sizeof(*t_params) * etm->num_cpu);

	if (!t_params)
		return;

	if (cs_etm__init_trace_params(t_params, etm))
		goto out_free;

	/* Set decoder parameters to simply print the trace packets */
	if (cs_etm__init_decoder_params(&d_params, NULL,
					CS_ETM_OPERATION_PRINT))
		goto out_free;

	decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params);

	if (!decoder)
		goto out_free;
	do {
		size_t consumed;

		ret = cs_etm_decoder__process_data_block(
				decoder, buffer->offset,
				&((u8 *)buffer->data)[buffer_used],
				buffer->size - buffer_used, &consumed);
		if (ret)
			break;

		buffer_used += consumed;
	} while (buffer_used < buffer->size);

	cs_etm_decoder__free(decoder);

out_free:
	zfree(&t_params);
}

static int cs_etm__flush_events(struct perf_session *session,
				struct perf_tool *tool)
{
	int ret;
	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	if (dump_trace)
		return 0;

	if (!tool->ordered_events)
		return -EINVAL;

	if (!etm->timeless_decoding)
		return -EINVAL;

	ret = cs_etm__update_queues(etm);

	if (ret < 0)
		return ret;

	return cs_etm__process_timeless_queues(etm, -1);
}

static void cs_etm__free_queue(void *priv)
{
	struct cs_etm_queue *etmq = priv;

	if (!etmq)
		return;

	thread__zput(etmq->thread);
	cs_etm_decoder__free(etmq->decoder);
	zfree(&etmq->event_buf);
	zfree(&etmq->last_branch);
	zfree(&etmq->last_branch_rb);
	zfree(&etmq->prev_packet);
	zfree(&etmq->packet);
	free(etmq);
}

static void cs_etm__free_events(struct perf_session *session)
{
	unsigned int i;
	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	struct auxtrace_queues *queues = &aux->queues;

	for (i = 0; i < queues->nr_queues; i++) {
		cs_etm__free_queue(queues->queue_array[i].priv);
		queues->queue_array[i].priv = NULL;
	}

	auxtrace_queues__free(queues);
}

static void cs_etm__free(struct perf_session *session)
{
	int i;
	struct int_node *inode, *tmp;
	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	cs_etm__free_events(session);
	session->auxtrace = NULL;

	/* First remove all traceID/metadata nodes for the RB tree */
	intlist__for_each_entry_safe(inode, tmp, traceid_list)
		intlist__remove(traceid_list, inode);
	/* Then the RB tree itself */
	intlist__delete(traceid_list);

	for (i = 0; i < aux->num_cpu; i++)
		zfree(&aux->metadata[i]);

	thread__zput(aux->unknown_thread);
	zfree(&aux->metadata);
	zfree(&aux);
}

static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address)
{
	struct machine *machine;

	machine = etmq->etm->machine;

	if (address >= etmq->etm->kernel_start) {
		if (machine__is_host(machine))
			return PERF_RECORD_MISC_KERNEL;
		else
			return PERF_RECORD_MISC_GUEST_KERNEL;
	} else {
		if (machine__is_host(machine))
			return PERF_RECORD_MISC_USER;
		else if (perf_guest)
			return PERF_RECORD_MISC_GUEST_USER;
		else
			return PERF_RECORD_MISC_HYPERVISOR;
	}
}

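/*
 * Fetch @size bytes of program memory at @address into @buffer for the
 * decoder.  The bytes are read from the DSO backing the map that contains
 * @address in this queue's thread; when no thread is set, only kernel
 * addresses are served, via the special "unknown" thread.
 */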
static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address,
			      size_t size, u8 *buffer)
{
	u8 cpumode;
	u64 offset;
	int len;
	struct thread *thread;
	struct machine *machine;
	struct addr_location al;

	if (!etmq)
		return 0;

	machine = etmq->etm->machine;
	cpumode = cs_etm__cpu_mode(etmq, address);

	thread = etmq->thread;
	if (!thread) {
		if (cpumode != PERF_RECORD_MISC_KERNEL)
			return 0;
		thread = etmq->etm->unknown_thread;
	}

	if (!thread__find_map(thread, cpumode, address, &al) || !al.map->dso)
		return 0;

	if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
	    dso__data_status_seen(al.map->dso, DSO_DATA_STATUS_SEEN_ITRACE))
		return 0;

	offset = al.map->map_ip(al.map, address);

	map__load(al.map);

	len = dso__data_read_offset(al.map->dso, machine, offset, buffer, size);

	if (len <= 0)
		return 0;

	return len;
}

static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm)
{
	struct cs_etm_decoder_params d_params;
	struct cs_etm_trace_params *t_params = NULL;
	struct cs_etm_queue *etmq;
	size_t szp = sizeof(struct cs_etm_packet);

	etmq = zalloc(sizeof(*etmq));
	if (!etmq)
		return NULL;

	etmq->packet = zalloc(szp);
	if (!etmq->packet)
		goto out_free;

	etmq->prev_packet = zalloc(szp);
	if (!etmq->prev_packet)
		goto out_free;

	if (etm->synth_opts.last_branch) {
		size_t sz = sizeof(struct branch_stack);

		sz += etm->synth_opts.last_branch_sz *
		      sizeof(struct branch_entry);
		etmq->last_branch = zalloc(sz);
		if (!etmq->last_branch)
			goto out_free;
		etmq->last_branch_rb = zalloc(sz);
		if (!etmq->last_branch_rb)
			goto out_free;
	}

	etmq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
	if (!etmq->event_buf)
		goto out_free;

	/* Use metadata to fill in trace parameters for trace decoder */
	t_params = zalloc(sizeof(*t_params) * etm->num_cpu);

	if (!t_params)
		goto out_free;

	if (cs_etm__init_trace_params(t_params, etm))
		goto out_free;

	/* Set decoder parameters to decode trace packets */
	if (cs_etm__init_decoder_params(&d_params, etmq,
					CS_ETM_OPERATION_DECODE))
		goto out_free;

	etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params);

	if (!etmq->decoder)
		goto out_free;

	/*
	 * Register a function to handle all memory accesses required by
	 * the trace decoder library.
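	 * The callback is registered for the entire address range, so every
	 * instruction read the decoder performs is serviced by
	 * cs_etm__mem_access() above.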
	 */
	if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
					      0x0L, ((u64) -1L),
					      cs_etm__mem_access))
		goto out_free_decoder;

	zfree(&t_params);
	return etmq;

out_free_decoder:
	cs_etm_decoder__free(etmq->decoder);
out_free:
	zfree(&t_params);
	zfree(&etmq->event_buf);
	zfree(&etmq->last_branch);
	zfree(&etmq->last_branch_rb);
	zfree(&etmq->prev_packet);
	zfree(&etmq->packet);
	free(etmq);

	return NULL;
}

static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
			       struct auxtrace_queue *queue,
			       unsigned int queue_nr)
{
	int ret = 0;
	struct cs_etm_queue *etmq = queue->priv;

	if (list_empty(&queue->head) || etmq)
		goto out;

	etmq = cs_etm__alloc_queue(etm);

	if (!etmq) {
		ret = -ENOMEM;
		goto out;
	}

	queue->priv = etmq;
	etmq->etm = etm;
	etmq->queue_nr = queue_nr;
	etmq->cpu = queue->cpu;
	etmq->tid = queue->tid;
	etmq->pid = -1;
	etmq->offset = 0;
	etmq->period_instructions = 0;

out:
	return ret;
}

static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm)
{
	unsigned int i;
	int ret;

	if (!etm->kernel_start)
		etm->kernel_start = machine__kernel_start(etm->machine);

	for (i = 0; i < etm->queues.nr_queues; i++) {
		ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i);
		if (ret)
			return ret;
	}

	return 0;
}

static int cs_etm__update_queues(struct cs_etm_auxtrace *etm)
{
	if (etm->queues.new_data) {
		etm->queues.new_data = false;
		return cs_etm__setup_queues(etm);
	}

	return 0;
}

static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq)
{
	struct branch_stack *bs_src = etmq->last_branch_rb;
	struct branch_stack *bs_dst = etmq->last_branch;
	size_t nr = 0;

	/*
	 * Set the number of records before early exit: ->nr is used to
	 * determine how many branches to copy from ->entries.
	 */
	bs_dst->nr = bs_src->nr;

	/*
	 * Early exit when there is nothing to copy.
	 */
	if (!bs_src->nr)
		return;

	/*
	 * As bs_src->entries is a circular buffer, we need to copy from it in
	 * two steps.  First, copy the branches from the most recently inserted
	 * branch ->last_branch_pos until the end of bs_src->entries buffer.
	 */
	nr = etmq->etm->synth_opts.last_branch_sz - etmq->last_branch_pos;
	memcpy(&bs_dst->entries[0],
	       &bs_src->entries[etmq->last_branch_pos],
	       sizeof(struct branch_entry) * nr);

	/*
	 * If we wrapped around at least once, the branches from the beginning
	 * of the bs_src->entries buffer and until the ->last_branch_pos element
	 * are older valid branches: copy them over.  The total number of
	 * branches copied over will be equal to the number of branches asked by
	 * the user in last_branch_sz.
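	 *
	 * For example, with last_branch_sz = 4 and last_branch_pos = 1, the
	 * memcpy above moves entries [1..3] and the one below moves entry [0].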
	 */
	if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
		memcpy(&bs_dst->entries[nr],
		       &bs_src->entries[0],
		       sizeof(struct branch_entry) * etmq->last_branch_pos);
	}
}

static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq)
{
	etmq->last_branch_pos = 0;
	etmq->last_branch_rb->nr = 0;
}

static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
					 u64 addr)
{
	u8 instrBytes[2];

	cs_etm__mem_access(etmq, addr, ARRAY_SIZE(instrBytes), instrBytes);
	/*
	 * T32 instruction size is indicated by bits[15:11] of the first
	 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
	 * denote a 32-bit instruction.
	 */
	return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
}

static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
{
	/* Returns 0 for the CS_ETM_DISCONTINUITY packet */
	if (packet->sample_type == CS_ETM_DISCONTINUITY)
		return 0;

	return packet->start_addr;
}

static inline
u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
{
	/* Returns 0 for the CS_ETM_DISCONTINUITY packet */
	if (packet->sample_type == CS_ETM_DISCONTINUITY)
		return 0;

	return packet->end_addr - packet->last_instr_size;
}

static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
				     const struct cs_etm_packet *packet,
				     u64 offset)
{
	if (packet->isa == CS_ETM_ISA_T32) {
		u64 addr = packet->start_addr;

		while (offset > 0) {
			addr += cs_etm__t32_instr_size(etmq, addr);
			offset--;
		}
		return addr;
	}

	/* Assume a 4 byte instruction size (A32/A64) */
	return packet->start_addr + offset * 4;
}

static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq)
{
	struct branch_stack *bs = etmq->last_branch_rb;
	struct branch_entry *be;

	/*
	 * The branches are recorded in a circular buffer in reverse
	 * chronological order: we start recording from the last element of the
	 * buffer down.  After writing the first element of the stack, move the
	 * insert position back to the end of the buffer.
	 */
	if (!etmq->last_branch_pos)
		etmq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;

	etmq->last_branch_pos -= 1;

	be = &bs->entries[etmq->last_branch_pos];
	be->from = cs_etm__last_executed_instr(etmq->prev_packet);
	be->to = cs_etm__first_executed_instr(etmq->packet);
	/* No support for mispredict */
	be->flags.mispred = 0;
	be->flags.predicted = 1;

	/*
	 * Increment bs->nr until reaching the number of last branches asked by
	 * the user on the command line.
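	 * Once the buffer is full, ->nr saturates there and each new branch
	 * simply overwrites the oldest entry in place.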
	 */
	if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
		bs->nr += 1;
}

static int cs_etm__inject_event(union perf_event *event,
				struct perf_sample *sample, u64 type)
{
	event->header.size = perf_event__sample_event_size(sample, type, 0);
	return perf_event__synthesize_sample(event, type, 0, sample);
}

static int cs_etm__get_trace(struct cs_etm_queue *etmq)
{
	struct auxtrace_buffer *aux_buffer = etmq->buffer;
	struct auxtrace_buffer *old_buffer = aux_buffer;
	struct auxtrace_queue *queue;

	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];

	aux_buffer = auxtrace_buffer__next(queue, aux_buffer);

	/* If no more data, drop the previous auxtrace_buffer and return */
	if (!aux_buffer) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		etmq->buf_len = 0;
		return 0;
	}

	etmq->buffer = aux_buffer;

	/* If the aux_buffer doesn't have data associated, try to load it */
	if (!aux_buffer->data) {
		/* get the file desc associated with the perf data file */
		int fd = perf_data__fd(etmq->etm->session->data);

		aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd);
		if (!aux_buffer->data)
			return -ENOMEM;
	}

	/* If valid, drop the previous buffer */
	if (old_buffer)
		auxtrace_buffer__drop_data(old_buffer);

	etmq->buf_used = 0;
	etmq->buf_len = aux_buffer->size;
	etmq->buf = aux_buffer->data;

	return etmq->buf_len;
}

static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm,
				    struct auxtrace_queue *queue)
{
	struct cs_etm_queue *etmq = queue->priv;

	/* CPU-wide tracing isn't supported yet */
	if (queue->tid == -1)
		return;

	if ((!etmq->thread) && (etmq->tid != -1))
		etmq->thread = machine__find_thread(etm->machine, -1,
						    etmq->tid);

	if (etmq->thread) {
		etmq->pid = etmq->thread->pid_;
		if (queue->cpu == -1)
			etmq->cpu = etmq->thread->cpu;
	}
}

static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
					    u64 addr, u64 period)
{
	int ret = 0;
	struct cs_etm_auxtrace *etm = etmq->etm;
	union perf_event *event = etmq->event_buf;
	struct perf_sample sample = {.ip = 0,};

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = cs_etm__cpu_mode(etmq, addr);
	event->sample.header.size = sizeof(struct perf_event_header);

	sample.ip = addr;
	sample.pid = etmq->pid;
	sample.tid = etmq->tid;
	sample.id = etmq->etm->instructions_id;
	sample.stream_id = etmq->etm->instructions_id;
	sample.period = period;
	sample.cpu = etmq->packet->cpu;
	sample.flags = etmq->prev_packet->flags;
	sample.insn_len = 1;
	sample.cpumode = event->sample.header.misc;

	if (etm->synth_opts.last_branch) {
		cs_etm__copy_last_branch_rb(etmq);
		sample.branch_stack = etmq->last_branch;
	}

	if (etm->synth_opts.inject) {
		ret = cs_etm__inject_event(event, &sample,
					   etm->instructions_sample_type);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(etm->session, event, &sample);

	if (ret)
		pr_err(
			"CS ETM Trace: failed to deliver instruction event, error %d\n",
			ret);

	if (etm->synth_opts.last_branch)
		cs_etm__reset_last_branch_rb(etmq);

	return ret;
}

/*
 * The cs etm packet encodes an instruction range between a branch target
 * and the next taken branch.
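 * The sample 'ip' is therefore the last instruction executed in the
 * previous range packet and the sample 'addr' the first instruction of
 * the current one.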
 * Generate a sample accordingly.
 */
static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq)
{
	int ret = 0;
	struct cs_etm_auxtrace *etm = etmq->etm;
	struct perf_sample sample = {.ip = 0,};
	union perf_event *event = etmq->event_buf;
	struct dummy_branch_stack {
		u64			nr;
		struct branch_entry	entries;
	} dummy_bs;
	u64 ip;

	ip = cs_etm__last_executed_instr(etmq->prev_packet);

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = cs_etm__cpu_mode(etmq, ip);
	event->sample.header.size = sizeof(struct perf_event_header);

	sample.ip = ip;
	sample.pid = etmq->pid;
	sample.tid = etmq->tid;
	sample.addr = cs_etm__first_executed_instr(etmq->packet);
	sample.id = etmq->etm->branches_id;
	sample.stream_id = etmq->etm->branches_id;
	sample.period = 1;
	sample.cpu = etmq->packet->cpu;
	sample.flags = etmq->prev_packet->flags;
	sample.cpumode = event->sample.header.misc;

	/*
	 * perf report cannot handle events without a branch stack
	 */
	if (etm->synth_opts.last_branch) {
		dummy_bs = (struct dummy_branch_stack){
			.nr = 1,
			.entries = {
				.from = sample.ip,
				.to = sample.addr,
			},
		};
		sample.branch_stack = (struct branch_stack *)&dummy_bs;
	}

	if (etm->synth_opts.inject) {
		ret = cs_etm__inject_event(event, &sample,
					   etm->branches_sample_type);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(etm->session, event, &sample);

	if (ret)
		pr_err(
			"CS ETM Trace: failed to deliver branch event, error %d\n",
			ret);

	return ret;
}

struct cs_etm_synth {
	struct perf_tool dummy_tool;
	struct perf_session *session;
};

static int cs_etm__event_synth(struct perf_tool *tool,
			       union perf_event *event,
			       struct perf_sample *sample __maybe_unused,
			       struct machine *machine __maybe_unused)
{
	struct cs_etm_synth *cs_etm_synth =
		      container_of(tool, struct cs_etm_synth, dummy_tool);

	return perf_session__deliver_synth_event(cs_etm_synth->session,
						 event, NULL);
}

static int cs_etm__synth_event(struct perf_session *session,
			       struct perf_event_attr *attr, u64 id)
{
	struct cs_etm_synth cs_etm_synth;

	memset(&cs_etm_synth, 0, sizeof(struct cs_etm_synth));
	cs_etm_synth.session = session;

	return perf_event__synthesize_attr(&cs_etm_synth.dummy_tool, attr, 1,
					   &id, cs_etm__event_synth);
}

static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
				struct perf_session *session)
{
	struct perf_evlist *evlist = session->evlist;
	struct perf_evsel *evsel;
	struct perf_event_attr attr;
	bool found = false;
	u64 id;
	int err;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->attr.type == etm->pmu_type) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_debug("No selected events with CoreSight Trace data\n");
		return 0;
	}

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.size = sizeof(struct perf_event_attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD;
	if (etm->timeless_decoding)
		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	else
		attr.sample_type |= PERF_SAMPLE_TIME;

	attr.exclude_user = evsel->attr.exclude_user;
	attr.exclude_kernel = evsel->attr.exclude_kernel;
	attr.exclude_hv = evsel->attr.exclude_hv;
	attr.exclude_host = evsel->attr.exclude_host;
	attr.exclude_guest = evsel->attr.exclude_guest;
	attr.sample_id_all = evsel->attr.sample_id_all;
	attr.read_format = evsel->attr.read_format;

	/* create new id val to be a fixed offset from evsel id */
	id = evsel->id[0] + 1000000000;

	if (!id)
		id = 1;

	if (etm->synth_opts.branches) {
		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
		attr.sample_period = 1;
		attr.sample_type |= PERF_SAMPLE_ADDR;
		err = cs_etm__synth_event(session, &attr, id);
		if (err)
			return err;
		etm->sample_branches = true;
		etm->branches_sample_type = attr.sample_type;
		etm->branches_id = id;
		id += 1;
		attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
	}

	if (etm->synth_opts.last_branch)
		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;

	if (etm->synth_opts.instructions) {
		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
		attr.sample_period = etm->synth_opts.period;
		etm->instructions_sample_period = attr.sample_period;
		err = cs_etm__synth_event(session, &attr, id);
		if (err)
			return err;
		etm->sample_instructions = true;
		etm->instructions_sample_type = attr.sample_type;
		etm->instructions_id = id;
		id += 1;
	}

	return 0;
}

static int cs_etm__sample(struct cs_etm_queue *etmq)
{
	struct cs_etm_auxtrace *etm = etmq->etm;
	struct cs_etm_packet *tmp;
	int ret;
	u64 instrs_executed = etmq->packet->instr_count;

	etmq->period_instructions += instrs_executed;

	/*
	 * Record a branch when the last instruction in
	 * PREV_PACKET is a branch.
	 */
	if (etm->synth_opts.last_branch &&
	    etmq->prev_packet->sample_type == CS_ETM_RANGE &&
	    etmq->prev_packet->last_instr_taken_branch)
		cs_etm__update_last_branch_rb(etmq);

	if (etm->sample_instructions &&
	    etmq->period_instructions >= etm->instructions_sample_period) {
		/*
		 * Emit instruction sample periodically
		 * TODO: allow period to be defined in cycles and clock time
		 */

		/* Get number of instructions executed after the sample point */
		u64 instrs_over = etmq->period_instructions -
			etm->instructions_sample_period;

		/*
		 * Calculate the address of the sampled instruction (-1 as
		 * sample is reported as though instruction has just been
		 * executed, but PC has not advanced to next instruction)
		 */
		u64 offset = (instrs_executed - instrs_over - 1);
		u64 addr = cs_etm__instr_addr(etmq, etmq->packet, offset);

		ret = cs_etm__synth_instruction_sample(
			etmq, addr, etm->instructions_sample_period);
		if (ret)
			return ret;

		/* Carry remaining instructions into next sample period */
		etmq->period_instructions = instrs_over;
	}

	if (etm->sample_branches) {
		bool generate_sample = false;

		/* Generate sample for tracing on packet */
		if (etmq->prev_packet->sample_type == CS_ETM_DISCONTINUITY)
			generate_sample = true;

		/* Generate sample for branch taken packet */
		if (etmq->prev_packet->sample_type == CS_ETM_RANGE &&
		    etmq->prev_packet->last_instr_taken_branch)
			generate_sample = true;

		if (generate_sample) {
			ret = cs_etm__synth_branch_sample(etmq);
			if (ret)
				return ret;
		}
	}

	if (etm->sample_branches || etm->synth_opts.last_branch) {
		/*
		 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
		 * the next incoming packet.
		 */
		tmp = etmq->packet;
		etmq->packet = etmq->prev_packet;
		etmq->prev_packet = tmp;
	}

	return 0;
}

static int cs_etm__exception(struct cs_etm_queue *etmq)
{
	/*
	 * When an exception packet is inserted, whether or not the last
	 * instruction in the previous range packet is a taken branch, we
	 * need to force 'prev_packet->last_instr_taken_branch' to true.
	 * This ensures that a branch sample is generated for the instruction
	 * range before the exception is trapped to the kernel or before the
	 * exception returns.
	 *
	 * The exception packet includes dummy address values, so don't
	 * swap PACKET with PREV_PACKET.  This keeps PREV_PACKET usable
	 * for generating instruction and branch samples.
	 */
	if (etmq->prev_packet->sample_type == CS_ETM_RANGE)
		etmq->prev_packet->last_instr_taken_branch = true;

	return 0;
}

static int cs_etm__flush(struct cs_etm_queue *etmq)
{
	int err = 0;
	struct cs_etm_auxtrace *etm = etmq->etm;
	struct cs_etm_packet *tmp;

	/* Handle start tracing packet */
	if (etmq->prev_packet->sample_type == CS_ETM_EMPTY)
		goto swap_packet;

	if (etmq->etm->synth_opts.last_branch &&
	    etmq->prev_packet->sample_type == CS_ETM_RANGE) {
		/*
		 * Generate a last branch event for the branches left in the
		 * circular buffer at the end of the trace.
		 *
		 * Use the address of the end of the last reported execution
		 * range.
		 */
		u64 addr = cs_etm__last_executed_instr(etmq->prev_packet);

		err = cs_etm__synth_instruction_sample(
			etmq, addr,
			etmq->period_instructions);
		if (err)
			return err;

		etmq->period_instructions = 0;

	}

	if (etm->sample_branches &&
	    etmq->prev_packet->sample_type == CS_ETM_RANGE) {
		err = cs_etm__synth_branch_sample(etmq);
		if (err)
			return err;
	}

swap_packet:
	if (etm->sample_branches || etm->synth_opts.last_branch) {
		/*
		 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
		 * the next incoming packet.
		 */
		tmp = etmq->packet;
		etmq->packet = etmq->prev_packet;
		etmq->prev_packet = tmp;
	}

	return err;
}

static int cs_etm__end_block(struct cs_etm_queue *etmq)
{
	int err;

	/*
	 * No new packet is coming and 'etmq->packet' still contains the stale
	 * packet left over from the previous packet swap, so skip generating
	 * a branch sample for it to avoid using stale data.
	 *
	 * For this case, only flush the branch stack and generate a last
	 * branch event for the branches left in the circular buffer at the
	 * end of the trace.
	 */
	if (etmq->etm->synth_opts.last_branch &&
	    etmq->prev_packet->sample_type == CS_ETM_RANGE) {
		/*
		 * Use the address of the end of the last reported execution
		 * range.
		 */
		u64 addr = cs_etm__last_executed_instr(etmq->prev_packet);

		err = cs_etm__synth_instruction_sample(
			etmq, addr,
			etmq->period_instructions);
		if (err)
			return err;

		etmq->period_instructions = 0;
	}

	return 0;
}
/*
 * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue
 *			   if need be.
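 *
 * Fetching a new block also resets the decoder, since consecutive
 * blocks in the data file are not guaranteed to be contiguous.
 *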
 * Returns:	< 0	if error
 *		= 0	if no more auxtrace_buffer to read
 *		> 0	if the current buffer isn't empty yet
 */
static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
{
	int ret;

	if (!etmq->buf_len) {
		ret = cs_etm__get_trace(etmq);
		if (ret <= 0)
			return ret;
		/*
		 * We cannot assume consecutive blocks in the data file
		 * are contiguous, reset the decoder to force re-sync.
		 */
		ret = cs_etm_decoder__reset(etmq->decoder);
		if (ret)
			return ret;
	}

	return etmq->buf_len;
}

static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq,
				 struct cs_etm_packet *packet,
				 u64 end_addr)
{
	u16 instr16;
	u32 instr32;
	u64 addr;

	switch (packet->isa) {
	case CS_ETM_ISA_T32:
		/*
		 * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247:
		 *
		 *  b'15                b'8
		 * +-----------------+--------+
		 * | 1 1 0 1 1 1 1 1 |  imm8  |
		 * +-----------------+--------+
		 *
		 * According to the specification, SVC only has a 16-bit T32
		 * encoding and no 32-bit form, so read just 2 bytes for the
		 * instruction.
		 */
		addr = end_addr - 2;
		cs_etm__mem_access(etmq, addr, sizeof(instr16), (u8 *)&instr16);
		if ((instr16 & 0xFF00) == 0xDF00)
			return true;

		break;
	case CS_ETM_ISA_A32:
		/*
		 * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247:
		 *
		 *  b'31 b'28 b'27 b'24
		 * +---------+---------+-------------------------+
		 * |  !1111  | 1 1 1 1 |          imm24          |
		 * +---------+---------+-------------------------+
		 */
		addr = end_addr - 4;
		cs_etm__mem_access(etmq, addr, sizeof(instr32), (u8 *)&instr32);
		if ((instr32 & 0x0F000000) == 0x0F000000 &&
		    (instr32 & 0xF0000000) != 0xF0000000)
			return true;

		break;
	case CS_ETM_ISA_A64:
		/*
		 * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294:
		 *
		 *  b'31                   b'21       b'4     b'0
		 * +-----------------------+---------+-----------+
		 * | 1 1 0 1 0 1 0 0 0 0 0 |  imm16  | 0 0 0 0 1 |
		 * +-----------------------+---------+-----------+
		 */
		addr = end_addr - 4;
		cs_etm__mem_access(etmq, addr, sizeof(instr32), (u8 *)&instr32);
		if ((instr32 & 0xFFE0001F) == 0xd4000001)
			return true;

		break;
	case CS_ETM_ISA_UNKNOWN:
	default:
		break;
	}

	return false;
}

static bool cs_etm__is_syscall(struct cs_etm_queue *etmq, u64 magic)
{
	struct cs_etm_packet *packet = etmq->packet;
	struct cs_etm_packet *prev_packet = etmq->prev_packet;

	if (magic == __perf_cs_etmv3_magic)
		if (packet->exception_number == CS_ETMV3_EXC_SVC)
			return true;

	/*
	 * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and
	 * HVC cases; need to check if it's an SVC instruction based on
	 * the packet address.
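	 * The SVC is the last instruction executed in the previous range
	 * packet, which is why it is read back from prev_packet->end_addr.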
	 */
	if (magic == __perf_cs_etmv4_magic) {
		if (packet->exception_number == CS_ETMV4_EXC_CALL &&
		    cs_etm__is_svc_instr(etmq, prev_packet,
					 prev_packet->end_addr))
			return true;
	}

	return false;
}

static bool cs_etm__is_async_exception(struct cs_etm_queue *etmq, u64 magic)
{
	struct cs_etm_packet *packet = etmq->packet;

	if (magic == __perf_cs_etmv3_magic)
		if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT ||
		    packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT ||
		    packet->exception_number == CS_ETMV3_EXC_PE_RESET ||
		    packet->exception_number == CS_ETMV3_EXC_IRQ ||
		    packet->exception_number == CS_ETMV3_EXC_FIQ)
			return true;

	if (magic == __perf_cs_etmv4_magic)
		if (packet->exception_number == CS_ETMV4_EXC_RESET ||
		    packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT ||
		    packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR ||
		    packet->exception_number == CS_ETMV4_EXC_INST_DEBUG ||
		    packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG ||
		    packet->exception_number == CS_ETMV4_EXC_IRQ ||
		    packet->exception_number == CS_ETMV4_EXC_FIQ)
			return true;

	return false;
}

static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, u64 magic)
{
	struct cs_etm_packet *packet = etmq->packet;
	struct cs_etm_packet *prev_packet = etmq->prev_packet;

	if (magic == __perf_cs_etmv3_magic)
		if (packet->exception_number == CS_ETMV3_EXC_SMC ||
		    packet->exception_number == CS_ETMV3_EXC_HYP ||
		    packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE ||
		    packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR ||
		    packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT ||
		    packet->exception_number == CS_ETMV3_EXC_DATA_FAULT ||
		    packet->exception_number == CS_ETMV3_EXC_GENERIC)
			return true;

	if (magic == __perf_cs_etmv4_magic) {
		if (packet->exception_number == CS_ETMV4_EXC_TRAP ||
		    packet->exception_number == CS_ETMV4_EXC_ALIGNMENT ||
		    packet->exception_number == CS_ETMV4_EXC_INST_FAULT ||
		    packet->exception_number == CS_ETMV4_EXC_DATA_FAULT)
			return true;

		/*
		 * For CS_ETMV4_EXC_CALL, instructions other than SVC
		 * (SMC, HVC) are taken as sync exceptions.
		 */
		if (packet->exception_number == CS_ETMV4_EXC_CALL &&
		    !cs_etm__is_svc_instr(etmq, prev_packet,
					  prev_packet->end_addr))
			return true;

		/*
		 * ETMv4 has 5 bits for the exception number; if the numbers
		 * are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ]
		 * they are implementation defined exceptions.
		 *
		 * For this case, simply take it as a sync exception.
		 */
		if (packet->exception_number > CS_ETMV4_EXC_FIQ &&
		    packet->exception_number <= CS_ETMV4_EXC_END)
			return true;
	}

	return false;
}

static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq)
{
	struct cs_etm_packet *packet = etmq->packet;
	struct cs_etm_packet *prev_packet = etmq->prev_packet;
	u64 magic;
	int ret;

	switch (packet->sample_type) {
	case CS_ETM_RANGE:
		/*
		 * An immediate branch instruction with neither link nor
		 * return flag set is a normal branch within the function.
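		 * This and the cases below derive the flags from the OpenCSD
		 * type/subtype reported for the last instruction executed in
		 * the range.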
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR &&
		    packet->last_instr_subtype == OCSD_S_INSTR_NONE) {
			packet->flags = PERF_IP_FLAG_BRANCH;

			if (packet->last_instr_cond)
				packet->flags |= PERF_IP_FLAG_CONDITIONAL;
		}

		/*
		 * An immediate branch instruction with link (e.g. BL) is a
		 * branch instruction for a function call.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR &&
		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL;

		/*
		 * An indirect branch instruction with link (e.g. BLR) is a
		 * branch instruction for a function call.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL;

		/*
		 * An indirect branch instruction with subtype
		 * OCSD_S_INSTR_V7_IMPLIED_RET is an explicit hint for a
		 * function return on A32/T32.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_RETURN;

		/*
		 * An indirect branch instruction without link (e.g. BR) is
		 * usually used for a function return, especially for
		 * functions within dynamically linked libraries.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_NONE)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_RETURN;

		/* Return instruction for function return. */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_V8_RET)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_RETURN;

		/*
		 * Decoder might insert a discontinuity in the middle of
		 * instruction packets; fix up prev_packet with the flag
		 * PERF_IP_FLAG_TRACE_BEGIN to indicate a restarting trace.
		 */
		if (prev_packet->sample_type == CS_ETM_DISCONTINUITY)
			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
					      PERF_IP_FLAG_TRACE_BEGIN;

		/*
		 * If the previous packet is an exception return packet
		 * and the return address just follows an SVC instruction,
		 * calibrate the previous packet sample flags to
		 * PERF_IP_FLAG_SYSCALLRET.
		 */
		if (prev_packet->flags == (PERF_IP_FLAG_BRANCH |
					   PERF_IP_FLAG_RETURN |
					   PERF_IP_FLAG_INTERRUPT) &&
		    cs_etm__is_svc_instr(etmq, packet, packet->start_addr))
			prev_packet->flags = PERF_IP_FLAG_BRANCH |
					     PERF_IP_FLAG_RETURN |
					     PERF_IP_FLAG_SYSCALLRET;
		break;
	case CS_ETM_DISCONTINUITY:
		/*
		 * The trace is discontinuous; if the previous packet is an
		 * instruction packet, set flag PERF_IP_FLAG_TRACE_END
		 * for the previous packet.
		 */
		if (prev_packet->sample_type == CS_ETM_RANGE)
			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
					      PERF_IP_FLAG_TRACE_END;
		break;
	case CS_ETM_EXCEPTION:
		ret = cs_etm__get_magic(packet->trace_chan_id, &magic);
		if (ret)
			return ret;

		/* The exception is for a system call. */
		if (cs_etm__is_syscall(etmq, magic))
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL |
					PERF_IP_FLAG_SYSCALLRET;
		/*
		 * The exceptions are triggered by external signals from bus,
		 * interrupt controller, debug module, PE reset or halt.
		 */
		else if (cs_etm__is_async_exception(etmq, magic))
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL |
					PERF_IP_FLAG_ASYNC |
					PERF_IP_FLAG_INTERRUPT;
		/*
		 * Otherwise, the exception is caused by a trap, an
		 * instruction or data fault, or an alignment error.
		 */
		else if (cs_etm__is_sync_exception(etmq, magic))
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL |
					PERF_IP_FLAG_INTERRUPT;

		/*
		 * An exception packet is not used standalone for generating
		 * samples; it is affiliated with the previous instruction
		 * range packet.  So when the exception packet is inserted,
		 * set the previous range packet's flags to tell perf that it
		 * is an exception taken branch.
		 */
		if (prev_packet->sample_type == CS_ETM_RANGE)
			prev_packet->flags = packet->flags;
		break;
	case CS_ETM_EXCEPTION_RET:
		/*
		 * Like the exception packet, the exception return packet is
		 * not used standalone for generating samples; it is
		 * affiliated with the previous instruction range packet.  So
		 * when the exception return packet is inserted, set the
		 * previous range packet's flags to tell perf it is an
		 * exception return branch.
		 *
		 * The exception return can be for either a system call or
		 * another exception type; unfortunately the packet doesn't
		 * contain exception type related info, so we cannot decide
		 * the exception type purely based on the exception return
		 * packet.  Recording the exception number from the exception
		 * packet and reusing it for the exception return packet is
		 * not reliable either, since the trace can be discontinuous
		 * or the interrupts can be nested; in those two cases the
		 * recorded exception number cannot be used.
		 *
		 * For the exception return packet, we only need to
		 * distinguish whether it is for a system call or for another
		 * exception type.  That decision can be deferred until the
		 * next packet, which contains the return address: based on
		 * the return address we can read back the previous
		 * instruction, check whether it is a system call instruction
		 * and then calibrate the sample flag as needed.
		 */
		if (prev_packet->sample_type == CS_ETM_RANGE)
			prev_packet->flags = PERF_IP_FLAG_BRANCH |
					     PERF_IP_FLAG_RETURN |
					     PERF_IP_FLAG_INTERRUPT;
		break;
	case CS_ETM_EMPTY:
	default:
		break;
	}

	return 0;
}

static int cs_etm__decode_data_block(struct cs_etm_queue *etmq)
{
	int ret = 0;
	size_t processed = 0;

	/*
	 * Packets are decoded and added to the decoder's packet queue
	 * until the decoder packet processing callback has requested that
	 * processing stops or there is nothing left in the buffer.  Normal
	 * operations that stop processing are a timestamp packet or a full
	 * decoder buffer queue.
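	 * The queued packets are then drained by
	 * cs_etm__process_decoder_queue() before more trace data is fed to
	 * the decoder.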
	 */
	ret = cs_etm_decoder__process_data_block(etmq->decoder,
						 etmq->offset,
						 &etmq->buf[etmq->buf_used],
						 etmq->buf_len,
						 &processed);
	if (ret)
		goto out;

	etmq->offset += processed;
	etmq->buf_used += processed;
	etmq->buf_len -= processed;

out:
	return ret;
}

static int cs_etm__process_decoder_queue(struct cs_etm_queue *etmq)
{
	int ret;

	/* Process each packet in this chunk */
	while (1) {
		ret = cs_etm_decoder__get_packet(etmq->decoder,
						 etmq->packet);
		if (ret <= 0)
			/*
			 * Stop processing this chunk on
			 * end of data or error
			 */
			break;

		/*
		 * Packet addresses are swapped in the packet handling within
		 * the switch() statement below, so the sample flags must be
		 * set before the switch() statement, while the address
		 * information is still current.
		 */
		ret = cs_etm__set_sample_flags(etmq);
		if (ret < 0)
			break;

		switch (etmq->packet->sample_type) {
		case CS_ETM_RANGE:
			/*
			 * If the packet contains an instruction
			 * range, generate instruction sequence
			 * events.
			 */
			cs_etm__sample(etmq);
			break;
		case CS_ETM_EXCEPTION:
		case CS_ETM_EXCEPTION_RET:
			/*
			 * If an exception packet comes, make sure the
			 * previous instruction range packet is handled
			 * properly.
			 */
			cs_etm__exception(etmq);
			break;
		case CS_ETM_DISCONTINUITY:
			/*
			 * Discontinuity in trace, flush
			 * previous branch stack
			 */
			cs_etm__flush(etmq);
			break;
		case CS_ETM_EMPTY:
			/*
			 * Should not receive empty packet,
			 * report error.
			 */
			pr_err("CS ETM Trace: empty packet\n");
			return -EINVAL;
		default:
			break;
		}
	}

	return ret;
}

static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
{
	int err = 0;

	/* Go through each buffer in the queue and decode them one by one */
	while (1) {
		err = cs_etm__get_data_block(etmq);
		if (err <= 0)
			return err;

		/* Run trace decoder until buffer consumed or end of trace */
		do {
			err = cs_etm__decode_data_block(etmq);
			if (err)
				return err;

			/*
			 * Process each packet in this chunk; there is nothing
			 * to do if an error occurs, other than hoping the
			 * next one will be better.
			 */
			err = cs_etm__process_decoder_queue(etmq);

		} while (etmq->buf_len);

		if (err == 0)
			/* Flush any remaining branch stack entries */
			err = cs_etm__end_block(etmq);
	}

	return err;
}

static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
					   pid_t tid)
{
	unsigned int i;
	struct auxtrace_queues *queues = &etm->queues;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &etm->queues.queue_array[i];
		struct cs_etm_queue *etmq = queue->priv;

		if (etmq && ((tid == -1) || (etmq->tid == tid))) {
			cs_etm__set_pid_tid_cpu(etm, queue);
			cs_etm__run_decoder(etmq);
		}
	}

	return 0;
}

static int cs_etm__process_event(struct perf_session *session,
				 union perf_event *event,
				 struct perf_sample *sample,
				 struct perf_tool *tool)
{
	int err = 0;
	u64 timestamp;
	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("CoreSight ETM Trace requires ordered events\n");
		return -EINVAL;
	}

	if (!etm->timeless_decoding)
		return -EINVAL;

	if (sample->time && (sample->time != (u64) -1))
		timestamp = sample->time;
	else
		timestamp = 0;

	if (timestamp || etm->timeless_decoding) {
		err = cs_etm__update_queues(etm);
		if (err)
			return err;
	}

	if (event->header.type == PERF_RECORD_EXIT)
		return cs_etm__process_timeless_queues(etm,
						       event->fork.tid);

	return 0;
}

static int cs_etm__process_auxtrace_event(struct perf_session *session,
					  union perf_event *event,
					  struct perf_tool *tool __maybe_unused)
{
	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	if (!etm->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t data_offset;
		int fd = perf_data__fd(session->data);
		bool is_pipe = perf_data__is_pipe(session->data);
		int err;

		if (is_pipe)
			data_offset = 0;
		else {
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&etm->queues, session,
						 event, data_offset, &buffer);
		if (err)
			return err;

		if (dump_trace)
			if (auxtrace_buffer__get_data(buffer, fd)) {
				cs_etm__dump_event(etm, buffer);
				auxtrace_buffer__put_data(buffer);
			}
	}

	return 0;
}

static bool cs_etm__is_timeless_decoding(struct cs_etm_auxtrace *etm)
{
	struct perf_evsel *evsel;
	struct perf_evlist *evlist = etm->session->evlist;
	bool timeless_decoding = true;

	/*
	 * Cycle through the list of events and clear the timeless flag if
	 * we find one with the time bit set.
	 */
	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->attr.sample_type & PERF_SAMPLE_TIME))
			timeless_decoding = false;
	}

	return timeless_decoding;
}

static const char * const cs_etm_global_header_fmts[] = {
	[CS_HEADER_VERSION_0]	= "	Header version		       %llx\n",
	[CS_PMU_TYPE_CPUS]	= "	PMU type/num cpus	       %llx\n",
	[CS_ETM_SNAPSHOT]	= "	Snapshot		       %llx\n",
};

static const char * const cs_etm_priv_fmts[] = {
	[CS_ETM_MAGIC]		= "	Magic number		       %llx\n",
	[CS_ETM_CPU]		= "	CPU			       %lld\n",
	[CS_ETM_ETMCR]		= "	ETMCR			       %llx\n",
	[CS_ETM_ETMTRACEIDR]	= "	ETMTRACEIDR		       %llx\n",
	[CS_ETM_ETMCCER]	= "	ETMCCER			       %llx\n",
	[CS_ETM_ETMIDR]		= "	ETMIDR			       %llx\n",
};

static const char * const cs_etmv4_priv_fmts[] = {
	[CS_ETM_MAGIC]		= "	Magic number		       %llx\n",
	[CS_ETM_CPU]		= "	CPU			       %lld\n",
	[CS_ETMV4_TRCCONFIGR]	= "	TRCCONFIGR		       %llx\n",
	[CS_ETMV4_TRCTRACEIDR]	= "	TRCTRACEIDR		       %llx\n",
	[CS_ETMV4_TRCIDR0]	= "	TRCIDR0			       %llx\n",
	[CS_ETMV4_TRCIDR1]	= "	TRCIDR1			       %llx\n",
	[CS_ETMV4_TRCIDR2]	= "	TRCIDR2			       %llx\n",
	[CS_ETMV4_TRCIDR8]	= "	TRCIDR8			       %llx\n",
	[CS_ETMV4_TRCAUTHSTATUS] = "	TRCAUTHSTATUS		       %llx\n",
};

static void cs_etm__print_auxtrace_info(u64 *val, int num)
{
	int i, j, cpu = 0;

	for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++)
		fprintf(stdout, cs_etm_global_header_fmts[i], val[i]);

	for (i = CS_HEADER_VERSION_0_MAX; cpu < num; cpu++) {
		if (val[i] == __perf_cs_etmv3_magic)
			for (j = 0; j < CS_ETM_PRIV_MAX; j++, i++)
				fprintf(stdout, cs_etm_priv_fmts[j], val[i]);
		else if (val[i] == __perf_cs_etmv4_magic)
			for (j = 0; j < CS_ETMV4_PRIV_MAX; j++, i++)
				fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]);
		else
			/* failure - bail out */
			return;
	}
}

int cs_etm__process_auxtrace_info(union perf_event *event,
				  struct perf_session *session)
{
	struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
	struct cs_etm_auxtrace *etm = NULL;
	struct int_node *inode;
	unsigned int pmu_type;
	int event_header_size = sizeof(struct perf_event_header);
	int info_header_size;
	int total_size = auxtrace_info->header.size;
	int priv_size = 0;
	int num_cpu;
	int err = 0, idx = -1;
	int i, j, k;
	u64 *ptr, *hdr = NULL;
	u64 **metadata = NULL;

	/*
	 * sizeof(auxtrace_info_event::type) +
	 * sizeof(auxtrace_info_event::reserved) == 8
	 */
	info_header_size = 8;

	if (total_size < (event_header_size + info_header_size))
		return -EINVAL;

	priv_size = total_size - event_header_size - info_header_size;

	/* First the global part */
	ptr = (u64 *) auxtrace_info->priv;

	/* Look for version '0' of the header */
	if (ptr[0] != 0)
		return -EINVAL;

	hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_0_MAX);
	if (!hdr)
		return -ENOMEM;

	/* Extract header information - see cs-etm.h for format */
	for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++)
		hdr[i] = ptr[i];
	num_cpu = hdr[CS_PMU_TYPE_CPUS] & 0xffffffff;
	pmu_type = (unsigned int) ((hdr[CS_PMU_TYPE_CPUS] >> 32) &
				   0xffffffff);

	/*
	 * Create an RB tree for traceID-metadata tuples.  Since the conversion
	 * has to be made for each packet that gets decoded, optimizing access
	 * in anything other than a sequential array is worth doing.
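	 * Each node maps a traceID to its per-CPU metadata block, filled in
	 * below; cs_etm__get_magic() and cs_etm__get_cpu() do the lookup at
	 * decode time.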
	 */
	traceid_list = intlist__new(NULL);
	if (!traceid_list) {
		err = -ENOMEM;
		goto err_free_hdr;
	}

	metadata = zalloc(sizeof(*metadata) * num_cpu);
	if (!metadata) {
		err = -ENOMEM;
		goto err_free_traceid_list;
	}

	/*
	 * The metadata is stored in the auxtrace_info section and encodes
	 * the configuration of the ARM embedded trace macrocell which is
	 * required by the trace decoder to properly decode the trace due
	 * to its highly compressed nature.
	 */
	for (j = 0; j < num_cpu; j++) {
		if (ptr[i] == __perf_cs_etmv3_magic) {
			metadata[j] = zalloc(sizeof(*metadata[j]) *
					     CS_ETM_PRIV_MAX);
			if (!metadata[j]) {
				err = -ENOMEM;
				goto err_free_metadata;
			}
			for (k = 0; k < CS_ETM_PRIV_MAX; k++)
				metadata[j][k] = ptr[i + k];

			/* The traceID is our handle */
			idx = metadata[j][CS_ETM_ETMTRACEIDR];
			i += CS_ETM_PRIV_MAX;
		} else if (ptr[i] == __perf_cs_etmv4_magic) {
			metadata[j] = zalloc(sizeof(*metadata[j]) *
					     CS_ETMV4_PRIV_MAX);
			if (!metadata[j]) {
				err = -ENOMEM;
				goto err_free_metadata;
			}
			for (k = 0; k < CS_ETMV4_PRIV_MAX; k++)
				metadata[j][k] = ptr[i + k];

			/* The traceID is our handle */
			idx = metadata[j][CS_ETMV4_TRCTRACEIDR];
			i += CS_ETMV4_PRIV_MAX;
		}

		/* Get an RB node for this CPU */
		inode = intlist__findnew(traceid_list, idx);

		/* Something went wrong, no need to continue */
		if (!inode) {
			err = PTR_ERR(inode);
			goto err_free_metadata;
		}

		/*
		 * The node for that CPU should not be taken.
		 * Back out if that's the case.
		 */
		if (inode->priv) {
			err = -EINVAL;
			goto err_free_metadata;
		}
		/* All good, associate the traceID with the metadata pointer */
		inode->priv = metadata[j];
	}

	/*
	 * Each of CS_HEADER_VERSION_0_MAX, CS_ETM_PRIV_MAX and
	 * CS_ETMV4_PRIV_MAX mark how many double words are in the
	 * global metadata, and each cpu's metadata respectively.
	 * The following tests if the correct number of double words was
	 * present in the auxtrace info section.
	 */
	if (i * 8 != priv_size) {
		err = -EINVAL;
		goto err_free_metadata;
	}

	etm = zalloc(sizeof(*etm));

	if (!etm) {
		err = -ENOMEM;
		goto err_free_metadata;
	}

	err = auxtrace_queues__init(&etm->queues);
	if (err)
		goto err_free_etm;

	etm->session = session;
	etm->machine = &session->machines.host;

	etm->num_cpu = num_cpu;
	etm->pmu_type = pmu_type;
	etm->snapshot_mode = (hdr[CS_ETM_SNAPSHOT] != 0);
	etm->metadata = metadata;
	etm->auxtrace_type = auxtrace_info->type;
	etm->timeless_decoding = cs_etm__is_timeless_decoding(etm);

	etm->auxtrace.process_event = cs_etm__process_event;
	etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
	etm->auxtrace.flush_events = cs_etm__flush_events;
	etm->auxtrace.free_events = cs_etm__free_events;
	etm->auxtrace.free = cs_etm__free;
	session->auxtrace = &etm->auxtrace;

	etm->unknown_thread = thread__new(999999999, 999999999);
	if (!etm->unknown_thread)
		goto err_free_queues;

	/*
	 * Initialize list node so that at thread__zput() we can avoid
	 * segmentation fault at list_del_init().
	 */
	INIT_LIST_HEAD(&etm->unknown_thread->node);

	err = thread__set_comm(etm->unknown_thread, "unknown", 0);
	if (err)
		goto err_delete_thread;

	if (thread__init_map_groups(etm->unknown_thread, etm->machine))
		goto err_delete_thread;

	if (dump_trace) {
		cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu);
		return 0;
	}

	if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
		etm->synth_opts = *session->itrace_synth_opts;
	} else {
		itrace_synth_opts__set_default(&etm->synth_opts,
				session->itrace_synth_opts->default_no_sample);
		etm->synth_opts.callchain = false;
	}

	err = cs_etm__synth_events(etm, session);
	if (err)
		goto err_delete_thread;

	err = auxtrace_queues__process_index(&etm->queues, session);
	if (err)
		goto err_delete_thread;

	etm->data_queued = etm->queues.populated;

	return 0;

err_delete_thread:
	thread__zput(etm->unknown_thread);
err_free_queues:
	auxtrace_queues__free(&etm->queues);
	session->auxtrace = NULL;
err_free_etm:
	zfree(&etm);
err_free_metadata:
	/* No need to check @metadata[j], free(NULL) is supported */
	for (j = 0; j < num_cpu; j++)
		free(metadata[j]);
	zfree(&metadata);
err_free_traceid_list:
	intlist__delete(traceid_list);
err_free_hdr:
	zfree(&hdr);

	return -EINVAL;
}