// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright(C) 2015-2018 Linaro Limited.
 *
 * Author: Tor Jeremiassen <tor@ti.com>
 * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
 */

#include <linux/bitops.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/types.h>

#include <stdlib.h>

#include "auxtrace.h"
#include "color.h"
#include "cs-etm.h"
#include "cs-etm-decoder/cs-etm-decoder.h"
#include "debug.h"
#include "evlist.h"
#include "intlist.h"
#include "machine.h"
#include "map.h"
#include "perf.h"
#include "thread.h"
#include "thread_map.h"
#include "thread-stack.h"
#include "util.h"

#define MAX_TIMESTAMP (~0ULL)

/*
 * A64 instructions are always 4 bytes
 *
 * Only A64 is supported, so can use this constant for converting between
 * addresses and instruction counts, calculating offsets etc
 */
#define A64_INSTR_SIZE 4

/*
 * Session-wide CoreSight ETM decoding state, embedded in (and recovered
 * from) the perf session's ->auxtrace pointer via container_of().
 */
struct cs_etm_auxtrace {
	struct auxtrace auxtrace;
	struct auxtrace_queues queues;
	struct auxtrace_heap heap;
	struct itrace_synth_opts synth_opts;
	struct perf_session *session;
	struct machine *machine;
	struct thread *unknown_thread;

	u8 timeless_decoding;	/* no event in the session carries PERF_SAMPLE_TIME */
	u8 snapshot_mode;
	u8 data_queued;
	u8 sample_branches;	/* synthesize branch samples */
	u8 sample_instructions;	/* synthesize periodic instruction samples */

	int num_cpu;
	u32 auxtrace_type;
	u64 branches_sample_type;
	u64 branches_id;
	u64 instructions_sample_type;
	u64 instructions_sample_period;
	u64 instructions_id;
	u64 **metadata;		/* per-CPU trace configuration registers (see cs-etm.h) */
	u64 kernel_start;
	unsigned int pmu_type;
};

/* Per-auxtrace-queue decoding state (one queue per CPU or per thread). */
struct cs_etm_queue {
	struct cs_etm_auxtrace *etm;
	struct thread *thread;
	struct cs_etm_decoder *decoder;
	struct auxtrace_buffer *buffer;
	const struct cs_etm_state *state;
	union perf_event *event_buf;	/* scratch space for synthesized samples */
	unsigned int queue_nr;
	pid_t pid, tid;
	int cpu;
	u64 time;
	u64 timestamp;
	u64 offset;
	u64 period_instructions;	/* instructions executed in current sample period */
	struct branch_stack *last_branch;	/* linearized copy handed to perf samples */
	struct branch_stack *last_branch_rb;	/* circular buffer of most recent branches */
	size_t last_branch_pos;		/* insert position in last_branch_rb */
	struct cs_etm_packet *prev_packet;
	struct cs_etm_packet *packet;
};

static int cs_etm__update_queues(struct cs_etm_auxtrace *etm);
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
					   pid_t tid, u64 time_);

/*
 * Decoder callback: print one decoded packet string in blue, making sure
 * exactly one newline terminates it.
 */
static void cs_etm__packet_dump(const char *pkt_string)
{
	const char *color = PERF_COLOR_BLUE;
	int len = strlen(pkt_string);

	if (len && (pkt_string[len-1] == '\n'))
		color_fprintf(stdout, color, " %s", pkt_string);
	else
		color_fprintf(stdout, color, " %s\n", pkt_string);

	fflush(stdout);
}

/*
 * Raw dump (perf report -D): build a throw-away decoder in PRINT mode and
 * run it over the whole auxtrace buffer so each packet is printed.
 */
static void cs_etm__dump_event(struct cs_etm_auxtrace *etm,
			       struct auxtrace_buffer *buffer)
{
	int i, ret;
	const char *color = PERF_COLOR_BLUE;
	struct cs_etm_decoder_params d_params;
	struct cs_etm_trace_params *t_params;
	struct cs_etm_decoder *decoder;
	size_t buffer_used = 0;

	fprintf(stdout, "\n");
	color_fprintf(stdout, color,
		      ". ... CoreSight ETM Trace data: size %zu bytes\n",
		      buffer->size);

	/* Use metadata to fill in trace parameters for trace decoder */
	t_params = zalloc(sizeof(*t_params) * etm->num_cpu);
	/*
	 * NOTE(review): t_params is dereferenced below without a NULL check;
	 * a failed zalloc() would crash here. Consider bailing out early.
	 */
	for (i = 0; i < etm->num_cpu; i++) {
		t_params[i].protocol = CS_ETM_PROTO_ETMV4i;
		t_params[i].etmv4.reg_idr0 = etm->metadata[i][CS_ETMV4_TRCIDR0];
		t_params[i].etmv4.reg_idr1 = etm->metadata[i][CS_ETMV4_TRCIDR1];
		t_params[i].etmv4.reg_idr2 = etm->metadata[i][CS_ETMV4_TRCIDR2];
		t_params[i].etmv4.reg_idr8 = etm->metadata[i][CS_ETMV4_TRCIDR8];
		t_params[i].etmv4.reg_configr =
			etm->metadata[i][CS_ETMV4_TRCCONFIGR];
		t_params[i].etmv4.reg_traceidr =
			etm->metadata[i][CS_ETMV4_TRCTRACEIDR];
	}

	/*
	 * Set decoder parameters to simply print the trace packets.
	 * NOTE(review): d_params is stack-allocated and only partially
	 * initialized here (e.g. .data is never set) — confirm the decoder
	 * ignores the remaining fields in PRINT mode.
	 */
	d_params.packet_printer = cs_etm__packet_dump;
	d_params.operation = CS_ETM_OPERATION_PRINT;
	d_params.formatted = true;
	d_params.fsyncs = false;
	d_params.hsyncs = false;
	d_params.frame_aligned = true;

	decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params);

	zfree(&t_params);

	if (!decoder)
		return;
	do {
		size_t consumed;

		ret = cs_etm_decoder__process_data_block(
				decoder, buffer->offset,
				&((u8 *)buffer->data)[buffer_used],
				buffer->size - buffer_used, &consumed);
		if (ret)
			break;

		buffer_used += consumed;
	} while (buffer_used < buffer->size);

	cs_etm_decoder__free(decoder);
}

/*
 * auxtrace ->flush_events handler: decode everything queued so far.
 * Only supported for timeless (per-thread) decoding with ordered events.
 */
static int cs_etm__flush_events(struct perf_session *session,
				struct perf_tool *tool)
{
	int ret;
	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	if (dump_trace)
		return 0;

	if (!tool->ordered_events)
		return -EINVAL;

	if (!etm->timeless_decoding)
		return -EINVAL;

	ret = cs_etm__update_queues(etm);

	if (ret < 0)
		return ret;

	/* tid == -1: flush every queue, regardless of thread */
	return cs_etm__process_timeless_queues(etm, -1, MAX_TIMESTAMP - 1);
}

/* Release everything owned by one queue (decoder, buffers, packets). */
static void cs_etm__free_queue(void *priv)
{
	struct cs_etm_queue *etmq = priv;

	if (!etmq)
		return;

	thread__zput(etmq->thread);
	cs_etm_decoder__free(etmq->decoder);
	zfree(&etmq->event_buf);
	zfree(&etmq->last_branch);
	zfree(&etmq->last_branch_rb);
	zfree(&etmq->prev_packet);
	zfree(&etmq->packet);
	free(etmq);
}

/* auxtrace ->free_events handler: tear down every queue, then the array. */
static void cs_etm__free_events(struct perf_session *session)
{
	unsigned int i;
	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	struct auxtrace_queues *queues = &aux->queues;

	for (i = 0; i < queues->nr_queues; i++) {
		cs_etm__free_queue(queues->queue_array[i].priv);
		queues->queue_array[i].priv = NULL;
	}

	auxtrace_queues__free(queues);
}

/* auxtrace ->free handler: events, traceID map, metadata, then the struct. */
static void cs_etm__free(struct perf_session *session)
{
	int i;
	struct int_node *inode, *tmp;
	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	cs_etm__free_events(session);
	session->auxtrace = NULL;

	/* First remove all traceID/CPU# nodes for the RB tree */
	intlist__for_each_entry_safe(inode, tmp, traceid_list)
		intlist__remove(traceid_list, inode);
	/* Then the RB tree itself */
	intlist__delete(traceid_list);

	for (i = 0; i < aux->num_cpu; i++)
		zfree(&aux->metadata[i]);

	zfree(&aux->metadata);
	zfree(&aux);
}

/*
 * Decoder memory-access callback: read up to 'size' bytes of the traced
 * program's text at 'address' into 'buffer', resolving through the thread's
 * maps/DSOs. Returns the number of bytes read, or 0 when unavailable.
 *
 * NOTE(review): the return type is u32 but the early exits return -1 and
 * -EINVAL, which become large unsigned values for the caller — confirm the
 * decoder treats anything non-zero-but-not-'size' as failure.
 */
static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address,
			      size_t size, u8 *buffer)
{
	u8 cpumode;
	u64 offset;
	int len;
	struct thread *thread;
	struct machine *machine;
	struct addr_location al;

	if (!etmq)
		return -1;

	machine = etmq->etm->machine;
	/* Addresses at/above kernel_start are kernel text */
	if (address >= etmq->etm->kernel_start)
		cpumode = PERF_RECORD_MISC_KERNEL;
	else
		cpumode = PERF_RECORD_MISC_USER;

	thread = etmq->thread;
	if (!thread) {
		/* No thread context: only kernel addresses can be resolved */
		if (cpumode != PERF_RECORD_MISC_KERNEL)
			return -EINVAL;
		thread = etmq->etm->unknown_thread;
	}

	if (!thread__find_map(thread, cpumode, address, &al) || !al.map->dso)
		return 0;

	if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
	    dso__data_status_seen(al.map->dso, DSO_DATA_STATUS_SEEN_ITRACE))
		return 0;

	offset = al.map->map_ip(al.map, address);

	map__load(al.map);

	len = dso__data_read_offset(al.map->dso, machine, offset, buffer, size);

	if (len <= 0)
		return 0;

	return len;
}

/*
 * Allocate and fully initialize one decode queue: packet buffers, optional
 * last-branch ring buffers, a DECODE-mode decoder configured from the
 * per-CPU metadata, and the memory-access callback above.
 * Returns NULL on any allocation/initialization failure.
 */
static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
						unsigned int queue_nr)
{
	int i;
	struct cs_etm_decoder_params d_params;
	struct cs_etm_trace_params  *t_params;
	struct cs_etm_queue *etmq;
	size_t szp = sizeof(struct cs_etm_packet);

	etmq = zalloc(sizeof(*etmq));
	if (!etmq)
		return NULL;

	etmq->packet = zalloc(szp);
	if (!etmq->packet)
		goto out_free;

	/* prev_packet is only needed when samples are being synthesized */
	if (etm->synth_opts.last_branch || etm->sample_branches) {
		etmq->prev_packet = zalloc(szp);
		if (!etmq->prev_packet)
			goto out_free;
	}

	if (etm->synth_opts.last_branch) {
		size_t sz = sizeof(struct branch_stack);

		sz += etm->synth_opts.last_branch_sz *
		      sizeof(struct branch_entry);
		etmq->last_branch = zalloc(sz);
		if (!etmq->last_branch)
			goto out_free;
		etmq->last_branch_rb = zalloc(sz);
		if (!etmq->last_branch_rb)
			goto out_free;
	}

	etmq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
	if (!etmq->event_buf)
		goto out_free;

	etmq->etm = etm;
	etmq->queue_nr = queue_nr;
	etmq->pid = -1;
	etmq->tid = -1;
	etmq->cpu = -1;

	/* Use metadata to fill in trace parameters for trace decoder */
	t_params = zalloc(sizeof(*t_params) * etm->num_cpu);

	if (!t_params)
		goto out_free;

	for (i = 0; i < etm->num_cpu; i++) {
		t_params[i].protocol = CS_ETM_PROTO_ETMV4i;
		t_params[i].etmv4.reg_idr0 = etm->metadata[i][CS_ETMV4_TRCIDR0];
		t_params[i].etmv4.reg_idr1 = etm->metadata[i][CS_ETMV4_TRCIDR1];
		t_params[i].etmv4.reg_idr2 = etm->metadata[i][CS_ETMV4_TRCIDR2];
		t_params[i].etmv4.reg_idr8 = etm->metadata[i][CS_ETMV4_TRCIDR8];
		t_params[i].etmv4.reg_configr =
			etm->metadata[i][CS_ETMV4_TRCCONFIGR];
		t_params[i].etmv4.reg_traceidr =
			etm->metadata[i][CS_ETMV4_TRCTRACEIDR];
	}

	/* Set decoder parameters for full trace decode (not packet printing) */
	d_params.packet_printer = cs_etm__packet_dump;
	d_params.operation = CS_ETM_OPERATION_DECODE;
	d_params.formatted = true;
	d_params.fsyncs = false;
	d_params.hsyncs = false;
	d_params.frame_aligned = true;
	d_params.data = etmq;

	etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params);

	zfree(&t_params);

	if (!etmq->decoder)
		goto out_free;

	/*
	 * Register a function to handle all memory accesses required by
	 * the trace decoder library.
	 */
	if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
					      0x0L, ((u64) -1L),
					      cs_etm__mem_access))
		goto out_free_decoder;

	etmq->offset = 0;
	etmq->period_instructions = 0;

	return etmq;

out_free_decoder:
	cs_etm_decoder__free(etmq->decoder);
out_free:
	zfree(&etmq->event_buf);
	zfree(&etmq->last_branch);
	zfree(&etmq->last_branch_rb);
	zfree(&etmq->prev_packet);
	zfree(&etmq->packet);
	free(etmq);

	return NULL;
}

/*
 * Lazily attach a cs_etm_queue to an auxtrace queue that has data.
 * A no-op when the queue is empty or already set up.
 */
static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
			       struct auxtrace_queue *queue,
			       unsigned int queue_nr)
{
	struct cs_etm_queue *etmq = queue->priv;

	if (list_empty(&queue->head) || etmq)
		return 0;

	etmq = cs_etm__alloc_queue(etm, queue_nr);

	if (!etmq)
		return -ENOMEM;

	queue->priv = etmq;

	if (queue->cpu != -1)
		etmq->cpu = queue->cpu;

	etmq->tid = queue->tid;

	return 0;
}

/* Set up every auxtrace queue in the session. */
static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm)
{
	unsigned int i;
	int ret;

	for (i = 0; i < etm->queues.nr_queues; i++) {
		ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i);
		if (ret)
			return ret;
	}

	return 0;
}

/* Re-run queue setup only when new auxtrace data has been queued. */
static int cs_etm__update_queues(struct cs_etm_auxtrace *etm)
{
	if (etm->queues.new_data) {
		etm->queues.new_data = false;
		return cs_etm__setup_queues(etm);
	}

	return 0;
}

/*
 * Linearize the circular last-branch buffer (last_branch_rb) into
 * last_branch, newest entry first, ready to be attached to a sample.
 */
static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq)
{
	struct branch_stack *bs_src = etmq->last_branch_rb;
	struct branch_stack *bs_dst = etmq->last_branch;
	size_t nr = 0;

	/*
	 * Set the number of records before early exit: ->nr is used to
	 * determine how many branches to copy from ->entries.
	 */
	bs_dst->nr = bs_src->nr;

	/*
	 * Early exit when there is nothing to copy.
	 */
	if (!bs_src->nr)
		return;

	/*
	 * As bs_src->entries is a circular buffer, we need to copy from it in
	 * two steps.  First, copy the branches from the most recently inserted
	 * branch ->last_branch_pos until the end of bs_src->entries buffer.
	 */
	nr = etmq->etm->synth_opts.last_branch_sz - etmq->last_branch_pos;
	memcpy(&bs_dst->entries[0],
	       &bs_src->entries[etmq->last_branch_pos],
	       sizeof(struct branch_entry) * nr);

	/*
	 * If we wrapped around at least once, the branches from the beginning
	 * of the bs_src->entries buffer and until the ->last_branch_pos element
	 * are older valid branches: copy them over.  The total number of
	 * branches copied over will be equal to the number of branches asked by
	 * the user in last_branch_sz.
	 */
	if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
		memcpy(&bs_dst->entries[nr],
		       &bs_src->entries[0],
		       sizeof(struct branch_entry) * etmq->last_branch_pos);
	}
}

/* Empty the circular last-branch buffer. */
static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq)
{
	etmq->last_branch_pos = 0;
	etmq->last_branch_rb->nr = 0;
}

static inline u64 cs_etm__last_executed_instr(struct cs_etm_packet *packet)
{
	/*
	 * The packet records the execution range with an exclusive end address
	 *
	 * A64 instructions are constant size, so the last executed
	 * instruction is A64_INSTR_SIZE before the end address
	 * Will need to do instruction level decode for T32 instructions as
	 * they can be variable size (not yet supported).
	 */
	return packet->end_addr - A64_INSTR_SIZE;
}

static inline u64 cs_etm__instr_count(const struct cs_etm_packet *packet)
{
	/*
	 * Only A64 instructions are currently supported, so can get
	 * instruction count by dividing.
	 * Will need to do instruction level decode for T32 instructions as
	 * they can be variable size (not yet supported).
	 */
	return (packet->end_addr - packet->start_addr) / A64_INSTR_SIZE;
}

static inline u64 cs_etm__instr_addr(const struct cs_etm_packet *packet,
				     u64 offset)
{
	/*
	 * Only A64 instructions are currently supported, so can get
	 * instruction address by muliplying.
	 * Will need to do instruction level decode for T32 instructions as
	 * they can be variable size (not yet supported).
	 */
	return packet->start_addr + offset * A64_INSTR_SIZE;
}

/*
 * Record the branch prev_packet -> packet into the circular last-branch
 * buffer.  Entries are stored newest-first, walking backwards from the end.
 */
static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq)
{
	struct branch_stack *bs = etmq->last_branch_rb;
	struct branch_entry *be;

	/*
	 * The branches are recorded in a circular buffer in reverse
	 * chronological order: we start recording from the last element of the
	 * buffer down.  After writing the first element of the stack, move the
	 * insert position back to the end of the buffer.
	 */
	if (!etmq->last_branch_pos)
		etmq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;

	etmq->last_branch_pos -= 1;

	be       = &bs->entries[etmq->last_branch_pos];
	be->from = cs_etm__last_executed_instr(etmq->prev_packet);
	be->to	 = etmq->packet->start_addr;
	/* No support for mispredict */
	be->flags.mispred = 0;
	be->flags.predicted = 1;

	/*
	 * Increment bs->nr until reaching the number of last branches asked by
	 * the user on the command line.
	 */
	if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
		bs->nr += 1;
}

/* Rewrite 'event' in place as a self-contained sample of the given type. */
static int cs_etm__inject_event(union perf_event *event,
			       struct perf_sample *sample, u64 type)
{
	event->header.size = perf_event__sample_event_size(sample, type, 0);
	return perf_event__synthesize_sample(event, type, 0, sample);
}


/*
 * Fetch the next auxtrace buffer for this queue into 'buff', dropping the
 * previously held buffer.  Returns the number of bytes available, 0 at end
 * of data, or a negative error code.
 */
static int
cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq)
{
	struct auxtrace_buffer *aux_buffer = etmq->buffer;
	struct auxtrace_buffer *old_buffer = aux_buffer;
	struct auxtrace_queue *queue;

	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];

	aux_buffer = auxtrace_buffer__next(queue, aux_buffer);

	/* If no more data, drop the previous auxtrace_buffer and return */
	if (!aux_buffer) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		buff->len = 0;
		return 0;
	}

	etmq->buffer = aux_buffer;

	/* If the aux_buffer doesn't have data associated, try to load it */
	if (!aux_buffer->data) {
		/* get the file desc associated with the perf data file */
		int fd = perf_data__fd(etmq->etm->session->data);

		aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd);
		if (!aux_buffer->data)
			return -ENOMEM;
	}

	/* If valid, drop the previous buffer */
	if (old_buffer)
		auxtrace_buffer__drop_data(old_buffer);

	buff->offset = aux_buffer->offset;
	buff->len = aux_buffer->size;
	buff->buf = aux_buffer->data;

	buff->ref_timestamp = aux_buffer->reference;

	return buff->len;
}

/*
 * Resolve the queue's pid/cpu from its tid via the machine's thread table.
 * Per-thread tracing only; CPU-wide queues (tid == -1) are skipped.
 */
static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm,
				    struct auxtrace_queue *queue)
{
	struct cs_etm_queue *etmq = queue->priv;

	/* CPU-wide tracing isn't supported yet */
	if (queue->tid == -1)
		return;

	if ((!etmq->thread) && (etmq->tid != -1))
		etmq->thread = machine__find_thread(etm->machine, -1,
						    etmq->tid);

	if (etmq->thread) {
		etmq->pid = etmq->thread->pid_;
		if (queue->cpu == -1)
			etmq->cpu = etmq->thread->cpu;
	}
}

/*
 * Synthesize one PERF_RECORD_SAMPLE instruction event at 'addr' covering
 * 'period' instructions, and deliver it to the session.
 */
static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
					    u64 addr, u64 period)
{
	int ret = 0;
	struct cs_etm_auxtrace *etm = etmq->etm;
	union perf_event *event = etmq->event_buf;
	struct perf_sample sample = {.ip = 0,};

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = PERF_RECORD_MISC_USER;
	event->sample.header.size = sizeof(struct perf_event_header);

	sample.ip = addr;
	sample.pid = etmq->pid;
	sample.tid = etmq->tid;
	sample.id = etmq->etm->instructions_id;
	sample.stream_id = etmq->etm->instructions_id;
	sample.period = period;
	sample.cpu = etmq->packet->cpu;
	sample.flags = 0;
	/* NOTE(review): A64 instructions are 4 bytes; confirm insn_len == 1 is intended */
	sample.insn_len = 1;
	sample.cpumode = event->header.misc;

	if (etm->synth_opts.last_branch) {
		cs_etm__copy_last_branch_rb(etmq);
		sample.branch_stack = etmq->last_branch;
	}

	if (etm->synth_opts.inject) {
		ret = cs_etm__inject_event(event, &sample,
					   etm->instructions_sample_type);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(etm->session, event, &sample);

	if (ret)
		pr_err(
			"CS ETM Trace: failed to deliver instruction event, error %d\n",
			ret);

	if (etm->synth_opts.last_branch)
		cs_etm__reset_last_branch_rb(etmq);

	return ret;
}

/*
 * The cs etm packet encodes an instruction range between a branch target
 * and the next taken branch. Generate sample accordingly.
 */
static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq)
{
	int ret = 0;
	struct cs_etm_auxtrace *etm = etmq->etm;
	struct perf_sample sample = {.ip = 0,};
	union perf_event *event = etmq->event_buf;
	/* Minimal one-entry branch stack for tools that require one */
	struct dummy_branch_stack {
		u64			nr;
		struct branch_entry	entries;
	} dummy_bs;

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = PERF_RECORD_MISC_USER;
	event->sample.header.size = sizeof(struct perf_event_header);

	sample.ip = cs_etm__last_executed_instr(etmq->prev_packet);
	sample.pid = etmq->pid;
	sample.tid = etmq->tid;
	sample.addr = etmq->packet->start_addr;
	sample.id = etmq->etm->branches_id;
	sample.stream_id = etmq->etm->branches_id;
	sample.period = 1;
	sample.cpu = etmq->packet->cpu;
	sample.flags = 0;
	sample.cpumode = PERF_RECORD_MISC_USER;

	/*
	 * perf report cannot handle events without a branch stack
	 */
	if (etm->synth_opts.last_branch) {
		dummy_bs = (struct dummy_branch_stack){
			.nr = 1,
			.entries = {
				.from = sample.ip,
				.to = sample.addr,
			},
		};
		sample.branch_stack = (struct branch_stack *)&dummy_bs;
	}

	if (etm->synth_opts.inject) {
		ret = cs_etm__inject_event(event, &sample,
					   etm->branches_sample_type);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(etm->session, event, &sample);

	if (ret)
		/* NOTE(review): message says "instruction" but this is the branch path */
		pr_err(
		"CS ETM Trace: failed to deliver instruction event, error %d\n",
			ret);

	return ret;
}

/* Wrapper pairing a dummy tool with the session it should deliver into. */
struct cs_etm_synth {
	struct perf_tool dummy_tool;
	struct perf_session *session;
};

/* Tool callback: forward the synthesized attr event to the session. */
static int cs_etm__event_synth(struct perf_tool *tool,
			       union perf_event *event,
			       struct perf_sample *sample __maybe_unused,
			       struct machine *machine __maybe_unused)
{
	struct cs_etm_synth *cs_etm_synth =
		      container_of(tool, struct cs_etm_synth, dummy_tool);

	return perf_session__deliver_synth_event(cs_etm_synth->session,
						 event, NULL);
}

/* Synthesize a PERF_RECORD_HEADER_ATTR for one event id into the session. */
static int cs_etm__synth_event(struct perf_session *session,
			       struct perf_event_attr *attr, u64 id)
{
	struct cs_etm_synth cs_etm_synth;

	memset(&cs_etm_synth, 0, sizeof(struct cs_etm_synth));
	cs_etm_synth.session = session;

	return perf_event__synthesize_attr(&cs_etm_synth.dummy_tool, attr, 1,
					   &id, cs_etm__event_synth);
}

/*
 * Create the synthetic event attributes (branches and/or instructions,
 * per itrace options) derived from the recorded CoreSight event, and
 * record their sample types/ids in 'etm' for later sample generation.
 */
static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
				struct perf_session *session)
{
	struct perf_evlist *evlist = session->evlist;
	struct perf_evsel *evsel;
	struct perf_event_attr attr;
	bool found = false;
	u64 id;
	int err;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->attr.type == etm->pmu_type) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_debug("No selected events with CoreSight Trace data\n");
		return 0;
	}

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.size = sizeof(struct perf_event_attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD;
	if (etm->timeless_decoding)
		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	else
		attr.sample_type |= PERF_SAMPLE_TIME;

	attr.exclude_user = evsel->attr.exclude_user;
	attr.exclude_kernel = evsel->attr.exclude_kernel;
	attr.exclude_hv = evsel->attr.exclude_hv;
	attr.exclude_host = evsel->attr.exclude_host;
	attr.exclude_guest = evsel->attr.exclude_guest;
	attr.sample_id_all = evsel->attr.sample_id_all;
	attr.read_format = evsel->attr.read_format;

	/* create new id val to be a fixed offset from evsel id */
	id = evsel->id[0] + 1000000000;

	if (!id)
		id = 1;

	if (etm->synth_opts.branches) {
		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
		attr.sample_period = 1;
		attr.sample_type |= PERF_SAMPLE_ADDR;
		err = cs_etm__synth_event(session, &attr, id);
		if (err)
			return err;
		etm->sample_branches = true;
		etm->branches_sample_type = attr.sample_type;
		etm->branches_id = id;
		id += 1;
		attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
	}

	if (etm->synth_opts.last_branch)
		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;

	if (etm->synth_opts.instructions) {
		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
		attr.sample_period = etm->synth_opts.period;
		etm->instructions_sample_period = attr.sample_period;
		err = cs_etm__synth_event(session, &attr, id);
		if (err)
			return err;
		etm->sample_instructions = true;
		etm->instructions_sample_type = attr.sample_type;
		etm->instructions_id = id;
		id += 1;
	}

	return 0;
}

/*
 * Handle one CS_ETM_RANGE packet: update the last-branch buffer, emit
 * periodic instruction samples and branch samples as configured, then
 * swap packet/prev_packet for the next iteration.
 */
static int cs_etm__sample(struct cs_etm_queue *etmq)
{
	struct cs_etm_auxtrace *etm = etmq->etm;
	struct cs_etm_packet *tmp;
	int ret;
	u64 instrs_executed;

	instrs_executed = cs_etm__instr_count(etmq->packet);
	etmq->period_instructions += instrs_executed;

	/*
	 * Record a branch when the last instruction in
	 * PREV_PACKET is a branch.
	 */
	if (etm->synth_opts.last_branch &&
	    etmq->prev_packet &&
	    etmq->prev_packet->sample_type == CS_ETM_RANGE &&
	    etmq->prev_packet->last_instr_taken_branch)
		cs_etm__update_last_branch_rb(etmq);

	if (etm->sample_instructions &&
	    etmq->period_instructions >= etm->instructions_sample_period) {
		/*
		 * Emit instruction sample periodically
		 * TODO: allow period to be defined in cycles and clock time
		 */

		/* Get number of instructions executed after the sample point */
		u64 instrs_over = etmq->period_instructions -
			etm->instructions_sample_period;

		/*
		 * Calculate the address of the sampled instruction (-1 as
		 * sample is reported as though instruction has just been
		 * executed, but PC has not advanced to next instruction)
		 */
		u64 offset = (instrs_executed - instrs_over - 1);
		u64 addr = cs_etm__instr_addr(etmq->packet, offset);

		ret = cs_etm__synth_instruction_sample(
			etmq, addr, etm->instructions_sample_period);
		if (ret)
			return ret;

		/* Carry remaining instructions into next sample period */
		etmq->period_instructions = instrs_over;
	}

	if (etm->sample_branches &&
	    etmq->prev_packet &&
	    etmq->prev_packet->sample_type == CS_ETM_RANGE &&
	    etmq->prev_packet->last_instr_taken_branch) {
		ret = cs_etm__synth_branch_sample(etmq);
		if (ret)
			return ret;
	}

	if (etm->sample_branches || etm->synth_opts.last_branch) {
		/*
		 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
		 * the next incoming packet.
		 */
		tmp = etmq->packet;
		etmq->packet = etmq->prev_packet;
		etmq->prev_packet = tmp;
	}

	return 0;
}

/*
 * Handle a trace discontinuity (CS_ETM_TRACE_ON) or end of trace: flush
 * the pending last-branch entries as one instruction sample.
 */
static int cs_etm__flush(struct cs_etm_queue *etmq)
{
	int err = 0;
	struct cs_etm_packet *tmp;

	if (etmq->etm->synth_opts.last_branch &&
	    etmq->prev_packet &&
	    etmq->prev_packet->sample_type == CS_ETM_RANGE) {
		/*
		 * Generate a last branch event for the branches left in the
		 * circular buffer at the end of the trace.
		 *
		 * Use the address of the end of the last reported execution
		 * range
		 */
		u64 addr = cs_etm__last_executed_instr(etmq->prev_packet);

		err = cs_etm__synth_instruction_sample(
			etmq, addr,
			etmq->period_instructions);
		etmq->period_instructions = 0;

		/*
		 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
		 * the next incoming packet.
		 */
		tmp = etmq->packet;
		etmq->packet = etmq->prev_packet;
		etmq->prev_packet = tmp;
	}

	return err;
}

/*
 * Main decode loop for one queue: walk every auxtrace buffer, reset the
 * decoder at each buffer boundary (buffers need not be contiguous), feed
 * the data through the decoder, and process each resulting packet.
 */
static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
{
	struct cs_etm_auxtrace *etm = etmq->etm;
	struct cs_etm_buffer buffer;
	size_t buffer_used, processed;
	int err = 0;

	if (!etm->kernel_start)
		etm->kernel_start = machine__kernel_start(etm->machine);

	/* Go through each buffer in the queue and decode them one by one */
	while (1) {
		buffer_used = 0;
		memset(&buffer, 0, sizeof(buffer));
		err = cs_etm__get_trace(&buffer, etmq);
		if (err <= 0)
			return err;
		/*
		 * We cannot assume consecutive blocks in the data file are
		 * contiguous, reset the decoder to force re-sync.
		 */
		err = cs_etm_decoder__reset(etmq->decoder);
		if (err != 0)
			return err;

		/* Run trace decoder until buffer consumed or end of trace */
		do {
			processed = 0;
			err = cs_etm_decoder__process_data_block(
				etmq->decoder,
				etmq->offset,
				&buffer.buf[buffer_used],
				buffer.len - buffer_used,
				&processed);
			if (err)
				return err;

			etmq->offset += processed;
			buffer_used += processed;

			/* Process each packet in this chunk */
			while (1) {
				err = cs_etm_decoder__get_packet(etmq->decoder,
								 etmq->packet);
				if (err <= 0)
					/*
					 * Stop processing this chunk on
					 * end of data or error
					 */
					break;

				switch (etmq->packet->sample_type) {
				case CS_ETM_RANGE:
					/*
					 * If the packet contains an instruction
					 * range, generate instruction sequence
					 * events.
					 */
					cs_etm__sample(etmq);
					break;
				case CS_ETM_TRACE_ON:
					/*
					 * Discontinuity in trace, flush
					 * previous branch stack
					 */
					cs_etm__flush(etmq);
					break;
				default:
					break;
				}
			}
		} while (buffer.len > buffer_used);

		if (err == 0)
			/* Flush any remaining branch stack entries */
			err = cs_etm__flush(etmq);
	}

	return err;
}

/*
 * Decode every queue matching 'tid' (or all queues when tid == -1),
 * stamping each with 'time_' and resolving its pid/tid/cpu first.
 */
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
					   pid_t tid, u64 time_)
{
	unsigned int i;
	struct auxtrace_queues *queues = &etm->queues;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &etm->queues.queue_array[i];
		struct cs_etm_queue *etmq = queue->priv;

		if (etmq && ((tid == -1) || (etmq->tid == tid))) {
			etmq->time = time_;
			cs_etm__set_pid_tid_cpu(etm, queue);
			cs_etm__run_decoder(etmq);
		}
	}

	return 0;
}

/*
 * auxtrace ->process_event handler.  Only timeless decoding is supported:
 * queues are updated as data arrives and fully decoded when the traced
 * thread exits (PERF_RECORD_EXIT).
 */
static int cs_etm__process_event(struct perf_session *session,
				 union perf_event *event,
				 struct perf_sample *sample,
				 struct perf_tool *tool)
{
	int err = 0;
	u64 timestamp;
	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("CoreSight ETM Trace requires ordered events\n");
		return -EINVAL;
	}

	if (!etm->timeless_decoding)
		return -EINVAL;

	if (sample->time && (sample->time != (u64) -1))
		timestamp = sample->time;
	else
		timestamp = 0;

	/* timeless_decoding is guaranteed true here, so queues always update */
	if (timestamp || etm->timeless_decoding) {
		err = cs_etm__update_queues(etm);
		if (err)
			return err;
	}

	if (event->header.type == PERF_RECORD_EXIT)
		return cs_etm__process_timeless_queues(etm,
						       event->fork.tid,
						       sample->time);

	return 0;
}

/*
 * auxtrace ->process_auxtrace_event handler: queue the AUXTRACE event's
 * data for later decode, and print it immediately when dump_trace is set.
 */
static int cs_etm__process_auxtrace_event(struct perf_session *session,
					  union perf_event *event,
					  struct perf_tool *tool __maybe_unused)
{
	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
						   struct cs_etm_auxtrace,
						   auxtrace);
	if (!etm->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t  data_offset;
		int fd = perf_data__fd(session->data);
		bool is_pipe = perf_data__is_pipe(session->data);
		int err;

		if (is_pipe)
			data_offset = 0;
		else {
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&etm->queues, session,
						 event, data_offset, &buffer);
		if (err)
			return err;

		if (dump_trace)
			if (auxtrace_buffer__get_data(buffer, fd)) {
				cs_etm__dump_event(etm, buffer);
				auxtrace_buffer__put_data(buffer);
			}
	}

	return 0;
}

/*
 * Decoding is "timeless" when no event in the session records timestamps
 * (PERF_SAMPLE_TIME unset everywhere).
 */
static bool cs_etm__is_timeless_decoding(struct cs_etm_auxtrace *etm)
{
	struct perf_evsel *evsel;
	struct perf_evlist *evlist = etm->session->evlist;
	bool timeless_decoding = true;

	/*
	 * Scan the list of events: if any of them has the time bit set,
	 * timestamps are available and decoding is not timeless.
	 */
	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->attr.sample_type & PERF_SAMPLE_TIME))
			timeless_decoding = false;
	}

	return timeless_decoding;
}

/* printf formats for the global auxtrace_info header fields */
static const char * const cs_etm_global_header_fmts[] = {
	[CS_HEADER_VERSION_0]	= "	Header version		       %llx\n",
	[CS_PMU_TYPE_CPUS]	= "	PMU type/num cpus	       %llx\n",
	[CS_ETM_SNAPSHOT]	= "	Snapshot		       %llx\n",
};

/* printf formats for per-CPU ETMv3 metadata */
static const char * const cs_etm_priv_fmts[] = {
	[CS_ETM_MAGIC]		= "	Magic number		       %llx\n",
	[CS_ETM_CPU]		= "	CPU			       %lld\n",
	[CS_ETM_ETMCR]		= "	ETMCR			       %llx\n",
	[CS_ETM_ETMTRACEIDR]	= "	ETMTRACEIDR		       %llx\n",
	[CS_ETM_ETMCCER]	= "	ETMCCER			       %llx\n",
	[CS_ETM_ETMIDR]		= "	ETMIDR			       %llx\n",
};

/* printf formats for per-CPU ETMv4 metadata */
static const char * const cs_etmv4_priv_fmts[] = {
	[CS_ETM_MAGIC]		= "	Magic number		       %llx\n",
	[CS_ETM_CPU]		= "	CPU			       %lld\n",
	[CS_ETMV4_TRCCONFIGR]	= "	TRCCONFIGR		       %llx\n",
	[CS_ETMV4_TRCTRACEIDR]	= "	TRCTRACEIDR		       %llx\n",
	[CS_ETMV4_TRCIDR0]	= "	TRCIDR0			       %llx\n",
	[CS_ETMV4_TRCIDR1]	= "	TRCIDR1			       %llx\n",
	[CS_ETMV4_TRCIDR2]	= "	TRCIDR2			       %llx\n",
	[CS_ETMV4_TRCIDR8]	= "	TRCIDR8			       %llx\n",
	[CS_ETMV4_TRCAUTHSTATUS] = "	TRCAUTHSTATUS		       %llx\n",
};

/*
 * Pretty-print the auxtrace_info payload: the global header, then each
 * CPU's metadata block, whose layout is selected by its magic number.
 */
static void cs_etm__print_auxtrace_info(u64 *val, int num)
{
	int i, j, cpu = 0;

	for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++)
		fprintf(stdout, cs_etm_global_header_fmts[i], val[i]);

	for (i = CS_HEADER_VERSION_0_MAX; cpu < num; cpu++) {
		if (val[i] == __perf_cs_etmv3_magic)
			for (j = 0; j < CS_ETM_PRIV_MAX; j++, i++)
				fprintf(stdout, cs_etm_priv_fmts[j], val[i]);
		else if (val[i] == __perf_cs_etmv4_magic)
			for (j = 0; j < CS_ETMV4_PRIV_MAX; j++, i++)
				fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]);
		else
			/* failure.. return */
			return;
	}
}

/*
 * Entry point for PERF_RECORD_AUXTRACE_INFO: parse the CoreSight header
 * and per-CPU metadata, build the traceID->CPU map and the session-wide
 * cs_etm_auxtrace state.
 */
int cs_etm__process_auxtrace_info(union perf_event *event,
				  struct perf_session *session)
{
	struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
	struct cs_etm_auxtrace *etm = NULL;
	struct int_node *inode;
	unsigned int pmu_type;
	int event_header_size = sizeof(struct perf_event_header);
	int info_header_size;
	int total_size = auxtrace_info->header.size;
	int priv_size = 0;
	int num_cpu;
	int err = 0, idx = -1;
	int i, j, k;
	u64 *ptr, *hdr = NULL;
	u64 **metadata = NULL;

	/*
	 * sizeof(auxtrace_info_event::type) +
	 * sizeof(auxtrace_info_event::reserved) == 8
	 */
	info_header_size = 8;

	if (total_size < (event_header_size + info_header_size))
		return -EINVAL;

	priv_size = total_size - event_header_size - info_header_size;

	/* First the global part */
	ptr = (u64 *) auxtrace_info->priv;

	/* Look for version '0' of the header */
	if (ptr[0] != 0)
		return -EINVAL;

	hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_0_MAX);
	if (!hdr)
		return -ENOMEM;

	/* Extract header information - see cs-etm.h for format */
	for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++)
		hdr[i] = ptr[i];
	num_cpu = hdr[CS_PMU_TYPE_CPUS] & 0xffffffff;
	pmu_type = (unsigned int) ((hdr[CS_PMU_TYPE_CPUS] >> 32) &
				      0xffffffff);

	/*
	 * Create an RB tree for traceID-CPU# tuple. Since the conversion has
	 * to be made for each packet that gets decoded, optimizing access in
	 * anything other than a sequential array is worth doing.
	 */
	traceid_list = intlist__new(NULL);
	if (!traceid_list) {
		err = -ENOMEM;
		goto err_free_hdr;
	}

	metadata = zalloc(sizeof(*metadata) * num_cpu);
	if (!metadata) {
		err = -ENOMEM;
		goto err_free_traceid_list;
	}

	/*
	 * The metadata is stored in the auxtrace_info section and encodes
	 * the configuration of the ARM embedded trace macrocell which is
	 * required by the trace decoder to properly decode the trace due
	 * to its highly compressed nature.
	 */
	for (j = 0; j < num_cpu; j++) {
		if (ptr[i] == __perf_cs_etmv3_magic) {
			metadata[j] = zalloc(sizeof(*metadata[j]) *
					     CS_ETM_PRIV_MAX);
			if (!metadata[j]) {
				err = -ENOMEM;
				goto err_free_metadata;
			}
			for (k = 0; k < CS_ETM_PRIV_MAX; k++)
				metadata[j][k] = ptr[i + k];

			/* The traceID is our handle */
			idx = metadata[j][CS_ETM_ETMTRACEIDR];
			i += CS_ETM_PRIV_MAX;
		} else if (ptr[i] == __perf_cs_etmv4_magic) {
			metadata[j] = zalloc(sizeof(*metadata[j]) *
					     CS_ETMV4_PRIV_MAX);
			if (!metadata[j]) {
				err = -ENOMEM;
				goto err_free_metadata;
			}
			for (k = 0; k < CS_ETMV4_PRIV_MAX; k++)
				metadata[j][k] = ptr[i + k];

			/* The traceID is our handle */
			idx = metadata[j][CS_ETMV4_TRCTRACEIDR];
			i += CS_ETMV4_PRIV_MAX;
		}

		/* Get an RB node for this CPU */
		inode = intlist__findnew(traceid_list, idx);

		/* Something went wrong, no need to continue */
		if (!inode) {
			err = PTR_ERR(inode);
			goto err_free_metadata;
		}

		/*
		 * The node for that CPU should not be taken.
		 * Back out if that's the case.
1309 */ 1310 if (inode->priv) { 1311 err = -EINVAL; 1312 goto err_free_metadata; 1313 } 1314 /* All good, associate the traceID with the CPU# */ 1315 inode->priv = &metadata[j][CS_ETM_CPU]; 1316 } 1317 1318 /* 1319 * Each of CS_HEADER_VERSION_0_MAX, CS_ETM_PRIV_MAX and 1320 * CS_ETMV4_PRIV_MAX mark how many double words are in the 1321 * global metadata, and each cpu's metadata respectively. 1322 * The following tests if the correct number of double words was 1323 * present in the auxtrace info section. 1324 */ 1325 if (i * 8 != priv_size) { 1326 err = -EINVAL; 1327 goto err_free_metadata; 1328 } 1329 1330 etm = zalloc(sizeof(*etm)); 1331 1332 if (!etm) { 1333 err = -ENOMEM; 1334 goto err_free_metadata; 1335 } 1336 1337 err = auxtrace_queues__init(&etm->queues); 1338 if (err) 1339 goto err_free_etm; 1340 1341 etm->session = session; 1342 etm->machine = &session->machines.host; 1343 1344 etm->num_cpu = num_cpu; 1345 etm->pmu_type = pmu_type; 1346 etm->snapshot_mode = (hdr[CS_ETM_SNAPSHOT] != 0); 1347 etm->metadata = metadata; 1348 etm->auxtrace_type = auxtrace_info->type; 1349 etm->timeless_decoding = cs_etm__is_timeless_decoding(etm); 1350 1351 etm->auxtrace.process_event = cs_etm__process_event; 1352 etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event; 1353 etm->auxtrace.flush_events = cs_etm__flush_events; 1354 etm->auxtrace.free_events = cs_etm__free_events; 1355 etm->auxtrace.free = cs_etm__free; 1356 session->auxtrace = &etm->auxtrace; 1357 1358 if (dump_trace) { 1359 cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu); 1360 return 0; 1361 } 1362 1363 if (session->itrace_synth_opts && session->itrace_synth_opts->set) { 1364 etm->synth_opts = *session->itrace_synth_opts; 1365 } else { 1366 itrace_synth_opts__set_default(&etm->synth_opts); 1367 etm->synth_opts.callchain = false; 1368 } 1369 1370 err = cs_etm__synth_events(etm, session); 1371 if (err) 1372 goto err_free_queues; 1373 1374 err = auxtrace_queues__process_index(&etm->queues, 
session); 1375 if (err) 1376 goto err_free_queues; 1377 1378 etm->data_queued = etm->queues.populated; 1379 1380 return 0; 1381 1382 err_free_queues: 1383 auxtrace_queues__free(&etm->queues); 1384 session->auxtrace = NULL; 1385 err_free_etm: 1386 zfree(&etm); 1387 err_free_metadata: 1388 /* No need to check @metadata[j], free(NULL) is supported */ 1389 for (j = 0; j < num_cpu; j++) 1390 free(metadata[j]); 1391 zfree(&metadata); 1392 err_free_traceid_list: 1393 intlist__delete(traceid_list); 1394 err_free_hdr: 1395 zfree(&hdr); 1396 1397 return -EINVAL; 1398 } 1399