/*
 * intel_pt.c: Intel Processor Trace support
 * Copyright (c) 2013-2015, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 */

#include <stdio.h>
#include <stdbool.h>
#include <errno.h>
#include <linux/kernel.h>
#include <linux/types.h>

#include "../perf.h"
#include "session.h"
#include "machine.h"
#include "sort.h"
#include "tool.h"
#include "event.h"
#include "evlist.h"
#include "evsel.h"
#include "map.h"
#include "color.h"
#include "util.h"
#include "thread.h"
#include "thread-stack.h"
#include "symbol.h"
#include "callchain.h"
#include "dso.h"
#include "debug.h"
#include "auxtrace.h"
#include "tsc.h"
#include "intel-pt.h"
#include "config.h"

#include "intel-pt-decoder/intel-pt-log.h"
#include "intel-pt-decoder/intel-pt-decoder.h"
#include "intel-pt-decoder/intel-pt-insn-decoder.h"
#include "intel-pt-decoder/intel-pt-pkt-decoder.h"

#define MAX_TIMESTAMP (~0ULL)

struct intel_pt {
	struct auxtrace auxtrace;
	struct auxtrace_queues queues;
	struct auxtrace_heap heap;
	u32 auxtrace_type;
	struct perf_session *session;
	struct machine *machine;
	struct perf_evsel *switch_evsel;
	struct thread *unknown_thread;
	bool timeless_decoding;
	bool sampling_mode;
	bool snapshot_mode;
	bool per_cpu_mmaps;
	bool have_tsc;
	bool data_queued;
	bool est_tsc;
	bool sync_switch;
	bool mispred_all;
	int have_sched_switch;
	u32 pmu_type;
	u64 kernel_start;
	u64 switch_ip;
	u64 ptss_ip;

	struct perf_tsc_conversion tc;
	bool cap_user_time_zero;

	struct itrace_synth_opts synth_opts;

	bool sample_instructions;
	u64 instructions_sample_type;
	u64 instructions_sample_period;
	u64 instructions_id;

	bool sample_branches;
	u32 branches_filter;
	u64 branches_sample_type;
	u64 branches_id;

	bool sample_transactions;
	u64 transactions_sample_type;
	u64 transactions_id;

	bool synth_needs_swap;

	u64 tsc_bit;
	u64 mtc_bit;
	u64 mtc_freq_bits;
	u32 tsc_ctc_ratio_n;
	u32 tsc_ctc_ratio_d;
	u64 cyc_bit;
	u64 noretcomp_bit;
	unsigned max_non_turbo_ratio;

	unsigned long num_events;
};

enum switch_state {
	INTEL_PT_SS_NOT_TRACING,
	INTEL_PT_SS_UNKNOWN,
	INTEL_PT_SS_TRACING,
	INTEL_PT_SS_EXPECTING_SWITCH_EVENT,
	INTEL_PT_SS_EXPECTING_SWITCH_IP,
};

struct intel_pt_queue {
	struct intel_pt *pt;
	unsigned int queue_nr;
	struct auxtrace_buffer *buffer;
	void *decoder;
	const struct intel_pt_state *state;
	struct ip_callchain *chain;
	struct branch_stack *last_branch;
	struct branch_stack *last_branch_rb;
	size_t last_branch_pos;
	union perf_event *event_buf;
	bool on_heap;
	bool stop;
	bool step_through_buffers;
	bool use_buffer_pid_tid;
	pid_t pid, tid;
	int cpu;
	int switch_state;
	pid_t next_tid;
	struct thread *thread;
	bool exclude_kernel;
	bool have_sample;
	u64 time;
	u64 timestamp;
	u32 flags;
	u16 insn_len;
	u64 last_insn_cnt;
};

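/*
 * Dump the raw trace payload one packet at a time, for perf's dump mode
 * ("perf report -D" / "perf script -D").  Each output line shows the buffer
 * offset, the packet bytes and a decoded description, roughly:
 *
 *   .  <offset>:  <packet bytes>  <description, e.g. PSB, TIP, TNT>
 *
 * (illustrative layout, not taken from a real trace).  A packet that cannot
 * be decoded is reported as "Bad packet!" and decoding resumes at the next
 * byte.
 */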
static void intel_pt_dump(struct intel_pt *pt __maybe_unused, 146 unsigned char *buf, size_t len) 147 { 148 struct intel_pt_pkt packet; 149 size_t pos = 0; 150 int ret, pkt_len, i; 151 char desc[INTEL_PT_PKT_DESC_MAX]; 152 const char *color = PERF_COLOR_BLUE; 153 154 color_fprintf(stdout, color, 155 ". ... Intel Processor Trace data: size %zu bytes\n", 156 len); 157 158 while (len) { 159 ret = intel_pt_get_packet(buf, len, &packet); 160 if (ret > 0) 161 pkt_len = ret; 162 else 163 pkt_len = 1; 164 printf("."); 165 color_fprintf(stdout, color, " %08x: ", pos); 166 for (i = 0; i < pkt_len; i++) 167 color_fprintf(stdout, color, " %02x", buf[i]); 168 for (; i < 16; i++) 169 color_fprintf(stdout, color, " "); 170 if (ret > 0) { 171 ret = intel_pt_pkt_desc(&packet, desc, 172 INTEL_PT_PKT_DESC_MAX); 173 if (ret > 0) 174 color_fprintf(stdout, color, " %s\n", desc); 175 } else { 176 color_fprintf(stdout, color, " Bad packet!\n"); 177 } 178 pos += pkt_len; 179 buf += pkt_len; 180 len -= pkt_len; 181 } 182 } 183 184 static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf, 185 size_t len) 186 { 187 printf(".\n"); 188 intel_pt_dump(pt, buf, len); 189 } 190 191 static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a, 192 struct auxtrace_buffer *b) 193 { 194 void *start; 195 196 start = intel_pt_find_overlap(a->data, a->size, b->data, b->size, 197 pt->have_tsc); 198 if (!start) 199 return -EINVAL; 200 b->use_size = b->data + b->size - start; 201 b->use_data = start; 202 return 0; 203 } 204 205 static void intel_pt_use_buffer_pid_tid(struct intel_pt_queue *ptq, 206 struct auxtrace_queue *queue, 207 struct auxtrace_buffer *buffer) 208 { 209 if (queue->cpu == -1 && buffer->cpu != -1) 210 ptq->cpu = buffer->cpu; 211 212 ptq->pid = buffer->pid; 213 ptq->tid = buffer->tid; 214 215 intel_pt_log("queue %u cpu %d pid %d tid %d\n", 216 ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid); 217 218 thread__zput(ptq->thread); 219 220 if (ptq->tid != -1) { 221 if (ptq->pid != -1) 222 ptq->thread = machine__findnew_thread(ptq->pt->machine, 223 ptq->pid, 224 ptq->tid); 225 else 226 ptq->thread = machine__find_thread(ptq->pt->machine, -1, 227 ptq->tid); 228 } 229 } 230 231 /* This function assumes data is processed sequentially only */ 232 static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data) 233 { 234 struct intel_pt_queue *ptq = data; 235 struct auxtrace_buffer *buffer = ptq->buffer, *old_buffer = buffer; 236 struct auxtrace_queue *queue; 237 238 if (ptq->stop) { 239 b->len = 0; 240 return 0; 241 } 242 243 queue = &ptq->pt->queues.queue_array[ptq->queue_nr]; 244 245 buffer = auxtrace_buffer__next(queue, buffer); 246 if (!buffer) { 247 if (old_buffer) 248 auxtrace_buffer__drop_data(old_buffer); 249 b->len = 0; 250 return 0; 251 } 252 253 ptq->buffer = buffer; 254 255 if (!buffer->data) { 256 int fd = perf_data_file__fd(ptq->pt->session->file); 257 258 buffer->data = auxtrace_buffer__get_data(buffer, fd); 259 if (!buffer->data) 260 return -ENOMEM; 261 } 262 263 if (ptq->pt->snapshot_mode && !buffer->consecutive && old_buffer && 264 intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer)) 265 return -ENOMEM; 266 267 if (old_buffer) 268 auxtrace_buffer__drop_data(old_buffer); 269 270 if (buffer->use_data) { 271 b->len = buffer->use_size; 272 b->buf = buffer->use_data; 273 } else { 274 b->len = buffer->size; 275 b->buf = buffer->data; 276 } 277 b->ref_timestamp = buffer->reference; 278 279 if (!old_buffer || ptq->pt->sampling_mode || (ptq->pt->snapshot_mode && 280 
!buffer->consecutive)) { 281 b->consecutive = false; 282 b->trace_nr = buffer->buffer_nr + 1; 283 } else { 284 b->consecutive = true; 285 } 286 287 if (ptq->use_buffer_pid_tid && (ptq->pid != buffer->pid || 288 ptq->tid != buffer->tid)) 289 intel_pt_use_buffer_pid_tid(ptq, queue, buffer); 290 291 if (ptq->step_through_buffers) 292 ptq->stop = true; 293 294 if (!b->len) 295 return intel_pt_get_trace(b, data); 296 297 return 0; 298 } 299 300 struct intel_pt_cache_entry { 301 struct auxtrace_cache_entry entry; 302 u64 insn_cnt; 303 u64 byte_cnt; 304 enum intel_pt_insn_op op; 305 enum intel_pt_insn_branch branch; 306 int length; 307 int32_t rel; 308 }; 309 310 static int intel_pt_config_div(const char *var, const char *value, void *data) 311 { 312 int *d = data; 313 long val; 314 315 if (!strcmp(var, "intel-pt.cache-divisor")) { 316 val = strtol(value, NULL, 0); 317 if (val > 0 && val <= INT_MAX) 318 *d = val; 319 } 320 321 return 0; 322 } 323 324 static int intel_pt_cache_divisor(void) 325 { 326 static int d; 327 328 if (d) 329 return d; 330 331 perf_config(intel_pt_config_div, &d); 332 333 if (!d) 334 d = 64; 335 336 return d; 337 } 338 339 static unsigned int intel_pt_cache_size(struct dso *dso, 340 struct machine *machine) 341 { 342 off_t size; 343 344 size = dso__data_size(dso, machine); 345 size /= intel_pt_cache_divisor(); 346 if (size < 1000) 347 return 10; 348 if (size > (1 << 21)) 349 return 21; 350 return 32 - __builtin_clz(size); 351 } 352 353 static struct auxtrace_cache *intel_pt_cache(struct dso *dso, 354 struct machine *machine) 355 { 356 struct auxtrace_cache *c; 357 unsigned int bits; 358 359 if (dso->auxtrace_cache) 360 return dso->auxtrace_cache; 361 362 bits = intel_pt_cache_size(dso, machine); 363 364 /* Ignoring cache creation failure */ 365 c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200); 366 367 dso->auxtrace_cache = c; 368 369 return c; 370 } 371 372 static int intel_pt_cache_add(struct dso *dso, struct machine *machine, 373 u64 offset, u64 insn_cnt, u64 byte_cnt, 374 struct intel_pt_insn *intel_pt_insn) 375 { 376 struct auxtrace_cache *c = intel_pt_cache(dso, machine); 377 struct intel_pt_cache_entry *e; 378 int err; 379 380 if (!c) 381 return -ENOMEM; 382 383 e = auxtrace_cache__alloc_entry(c); 384 if (!e) 385 return -ENOMEM; 386 387 e->insn_cnt = insn_cnt; 388 e->byte_cnt = byte_cnt; 389 e->op = intel_pt_insn->op; 390 e->branch = intel_pt_insn->branch; 391 e->length = intel_pt_insn->length; 392 e->rel = intel_pt_insn->rel; 393 394 err = auxtrace_cache__add(c, offset, &e->entry); 395 if (err) 396 auxtrace_cache__free_entry(c, e); 397 398 return err; 399 } 400 401 static struct intel_pt_cache_entry * 402 intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset) 403 { 404 struct auxtrace_cache *c = intel_pt_cache(dso, machine); 405 406 if (!c) 407 return NULL; 408 409 return auxtrace_cache__lookup(dso->auxtrace_cache, offset); 410 } 411 412 static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, 413 uint64_t *insn_cnt_ptr, uint64_t *ip, 414 uint64_t to_ip, uint64_t max_insn_cnt, 415 void *data) 416 { 417 struct intel_pt_queue *ptq = data; 418 struct machine *machine = ptq->pt->machine; 419 struct thread *thread; 420 struct addr_location al; 421 unsigned char buf[1024]; 422 size_t bufsz; 423 ssize_t len; 424 int x86_64; 425 u8 cpumode; 426 u64 offset, start_offset, start_ip; 427 u64 insn_cnt = 0; 428 bool one_map = true; 429 430 if (to_ip && *ip == to_ip) 431 goto out_no_cache; 432 433 bufsz = 
intel_pt_insn_max_size(); 434 435 if (*ip >= ptq->pt->kernel_start) 436 cpumode = PERF_RECORD_MISC_KERNEL; 437 else 438 cpumode = PERF_RECORD_MISC_USER; 439 440 thread = ptq->thread; 441 if (!thread) { 442 if (cpumode != PERF_RECORD_MISC_KERNEL) 443 return -EINVAL; 444 thread = ptq->pt->unknown_thread; 445 } 446 447 while (1) { 448 thread__find_addr_map(thread, cpumode, MAP__FUNCTION, *ip, &al); 449 if (!al.map || !al.map->dso) 450 return -EINVAL; 451 452 if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR && 453 dso__data_status_seen(al.map->dso, 454 DSO_DATA_STATUS_SEEN_ITRACE)) 455 return -ENOENT; 456 457 offset = al.map->map_ip(al.map, *ip); 458 459 if (!to_ip && one_map) { 460 struct intel_pt_cache_entry *e; 461 462 e = intel_pt_cache_lookup(al.map->dso, machine, offset); 463 if (e && 464 (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) { 465 *insn_cnt_ptr = e->insn_cnt; 466 *ip += e->byte_cnt; 467 intel_pt_insn->op = e->op; 468 intel_pt_insn->branch = e->branch; 469 intel_pt_insn->length = e->length; 470 intel_pt_insn->rel = e->rel; 471 intel_pt_log_insn_no_data(intel_pt_insn, *ip); 472 return 0; 473 } 474 } 475 476 start_offset = offset; 477 start_ip = *ip; 478 479 /* Load maps to ensure dso->is_64_bit has been updated */ 480 map__load(al.map, machine->symbol_filter); 481 482 x86_64 = al.map->dso->is_64_bit; 483 484 while (1) { 485 len = dso__data_read_offset(al.map->dso, machine, 486 offset, buf, bufsz); 487 if (len <= 0) 488 return -EINVAL; 489 490 if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn)) 491 return -EINVAL; 492 493 intel_pt_log_insn(intel_pt_insn, *ip); 494 495 insn_cnt += 1; 496 497 if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH) 498 goto out; 499 500 if (max_insn_cnt && insn_cnt >= max_insn_cnt) 501 goto out_no_cache; 502 503 *ip += intel_pt_insn->length; 504 505 if (to_ip && *ip == to_ip) 506 goto out_no_cache; 507 508 if (*ip >= al.map->end) 509 break; 510 511 offset += intel_pt_insn->length; 512 } 513 one_map = false; 514 } 515 out: 516 *insn_cnt_ptr = insn_cnt; 517 518 if (!one_map) 519 goto out_no_cache; 520 521 /* 522 * Didn't lookup in the 'to_ip' case, so do it now to prevent duplicate 523 * entries. 
524 */ 525 if (to_ip) { 526 struct intel_pt_cache_entry *e; 527 528 e = intel_pt_cache_lookup(al.map->dso, machine, start_offset); 529 if (e) 530 return 0; 531 } 532 533 /* Ignore cache errors */ 534 intel_pt_cache_add(al.map->dso, machine, start_offset, insn_cnt, 535 *ip - start_ip, intel_pt_insn); 536 537 return 0; 538 539 out_no_cache: 540 *insn_cnt_ptr = insn_cnt; 541 return 0; 542 } 543 544 static bool intel_pt_get_config(struct intel_pt *pt, 545 struct perf_event_attr *attr, u64 *config) 546 { 547 if (attr->type == pt->pmu_type) { 548 if (config) 549 *config = attr->config; 550 return true; 551 } 552 553 return false; 554 } 555 556 static bool intel_pt_exclude_kernel(struct intel_pt *pt) 557 { 558 struct perf_evsel *evsel; 559 560 evlist__for_each_entry(pt->session->evlist, evsel) { 561 if (intel_pt_get_config(pt, &evsel->attr, NULL) && 562 !evsel->attr.exclude_kernel) 563 return false; 564 } 565 return true; 566 } 567 568 static bool intel_pt_return_compression(struct intel_pt *pt) 569 { 570 struct perf_evsel *evsel; 571 u64 config; 572 573 if (!pt->noretcomp_bit) 574 return true; 575 576 evlist__for_each_entry(pt->session->evlist, evsel) { 577 if (intel_pt_get_config(pt, &evsel->attr, &config) && 578 (config & pt->noretcomp_bit)) 579 return false; 580 } 581 return true; 582 } 583 584 static unsigned int intel_pt_mtc_period(struct intel_pt *pt) 585 { 586 struct perf_evsel *evsel; 587 unsigned int shift; 588 u64 config; 589 590 if (!pt->mtc_freq_bits) 591 return 0; 592 593 for (shift = 0, config = pt->mtc_freq_bits; !(config & 1); shift++) 594 config >>= 1; 595 596 evlist__for_each_entry(pt->session->evlist, evsel) { 597 if (intel_pt_get_config(pt, &evsel->attr, &config)) 598 return (config & pt->mtc_freq_bits) >> shift; 599 } 600 return 0; 601 } 602 603 static bool intel_pt_timeless_decoding(struct intel_pt *pt) 604 { 605 struct perf_evsel *evsel; 606 bool timeless_decoding = true; 607 u64 config; 608 609 if (!pt->tsc_bit || !pt->cap_user_time_zero) 610 return true; 611 612 evlist__for_each_entry(pt->session->evlist, evsel) { 613 if (!(evsel->attr.sample_type & PERF_SAMPLE_TIME)) 614 return true; 615 if (intel_pt_get_config(pt, &evsel->attr, &config)) { 616 if (config & pt->tsc_bit) 617 timeless_decoding = false; 618 else 619 return true; 620 } 621 } 622 return timeless_decoding; 623 } 624 625 static bool intel_pt_tracing_kernel(struct intel_pt *pt) 626 { 627 struct perf_evsel *evsel; 628 629 evlist__for_each_entry(pt->session->evlist, evsel) { 630 if (intel_pt_get_config(pt, &evsel->attr, NULL) && 631 !evsel->attr.exclude_kernel) 632 return true; 633 } 634 return false; 635 } 636 637 static bool intel_pt_have_tsc(struct intel_pt *pt) 638 { 639 struct perf_evsel *evsel; 640 bool have_tsc = false; 641 u64 config; 642 643 if (!pt->tsc_bit) 644 return false; 645 646 evlist__for_each_entry(pt->session->evlist, evsel) { 647 if (intel_pt_get_config(pt, &evsel->attr, &config)) { 648 if (config & pt->tsc_bit) 649 have_tsc = true; 650 else 651 return false; 652 } 653 } 654 return have_tsc; 655 } 656 657 static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns) 658 { 659 u64 quot, rem; 660 661 quot = ns / pt->tc.time_mult; 662 rem = ns % pt->tc.time_mult; 663 return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) / 664 pt->tc.time_mult; 665 } 666 667 static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, 668 unsigned int queue_nr) 669 { 670 struct intel_pt_params params = { .get_trace = 0, }; 671 struct intel_pt_queue *ptq; 672 673 ptq = zalloc(sizeof(struct 
intel_pt_queue)); 674 if (!ptq) 675 return NULL; 676 677 if (pt->synth_opts.callchain) { 678 size_t sz = sizeof(struct ip_callchain); 679 680 sz += pt->synth_opts.callchain_sz * sizeof(u64); 681 ptq->chain = zalloc(sz); 682 if (!ptq->chain) 683 goto out_free; 684 } 685 686 if (pt->synth_opts.last_branch) { 687 size_t sz = sizeof(struct branch_stack); 688 689 sz += pt->synth_opts.last_branch_sz * 690 sizeof(struct branch_entry); 691 ptq->last_branch = zalloc(sz); 692 if (!ptq->last_branch) 693 goto out_free; 694 ptq->last_branch_rb = zalloc(sz); 695 if (!ptq->last_branch_rb) 696 goto out_free; 697 } 698 699 ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); 700 if (!ptq->event_buf) 701 goto out_free; 702 703 ptq->pt = pt; 704 ptq->queue_nr = queue_nr; 705 ptq->exclude_kernel = intel_pt_exclude_kernel(pt); 706 ptq->pid = -1; 707 ptq->tid = -1; 708 ptq->cpu = -1; 709 ptq->next_tid = -1; 710 711 params.get_trace = intel_pt_get_trace; 712 params.walk_insn = intel_pt_walk_next_insn; 713 params.data = ptq; 714 params.return_compression = intel_pt_return_compression(pt); 715 params.max_non_turbo_ratio = pt->max_non_turbo_ratio; 716 params.mtc_period = intel_pt_mtc_period(pt); 717 params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n; 718 params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d; 719 720 if (pt->synth_opts.instructions) { 721 if (pt->synth_opts.period) { 722 switch (pt->synth_opts.period_type) { 723 case PERF_ITRACE_PERIOD_INSTRUCTIONS: 724 params.period_type = 725 INTEL_PT_PERIOD_INSTRUCTIONS; 726 params.period = pt->synth_opts.period; 727 break; 728 case PERF_ITRACE_PERIOD_TICKS: 729 params.period_type = INTEL_PT_PERIOD_TICKS; 730 params.period = pt->synth_opts.period; 731 break; 732 case PERF_ITRACE_PERIOD_NANOSECS: 733 params.period_type = INTEL_PT_PERIOD_TICKS; 734 params.period = intel_pt_ns_to_ticks(pt, 735 pt->synth_opts.period); 736 break; 737 default: 738 break; 739 } 740 } 741 742 if (!params.period) { 743 params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS; 744 params.period = 1; 745 } 746 } 747 748 ptq->decoder = intel_pt_decoder_new(¶ms); 749 if (!ptq->decoder) 750 goto out_free; 751 752 return ptq; 753 754 out_free: 755 zfree(&ptq->event_buf); 756 zfree(&ptq->last_branch); 757 zfree(&ptq->last_branch_rb); 758 zfree(&ptq->chain); 759 free(ptq); 760 return NULL; 761 } 762 763 static void intel_pt_free_queue(void *priv) 764 { 765 struct intel_pt_queue *ptq = priv; 766 767 if (!ptq) 768 return; 769 thread__zput(ptq->thread); 770 intel_pt_decoder_free(ptq->decoder); 771 zfree(&ptq->event_buf); 772 zfree(&ptq->last_branch); 773 zfree(&ptq->last_branch_rb); 774 zfree(&ptq->chain); 775 free(ptq); 776 } 777 778 static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt, 779 struct auxtrace_queue *queue) 780 { 781 struct intel_pt_queue *ptq = queue->priv; 782 783 if (queue->tid == -1 || pt->have_sched_switch) { 784 ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu); 785 thread__zput(ptq->thread); 786 } 787 788 if (!ptq->thread && ptq->tid != -1) 789 ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid); 790 791 if (ptq->thread) { 792 ptq->pid = ptq->thread->pid_; 793 if (queue->cpu == -1) 794 ptq->cpu = ptq->thread->cpu; 795 } 796 } 797 798 static void intel_pt_sample_flags(struct intel_pt_queue *ptq) 799 { 800 if (ptq->state->flags & INTEL_PT_ABORT_TX) { 801 ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT; 802 } else if (ptq->state->flags & INTEL_PT_ASYNC) { 803 if (ptq->state->to_ip) 804 ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | 805 PERF_IP_FLAG_ASYNC | 806 
				     PERF_IP_FLAG_INTERRUPT;
		else
			ptq->flags = PERF_IP_FLAG_BRANCH |
				     PERF_IP_FLAG_TRACE_END;
		ptq->insn_len = 0;
	} else {
		if (ptq->state->from_ip)
			ptq->flags = intel_pt_insn_type(ptq->state->insn_op);
		else
			ptq->flags = PERF_IP_FLAG_BRANCH |
				     PERF_IP_FLAG_TRACE_BEGIN;
		if (ptq->state->flags & INTEL_PT_IN_TX)
			ptq->flags |= PERF_IP_FLAG_IN_TX;
		ptq->insn_len = ptq->state->insn_len;
	}
}

static int intel_pt_setup_queue(struct intel_pt *pt,
				struct auxtrace_queue *queue,
				unsigned int queue_nr)
{
	struct intel_pt_queue *ptq = queue->priv;

	if (list_empty(&queue->head))
		return 0;

	if (!ptq) {
		ptq = intel_pt_alloc_queue(pt, queue_nr);
		if (!ptq)
			return -ENOMEM;
		queue->priv = ptq;

		if (queue->cpu != -1)
			ptq->cpu = queue->cpu;
		ptq->tid = queue->tid;

		if (pt->sampling_mode) {
			if (pt->timeless_decoding)
				ptq->step_through_buffers = true;
			if (pt->timeless_decoding || !pt->have_sched_switch)
				ptq->use_buffer_pid_tid = true;
		}
	}

	if (!ptq->on_heap &&
	    (!pt->sync_switch ||
	     ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) {
		const struct intel_pt_state *state;
		int ret;

		if (pt->timeless_decoding)
			return 0;

		intel_pt_log("queue %u getting timestamp\n", queue_nr);
		intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
			     queue_nr, ptq->cpu, ptq->pid, ptq->tid);
		while (1) {
			state = intel_pt_decode(ptq->decoder);
			if (state->err) {
				if (state->err == INTEL_PT_ERR_NODATA) {
					intel_pt_log("queue %u has no timestamp\n",
						     queue_nr);
					return 0;
				}
				continue;
			}
			if (state->timestamp)
				break;
		}

		ptq->timestamp = state->timestamp;
		intel_pt_log("queue %u timestamp 0x%" PRIx64 "\n",
			     queue_nr, ptq->timestamp);
		ptq->state = state;
		ptq->have_sample = true;
		intel_pt_sample_flags(ptq);
		ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp);
		if (ret)
			return ret;
		ptq->on_heap = true;
	}

	return 0;
}

static int intel_pt_setup_queues(struct intel_pt *pt)
{
	unsigned int i;
	int ret;

	for (i = 0; i < pt->queues.nr_queues; i++) {
		ret = intel_pt_setup_queue(pt, &pt->queues.queue_array[i], i);
		if (ret)
			return ret;
	}
	return 0;
}

static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq)
{
	struct branch_stack *bs_src = ptq->last_branch_rb;
	struct branch_stack *bs_dst = ptq->last_branch;
	size_t nr = 0;

	bs_dst->nr = bs_src->nr;

	if (!bs_src->nr)
		return;

	nr = ptq->pt->synth_opts.last_branch_sz - ptq->last_branch_pos;
	memcpy(&bs_dst->entries[0],
	       &bs_src->entries[ptq->last_branch_pos],
	       sizeof(struct branch_entry) * nr);

	if (bs_src->nr >= ptq->pt->synth_opts.last_branch_sz) {
		memcpy(&bs_dst->entries[nr],
		       &bs_src->entries[0],
		       sizeof(struct branch_entry) * ptq->last_branch_pos);
	}
}

static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq)
{
	ptq->last_branch_pos = 0;
	ptq->last_branch_rb->nr = 0;
}

static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq)
{
	const struct intel_pt_state *state = ptq->state;
	struct branch_stack *bs = ptq->last_branch_rb;
	struct branch_entry *be;

	if (!ptq->last_branch_pos)
		ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz;

	ptq->last_branch_pos -= 1;
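
	/*
	 * The last-branch ring buffer is filled backwards from the end of
	 * entries[], so last_branch_pos always indexes the newest entry.
	 * With last_branch_sz == 4, for example, slots are written in the
	 * order 3, 2, 1, 0, 3, ... and intel_pt_copy_last_branch_rb()
	 * linearizes them newest-first when a sample is synthesized.
	 */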
	be = &bs->entries[ptq->last_branch_pos];
	be->from = state->from_ip;
	be->to = state->to_ip;
	be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX);
	be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX);
	/* No support for mispredict */
	be->flags.mispred = ptq->pt->mispred_all;

	if (bs->nr < ptq->pt->synth_opts.last_branch_sz)
		bs->nr += 1;
}

static int intel_pt_inject_event(union perf_event *event,
				 struct perf_sample *sample, u64 type,
				 bool swapped)
{
	event->header.size = perf_event__sample_event_size(sample, type, 0);
	return perf_event__synthesize_sample(event, type, 0, sample, swapped);
}

static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
{
	int ret;
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };
	struct dummy_branch_stack {
		u64 nr;
		struct branch_entry entries;
	} dummy_bs;

	if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
		return 0;

	if (pt->synth_opts.initial_skip &&
	    pt->num_events++ < pt->synth_opts.initial_skip)
		return 0;

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = PERF_RECORD_MISC_USER;
	event->sample.header.size = sizeof(struct perf_event_header);

	if (!pt->timeless_decoding)
		sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);

	sample.cpumode = PERF_RECORD_MISC_USER;
	sample.ip = ptq->state->from_ip;
	sample.pid = ptq->pid;
	sample.tid = ptq->tid;
	sample.addr = ptq->state->to_ip;
	sample.id = ptq->pt->branches_id;
	sample.stream_id = ptq->pt->branches_id;
	sample.period = 1;
	sample.cpu = ptq->cpu;
	sample.flags = ptq->flags;
	sample.insn_len = ptq->insn_len;

	/*
	 * perf report cannot handle events without a branch stack when using
	 * SORT_MODE__BRANCH so make a dummy one.
1004 */ 1005 if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) { 1006 dummy_bs = (struct dummy_branch_stack){ 1007 .nr = 1, 1008 .entries = { 1009 .from = sample.ip, 1010 .to = sample.addr, 1011 }, 1012 }; 1013 sample.branch_stack = (struct branch_stack *)&dummy_bs; 1014 } 1015 1016 if (pt->synth_opts.inject) { 1017 ret = intel_pt_inject_event(event, &sample, 1018 pt->branches_sample_type, 1019 pt->synth_needs_swap); 1020 if (ret) 1021 return ret; 1022 } 1023 1024 ret = perf_session__deliver_synth_event(pt->session, event, &sample); 1025 if (ret) 1026 pr_err("Intel Processor Trace: failed to deliver branch event, error %d\n", 1027 ret); 1028 1029 return ret; 1030 } 1031 1032 static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) 1033 { 1034 int ret; 1035 struct intel_pt *pt = ptq->pt; 1036 union perf_event *event = ptq->event_buf; 1037 struct perf_sample sample = { .ip = 0, }; 1038 1039 if (pt->synth_opts.initial_skip && 1040 pt->num_events++ < pt->synth_opts.initial_skip) 1041 return 0; 1042 1043 event->sample.header.type = PERF_RECORD_SAMPLE; 1044 event->sample.header.misc = PERF_RECORD_MISC_USER; 1045 event->sample.header.size = sizeof(struct perf_event_header); 1046 1047 if (!pt->timeless_decoding) 1048 sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc); 1049 1050 sample.cpumode = PERF_RECORD_MISC_USER; 1051 sample.ip = ptq->state->from_ip; 1052 sample.pid = ptq->pid; 1053 sample.tid = ptq->tid; 1054 sample.addr = ptq->state->to_ip; 1055 sample.id = ptq->pt->instructions_id; 1056 sample.stream_id = ptq->pt->instructions_id; 1057 sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt; 1058 sample.cpu = ptq->cpu; 1059 sample.flags = ptq->flags; 1060 sample.insn_len = ptq->insn_len; 1061 1062 ptq->last_insn_cnt = ptq->state->tot_insn_cnt; 1063 1064 if (pt->synth_opts.callchain) { 1065 thread_stack__sample(ptq->thread, ptq->chain, 1066 pt->synth_opts.callchain_sz, sample.ip); 1067 sample.callchain = ptq->chain; 1068 } 1069 1070 if (pt->synth_opts.last_branch) { 1071 intel_pt_copy_last_branch_rb(ptq); 1072 sample.branch_stack = ptq->last_branch; 1073 } 1074 1075 if (pt->synth_opts.inject) { 1076 ret = intel_pt_inject_event(event, &sample, 1077 pt->instructions_sample_type, 1078 pt->synth_needs_swap); 1079 if (ret) 1080 return ret; 1081 } 1082 1083 ret = perf_session__deliver_synth_event(pt->session, event, &sample); 1084 if (ret) 1085 pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n", 1086 ret); 1087 1088 if (pt->synth_opts.last_branch) 1089 intel_pt_reset_last_branch_rb(ptq); 1090 1091 return ret; 1092 } 1093 1094 static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) 1095 { 1096 int ret; 1097 struct intel_pt *pt = ptq->pt; 1098 union perf_event *event = ptq->event_buf; 1099 struct perf_sample sample = { .ip = 0, }; 1100 1101 if (pt->synth_opts.initial_skip && 1102 pt->num_events++ < pt->synth_opts.initial_skip) 1103 return 0; 1104 1105 event->sample.header.type = PERF_RECORD_SAMPLE; 1106 event->sample.header.misc = PERF_RECORD_MISC_USER; 1107 event->sample.header.size = sizeof(struct perf_event_header); 1108 1109 if (!pt->timeless_decoding) 1110 sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc); 1111 1112 sample.cpumode = PERF_RECORD_MISC_USER; 1113 sample.ip = ptq->state->from_ip; 1114 sample.pid = ptq->pid; 1115 sample.tid = ptq->tid; 1116 sample.addr = ptq->state->to_ip; 1117 sample.id = ptq->pt->transactions_id; 1118 sample.stream_id = ptq->pt->transactions_id; 1119 sample.period = 1; 
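
	/*
	 * Unlike instruction samples, whose period carries the number of
	 * instructions executed since the previous sample, transaction (and
	 * branch) samples use a fixed period of 1: each synthesized event
	 * represents a single occurrence.
	 */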
1120 sample.cpu = ptq->cpu; 1121 sample.flags = ptq->flags; 1122 sample.insn_len = ptq->insn_len; 1123 1124 if (pt->synth_opts.callchain) { 1125 thread_stack__sample(ptq->thread, ptq->chain, 1126 pt->synth_opts.callchain_sz, sample.ip); 1127 sample.callchain = ptq->chain; 1128 } 1129 1130 if (pt->synth_opts.last_branch) { 1131 intel_pt_copy_last_branch_rb(ptq); 1132 sample.branch_stack = ptq->last_branch; 1133 } 1134 1135 if (pt->synth_opts.inject) { 1136 ret = intel_pt_inject_event(event, &sample, 1137 pt->transactions_sample_type, 1138 pt->synth_needs_swap); 1139 if (ret) 1140 return ret; 1141 } 1142 1143 ret = perf_session__deliver_synth_event(pt->session, event, &sample); 1144 if (ret) 1145 pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n", 1146 ret); 1147 1148 if (pt->synth_opts.last_branch) 1149 intel_pt_reset_last_branch_rb(ptq); 1150 1151 return ret; 1152 } 1153 1154 static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, 1155 pid_t pid, pid_t tid, u64 ip) 1156 { 1157 union perf_event event; 1158 char msg[MAX_AUXTRACE_ERROR_MSG]; 1159 int err; 1160 1161 intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG); 1162 1163 auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, 1164 code, cpu, pid, tid, ip, msg); 1165 1166 err = perf_session__deliver_synth_event(pt->session, &event, NULL); 1167 if (err) 1168 pr_err("Intel Processor Trace: failed to deliver error event, error %d\n", 1169 err); 1170 1171 return err; 1172 } 1173 1174 static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq) 1175 { 1176 struct auxtrace_queue *queue; 1177 pid_t tid = ptq->next_tid; 1178 int err; 1179 1180 if (tid == -1) 1181 return 0; 1182 1183 intel_pt_log("switch: cpu %d tid %d\n", ptq->cpu, tid); 1184 1185 err = machine__set_current_tid(pt->machine, ptq->cpu, -1, tid); 1186 1187 queue = &pt->queues.queue_array[ptq->queue_nr]; 1188 intel_pt_set_pid_tid_cpu(pt, queue); 1189 1190 ptq->next_tid = -1; 1191 1192 return err; 1193 } 1194 1195 static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip) 1196 { 1197 struct intel_pt *pt = ptq->pt; 1198 1199 return ip == pt->switch_ip && 1200 (ptq->flags & PERF_IP_FLAG_BRANCH) && 1201 !(ptq->flags & (PERF_IP_FLAG_CONDITIONAL | PERF_IP_FLAG_ASYNC | 1202 PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT)); 1203 } 1204 1205 static int intel_pt_sample(struct intel_pt_queue *ptq) 1206 { 1207 const struct intel_pt_state *state = ptq->state; 1208 struct intel_pt *pt = ptq->pt; 1209 int err; 1210 1211 if (!ptq->have_sample) 1212 return 0; 1213 1214 ptq->have_sample = false; 1215 1216 if (pt->sample_instructions && 1217 (state->type & INTEL_PT_INSTRUCTION) && 1218 (!pt->synth_opts.initial_skip || 1219 pt->num_events++ >= pt->synth_opts.initial_skip)) { 1220 err = intel_pt_synth_instruction_sample(ptq); 1221 if (err) 1222 return err; 1223 } 1224 1225 if (pt->sample_transactions && 1226 (state->type & INTEL_PT_TRANSACTION) && 1227 (!pt->synth_opts.initial_skip || 1228 pt->num_events++ >= pt->synth_opts.initial_skip)) { 1229 err = intel_pt_synth_transaction_sample(ptq); 1230 if (err) 1231 return err; 1232 } 1233 1234 if (!(state->type & INTEL_PT_BRANCH)) 1235 return 0; 1236 1237 if (pt->synth_opts.callchain || pt->synth_opts.thread_stack) 1238 thread_stack__event(ptq->thread, ptq->flags, state->from_ip, 1239 state->to_ip, ptq->insn_len, 1240 state->trace_nr); 1241 else 1242 thread_stack__set_trace_nr(ptq->thread, state->trace_nr); 1243 1244 if (pt->sample_branches) { 1245 err = 
intel_pt_synth_branch_sample(ptq); 1246 if (err) 1247 return err; 1248 } 1249 1250 if (pt->synth_opts.last_branch) 1251 intel_pt_update_last_branch_rb(ptq); 1252 1253 if (!pt->sync_switch) 1254 return 0; 1255 1256 if (intel_pt_is_switch_ip(ptq, state->to_ip)) { 1257 switch (ptq->switch_state) { 1258 case INTEL_PT_SS_UNKNOWN: 1259 case INTEL_PT_SS_EXPECTING_SWITCH_IP: 1260 err = intel_pt_next_tid(pt, ptq); 1261 if (err) 1262 return err; 1263 ptq->switch_state = INTEL_PT_SS_TRACING; 1264 break; 1265 default: 1266 ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_EVENT; 1267 return 1; 1268 } 1269 } else if (!state->to_ip) { 1270 ptq->switch_state = INTEL_PT_SS_NOT_TRACING; 1271 } else if (ptq->switch_state == INTEL_PT_SS_NOT_TRACING) { 1272 ptq->switch_state = INTEL_PT_SS_UNKNOWN; 1273 } else if (ptq->switch_state == INTEL_PT_SS_UNKNOWN && 1274 state->to_ip == pt->ptss_ip && 1275 (ptq->flags & PERF_IP_FLAG_CALL)) { 1276 ptq->switch_state = INTEL_PT_SS_TRACING; 1277 } 1278 1279 return 0; 1280 } 1281 1282 static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip) 1283 { 1284 struct machine *machine = pt->machine; 1285 struct map *map; 1286 struct symbol *sym, *start; 1287 u64 ip, switch_ip = 0; 1288 const char *ptss; 1289 1290 if (ptss_ip) 1291 *ptss_ip = 0; 1292 1293 map = machine__kernel_map(machine); 1294 if (!map) 1295 return 0; 1296 1297 if (map__load(map, machine->symbol_filter)) 1298 return 0; 1299 1300 start = dso__first_symbol(map->dso, MAP__FUNCTION); 1301 1302 for (sym = start; sym; sym = dso__next_symbol(sym)) { 1303 if (sym->binding == STB_GLOBAL && 1304 !strcmp(sym->name, "__switch_to")) { 1305 ip = map->unmap_ip(map, sym->start); 1306 if (ip >= map->start && ip < map->end) { 1307 switch_ip = ip; 1308 break; 1309 } 1310 } 1311 } 1312 1313 if (!switch_ip || !ptss_ip) 1314 return 0; 1315 1316 if (pt->have_sched_switch == 1) 1317 ptss = "perf_trace_sched_switch"; 1318 else 1319 ptss = "__perf_event_task_sched_out"; 1320 1321 for (sym = start; sym; sym = dso__next_symbol(sym)) { 1322 if (!strcmp(sym->name, ptss)) { 1323 ip = map->unmap_ip(map, sym->start); 1324 if (ip >= map->start && ip < map->end) { 1325 *ptss_ip = ip; 1326 break; 1327 } 1328 } 1329 } 1330 1331 return switch_ip; 1332 } 1333 1334 static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) 1335 { 1336 const struct intel_pt_state *state = ptq->state; 1337 struct intel_pt *pt = ptq->pt; 1338 int err; 1339 1340 if (!pt->kernel_start) { 1341 pt->kernel_start = machine__kernel_start(pt->machine); 1342 if (pt->per_cpu_mmaps && 1343 (pt->have_sched_switch == 1 || pt->have_sched_switch == 3) && 1344 !pt->timeless_decoding && intel_pt_tracing_kernel(pt) && 1345 !pt->sampling_mode) { 1346 pt->switch_ip = intel_pt_switch_ip(pt, &pt->ptss_ip); 1347 if (pt->switch_ip) { 1348 intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n", 1349 pt->switch_ip, pt->ptss_ip); 1350 pt->sync_switch = true; 1351 } 1352 } 1353 } 1354 1355 intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n", 1356 ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid); 1357 while (1) { 1358 err = intel_pt_sample(ptq); 1359 if (err) 1360 return err; 1361 1362 state = intel_pt_decode(ptq->decoder); 1363 if (state->err) { 1364 if (state->err == INTEL_PT_ERR_NODATA) 1365 return 1; 1366 if (pt->sync_switch && 1367 state->from_ip >= pt->kernel_start) { 1368 pt->sync_switch = false; 1369 intel_pt_next_tid(pt, ptq); 1370 } 1371 if (pt->synth_opts.errors) { 1372 err = intel_pt_synth_error(pt, state->err, 1373 ptq->cpu, ptq->pid, 1374 ptq->tid, 1375 
state->from_ip); 1376 if (err) 1377 return err; 1378 } 1379 continue; 1380 } 1381 1382 ptq->state = state; 1383 ptq->have_sample = true; 1384 intel_pt_sample_flags(ptq); 1385 1386 /* Use estimated TSC upon return to user space */ 1387 if (pt->est_tsc && 1388 (state->from_ip >= pt->kernel_start || !state->from_ip) && 1389 state->to_ip && state->to_ip < pt->kernel_start) { 1390 intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n", 1391 state->timestamp, state->est_timestamp); 1392 ptq->timestamp = state->est_timestamp; 1393 /* Use estimated TSC in unknown switch state */ 1394 } else if (pt->sync_switch && 1395 ptq->switch_state == INTEL_PT_SS_UNKNOWN && 1396 intel_pt_is_switch_ip(ptq, state->to_ip) && 1397 ptq->next_tid == -1) { 1398 intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n", 1399 state->timestamp, state->est_timestamp); 1400 ptq->timestamp = state->est_timestamp; 1401 } else if (state->timestamp > ptq->timestamp) { 1402 ptq->timestamp = state->timestamp; 1403 } 1404 1405 if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) { 1406 *timestamp = ptq->timestamp; 1407 return 0; 1408 } 1409 } 1410 return 0; 1411 } 1412 1413 static inline int intel_pt_update_queues(struct intel_pt *pt) 1414 { 1415 if (pt->queues.new_data) { 1416 pt->queues.new_data = false; 1417 return intel_pt_setup_queues(pt); 1418 } 1419 return 0; 1420 } 1421 1422 static int intel_pt_process_queues(struct intel_pt *pt, u64 timestamp) 1423 { 1424 unsigned int queue_nr; 1425 u64 ts; 1426 int ret; 1427 1428 while (1) { 1429 struct auxtrace_queue *queue; 1430 struct intel_pt_queue *ptq; 1431 1432 if (!pt->heap.heap_cnt) 1433 return 0; 1434 1435 if (pt->heap.heap_array[0].ordinal >= timestamp) 1436 return 0; 1437 1438 queue_nr = pt->heap.heap_array[0].queue_nr; 1439 queue = &pt->queues.queue_array[queue_nr]; 1440 ptq = queue->priv; 1441 1442 intel_pt_log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n", 1443 queue_nr, pt->heap.heap_array[0].ordinal, 1444 timestamp); 1445 1446 auxtrace_heap__pop(&pt->heap); 1447 1448 if (pt->heap.heap_cnt) { 1449 ts = pt->heap.heap_array[0].ordinal + 1; 1450 if (ts > timestamp) 1451 ts = timestamp; 1452 } else { 1453 ts = timestamp; 1454 } 1455 1456 intel_pt_set_pid_tid_cpu(pt, queue); 1457 1458 ret = intel_pt_run_decoder(ptq, &ts); 1459 1460 if (ret < 0) { 1461 auxtrace_heap__add(&pt->heap, queue_nr, ts); 1462 return ret; 1463 } 1464 1465 if (!ret) { 1466 ret = auxtrace_heap__add(&pt->heap, queue_nr, ts); 1467 if (ret < 0) 1468 return ret; 1469 } else { 1470 ptq->on_heap = false; 1471 } 1472 } 1473 1474 return 0; 1475 } 1476 1477 static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid, 1478 u64 time_) 1479 { 1480 struct auxtrace_queues *queues = &pt->queues; 1481 unsigned int i; 1482 u64 ts = 0; 1483 1484 for (i = 0; i < queues->nr_queues; i++) { 1485 struct auxtrace_queue *queue = &pt->queues.queue_array[i]; 1486 struct intel_pt_queue *ptq = queue->priv; 1487 1488 if (ptq && (tid == -1 || ptq->tid == tid)) { 1489 ptq->time = time_; 1490 intel_pt_set_pid_tid_cpu(pt, queue); 1491 intel_pt_run_decoder(ptq, &ts); 1492 } 1493 } 1494 return 0; 1495 } 1496 1497 static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample) 1498 { 1499 return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu, 1500 sample->pid, sample->tid, 0); 1501 } 1502 1503 static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu) 1504 { 1505 unsigned i, j; 1506 1507 if (cpu < 0 || !pt->queues.nr_queues) 1508 return NULL; 1509 1510 if ((unsigned)cpu >= 
pt->queues.nr_queues) 1511 i = pt->queues.nr_queues - 1; 1512 else 1513 i = cpu; 1514 1515 if (pt->queues.queue_array[i].cpu == cpu) 1516 return pt->queues.queue_array[i].priv; 1517 1518 for (j = 0; i > 0; j++) { 1519 if (pt->queues.queue_array[--i].cpu == cpu) 1520 return pt->queues.queue_array[i].priv; 1521 } 1522 1523 for (; j < pt->queues.nr_queues; j++) { 1524 if (pt->queues.queue_array[j].cpu == cpu) 1525 return pt->queues.queue_array[j].priv; 1526 } 1527 1528 return NULL; 1529 } 1530 1531 static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid, 1532 u64 timestamp) 1533 { 1534 struct intel_pt_queue *ptq; 1535 int err; 1536 1537 if (!pt->sync_switch) 1538 return 1; 1539 1540 ptq = intel_pt_cpu_to_ptq(pt, cpu); 1541 if (!ptq) 1542 return 1; 1543 1544 switch (ptq->switch_state) { 1545 case INTEL_PT_SS_NOT_TRACING: 1546 ptq->next_tid = -1; 1547 break; 1548 case INTEL_PT_SS_UNKNOWN: 1549 case INTEL_PT_SS_TRACING: 1550 ptq->next_tid = tid; 1551 ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_IP; 1552 return 0; 1553 case INTEL_PT_SS_EXPECTING_SWITCH_EVENT: 1554 if (!ptq->on_heap) { 1555 ptq->timestamp = perf_time_to_tsc(timestamp, 1556 &pt->tc); 1557 err = auxtrace_heap__add(&pt->heap, ptq->queue_nr, 1558 ptq->timestamp); 1559 if (err) 1560 return err; 1561 ptq->on_heap = true; 1562 } 1563 ptq->switch_state = INTEL_PT_SS_TRACING; 1564 break; 1565 case INTEL_PT_SS_EXPECTING_SWITCH_IP: 1566 ptq->next_tid = tid; 1567 intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu); 1568 break; 1569 default: 1570 break; 1571 } 1572 1573 return 1; 1574 } 1575 1576 static int intel_pt_process_switch(struct intel_pt *pt, 1577 struct perf_sample *sample) 1578 { 1579 struct perf_evsel *evsel; 1580 pid_t tid; 1581 int cpu, ret; 1582 1583 evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id); 1584 if (evsel != pt->switch_evsel) 1585 return 0; 1586 1587 tid = perf_evsel__intval(evsel, sample, "next_pid"); 1588 cpu = sample->cpu; 1589 1590 intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", 1591 cpu, tid, sample->time, perf_time_to_tsc(sample->time, 1592 &pt->tc)); 1593 1594 ret = intel_pt_sync_switch(pt, cpu, tid, sample->time); 1595 if (ret <= 0) 1596 return ret; 1597 1598 return machine__set_current_tid(pt->machine, cpu, -1, tid); 1599 } 1600 1601 static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event, 1602 struct perf_sample *sample) 1603 { 1604 bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; 1605 pid_t pid, tid; 1606 int cpu, ret; 1607 1608 cpu = sample->cpu; 1609 1610 if (pt->have_sched_switch == 3) { 1611 if (!out) 1612 return 0; 1613 if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) { 1614 pr_err("Expecting CPU-wide context switch event\n"); 1615 return -EINVAL; 1616 } 1617 pid = event->context_switch.next_prev_pid; 1618 tid = event->context_switch.next_prev_tid; 1619 } else { 1620 if (out) 1621 return 0; 1622 pid = sample->pid; 1623 tid = sample->tid; 1624 } 1625 1626 if (tid == -1) { 1627 pr_err("context_switch event has no tid\n"); 1628 return -EINVAL; 1629 } 1630 1631 intel_pt_log("context_switch: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", 1632 cpu, pid, tid, sample->time, perf_time_to_tsc(sample->time, 1633 &pt->tc)); 1634 1635 ret = intel_pt_sync_switch(pt, cpu, tid, sample->time); 1636 if (ret <= 0) 1637 return ret; 1638 1639 return machine__set_current_tid(pt->machine, cpu, pid, tid); 1640 } 1641 1642 static int intel_pt_process_itrace_start(struct intel_pt *pt, 1643 union perf_event *event, 
1644 struct perf_sample *sample) 1645 { 1646 if (!pt->per_cpu_mmaps) 1647 return 0; 1648 1649 intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", 1650 sample->cpu, event->itrace_start.pid, 1651 event->itrace_start.tid, sample->time, 1652 perf_time_to_tsc(sample->time, &pt->tc)); 1653 1654 return machine__set_current_tid(pt->machine, sample->cpu, 1655 event->itrace_start.pid, 1656 event->itrace_start.tid); 1657 } 1658 1659 static int intel_pt_process_event(struct perf_session *session, 1660 union perf_event *event, 1661 struct perf_sample *sample, 1662 struct perf_tool *tool) 1663 { 1664 struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, 1665 auxtrace); 1666 u64 timestamp; 1667 int err = 0; 1668 1669 if (dump_trace) 1670 return 0; 1671 1672 if (!tool->ordered_events) { 1673 pr_err("Intel Processor Trace requires ordered events\n"); 1674 return -EINVAL; 1675 } 1676 1677 if (sample->time && sample->time != (u64)-1) 1678 timestamp = perf_time_to_tsc(sample->time, &pt->tc); 1679 else 1680 timestamp = 0; 1681 1682 if (timestamp || pt->timeless_decoding) { 1683 err = intel_pt_update_queues(pt); 1684 if (err) 1685 return err; 1686 } 1687 1688 if (pt->timeless_decoding) { 1689 if (event->header.type == PERF_RECORD_EXIT) { 1690 err = intel_pt_process_timeless_queues(pt, 1691 event->fork.tid, 1692 sample->time); 1693 } 1694 } else if (timestamp) { 1695 err = intel_pt_process_queues(pt, timestamp); 1696 } 1697 if (err) 1698 return err; 1699 1700 if (event->header.type == PERF_RECORD_AUX && 1701 (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) && 1702 pt->synth_opts.errors) { 1703 err = intel_pt_lost(pt, sample); 1704 if (err) 1705 return err; 1706 } 1707 1708 if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE) 1709 err = intel_pt_process_switch(pt, sample); 1710 else if (event->header.type == PERF_RECORD_ITRACE_START) 1711 err = intel_pt_process_itrace_start(pt, event, sample); 1712 else if (event->header.type == PERF_RECORD_SWITCH || 1713 event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) 1714 err = intel_pt_context_switch(pt, event, sample); 1715 1716 intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n", 1717 perf_event__name(event->header.type), event->header.type, 1718 sample->cpu, sample->time, timestamp); 1719 1720 return err; 1721 } 1722 1723 static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool) 1724 { 1725 struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, 1726 auxtrace); 1727 int ret; 1728 1729 if (dump_trace) 1730 return 0; 1731 1732 if (!tool->ordered_events) 1733 return -EINVAL; 1734 1735 ret = intel_pt_update_queues(pt); 1736 if (ret < 0) 1737 return ret; 1738 1739 if (pt->timeless_decoding) 1740 return intel_pt_process_timeless_queues(pt, -1, 1741 MAX_TIMESTAMP - 1); 1742 1743 return intel_pt_process_queues(pt, MAX_TIMESTAMP); 1744 } 1745 1746 static void intel_pt_free_events(struct perf_session *session) 1747 { 1748 struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, 1749 auxtrace); 1750 struct auxtrace_queues *queues = &pt->queues; 1751 unsigned int i; 1752 1753 for (i = 0; i < queues->nr_queues; i++) { 1754 intel_pt_free_queue(queues->queue_array[i].priv); 1755 queues->queue_array[i].priv = NULL; 1756 } 1757 intel_pt_log_disable(); 1758 auxtrace_queues__free(queues); 1759 } 1760 1761 static void intel_pt_free(struct perf_session *session) 1762 { 1763 struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, 1764 auxtrace); 1765 1766 
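	/*
	 * Tear down in roughly the reverse order of setup: drop the timestamp
	 * heap, free the per-queue decoders and buffers, detach from the
	 * session, then release the synthetic "unknown" thread.
	 */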
auxtrace_heap__free(&pt->heap); 1767 intel_pt_free_events(session); 1768 session->auxtrace = NULL; 1769 thread__put(pt->unknown_thread); 1770 free(pt); 1771 } 1772 1773 static int intel_pt_process_auxtrace_event(struct perf_session *session, 1774 union perf_event *event, 1775 struct perf_tool *tool __maybe_unused) 1776 { 1777 struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, 1778 auxtrace); 1779 1780 if (pt->sampling_mode) 1781 return 0; 1782 1783 if (!pt->data_queued) { 1784 struct auxtrace_buffer *buffer; 1785 off_t data_offset; 1786 int fd = perf_data_file__fd(session->file); 1787 int err; 1788 1789 if (perf_data_file__is_pipe(session->file)) { 1790 data_offset = 0; 1791 } else { 1792 data_offset = lseek(fd, 0, SEEK_CUR); 1793 if (data_offset == -1) 1794 return -errno; 1795 } 1796 1797 err = auxtrace_queues__add_event(&pt->queues, session, event, 1798 data_offset, &buffer); 1799 if (err) 1800 return err; 1801 1802 /* Dump here now we have copied a piped trace out of the pipe */ 1803 if (dump_trace) { 1804 if (auxtrace_buffer__get_data(buffer, fd)) { 1805 intel_pt_dump_event(pt, buffer->data, 1806 buffer->size); 1807 auxtrace_buffer__put_data(buffer); 1808 } 1809 } 1810 } 1811 1812 return 0; 1813 } 1814 1815 struct intel_pt_synth { 1816 struct perf_tool dummy_tool; 1817 struct perf_session *session; 1818 }; 1819 1820 static int intel_pt_event_synth(struct perf_tool *tool, 1821 union perf_event *event, 1822 struct perf_sample *sample __maybe_unused, 1823 struct machine *machine __maybe_unused) 1824 { 1825 struct intel_pt_synth *intel_pt_synth = 1826 container_of(tool, struct intel_pt_synth, dummy_tool); 1827 1828 return perf_session__deliver_synth_event(intel_pt_synth->session, event, 1829 NULL); 1830 } 1831 1832 static int intel_pt_synth_event(struct perf_session *session, 1833 struct perf_event_attr *attr, u64 id) 1834 { 1835 struct intel_pt_synth intel_pt_synth; 1836 1837 memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth)); 1838 intel_pt_synth.session = session; 1839 1840 return perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1, 1841 &id, intel_pt_event_synth); 1842 } 1843 1844 static int intel_pt_synth_events(struct intel_pt *pt, 1845 struct perf_session *session) 1846 { 1847 struct perf_evlist *evlist = session->evlist; 1848 struct perf_evsel *evsel; 1849 struct perf_event_attr attr; 1850 bool found = false; 1851 u64 id; 1852 int err; 1853 1854 evlist__for_each_entry(evlist, evsel) { 1855 if (evsel->attr.type == pt->pmu_type && evsel->ids) { 1856 found = true; 1857 break; 1858 } 1859 } 1860 1861 if (!found) { 1862 pr_debug("There are no selected events with Intel Processor Trace data\n"); 1863 return 0; 1864 } 1865 1866 memset(&attr, 0, sizeof(struct perf_event_attr)); 1867 attr.size = sizeof(struct perf_event_attr); 1868 attr.type = PERF_TYPE_HARDWARE; 1869 attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK; 1870 attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | 1871 PERF_SAMPLE_PERIOD; 1872 if (pt->timeless_decoding) 1873 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; 1874 else 1875 attr.sample_type |= PERF_SAMPLE_TIME; 1876 if (!pt->per_cpu_mmaps) 1877 attr.sample_type &= ~(u64)PERF_SAMPLE_CPU; 1878 attr.exclude_user = evsel->attr.exclude_user; 1879 attr.exclude_kernel = evsel->attr.exclude_kernel; 1880 attr.exclude_hv = evsel->attr.exclude_hv; 1881 attr.exclude_host = evsel->attr.exclude_host; 1882 attr.exclude_guest = evsel->attr.exclude_guest; 1883 attr.sample_id_all = evsel->attr.sample_id_all; 1884 attr.read_format = 
evsel->attr.read_format; 1885 1886 id = evsel->id[0] + 1000000000; 1887 if (!id) 1888 id = 1; 1889 1890 if (pt->synth_opts.instructions) { 1891 attr.config = PERF_COUNT_HW_INSTRUCTIONS; 1892 if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS) 1893 attr.sample_period = 1894 intel_pt_ns_to_ticks(pt, pt->synth_opts.period); 1895 else 1896 attr.sample_period = pt->synth_opts.period; 1897 pt->instructions_sample_period = attr.sample_period; 1898 if (pt->synth_opts.callchain) 1899 attr.sample_type |= PERF_SAMPLE_CALLCHAIN; 1900 if (pt->synth_opts.last_branch) 1901 attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; 1902 pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n", 1903 id, (u64)attr.sample_type); 1904 err = intel_pt_synth_event(session, &attr, id); 1905 if (err) { 1906 pr_err("%s: failed to synthesize 'instructions' event type\n", 1907 __func__); 1908 return err; 1909 } 1910 pt->sample_instructions = true; 1911 pt->instructions_sample_type = attr.sample_type; 1912 pt->instructions_id = id; 1913 id += 1; 1914 } 1915 1916 if (pt->synth_opts.transactions) { 1917 attr.config = PERF_COUNT_HW_INSTRUCTIONS; 1918 attr.sample_period = 1; 1919 if (pt->synth_opts.callchain) 1920 attr.sample_type |= PERF_SAMPLE_CALLCHAIN; 1921 if (pt->synth_opts.last_branch) 1922 attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; 1923 pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n", 1924 id, (u64)attr.sample_type); 1925 err = intel_pt_synth_event(session, &attr, id); 1926 if (err) { 1927 pr_err("%s: failed to synthesize 'transactions' event type\n", 1928 __func__); 1929 return err; 1930 } 1931 pt->sample_transactions = true; 1932 pt->transactions_id = id; 1933 id += 1; 1934 evlist__for_each_entry(evlist, evsel) { 1935 if (evsel->id && evsel->id[0] == pt->transactions_id) { 1936 if (evsel->name) 1937 zfree(&evsel->name); 1938 evsel->name = strdup("transactions"); 1939 break; 1940 } 1941 } 1942 } 1943 1944 if (pt->synth_opts.branches) { 1945 attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; 1946 attr.sample_period = 1; 1947 attr.sample_type |= PERF_SAMPLE_ADDR; 1948 attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN; 1949 attr.sample_type &= ~(u64)PERF_SAMPLE_BRANCH_STACK; 1950 pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n", 1951 id, (u64)attr.sample_type); 1952 err = intel_pt_synth_event(session, &attr, id); 1953 if (err) { 1954 pr_err("%s: failed to synthesize 'branches' event type\n", 1955 __func__); 1956 return err; 1957 } 1958 pt->sample_branches = true; 1959 pt->branches_sample_type = attr.sample_type; 1960 pt->branches_id = id; 1961 } 1962 1963 pt->synth_needs_swap = evsel->needs_swap; 1964 1965 return 0; 1966 } 1967 1968 static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist) 1969 { 1970 struct perf_evsel *evsel; 1971 1972 evlist__for_each_entry_reverse(evlist, evsel) { 1973 const char *name = perf_evsel__name(evsel); 1974 1975 if (!strcmp(name, "sched:sched_switch")) 1976 return evsel; 1977 } 1978 1979 return NULL; 1980 } 1981 1982 static bool intel_pt_find_switch(struct perf_evlist *evlist) 1983 { 1984 struct perf_evsel *evsel; 1985 1986 evlist__for_each_entry(evlist, evsel) { 1987 if (evsel->attr.context_switch) 1988 return true; 1989 } 1990 1991 return false; 1992 } 1993 1994 static int intel_pt_perf_config(const char *var, const char *value, void *data) 1995 { 1996 struct intel_pt *pt = data; 1997 1998 if (!strcmp(var, "intel-pt.mispred-all")) 1999 
		pt->mispred_all = perf_config_bool(var, value);

	return 0;
}

static const char * const intel_pt_info_fmts[] = {
	[INTEL_PT_PMU_TYPE]		= "  PMU Type            %"PRId64"\n",
	[INTEL_PT_TIME_SHIFT]		= "  Time Shift          %"PRIu64"\n",
	[INTEL_PT_TIME_MULT]		= "  Time Multiplier     %"PRIu64"\n",
	[INTEL_PT_TIME_ZERO]		= "  Time Zero           %"PRIu64"\n",
	[INTEL_PT_CAP_USER_TIME_ZERO]	= "  Cap Time Zero       %"PRId64"\n",
	[INTEL_PT_TSC_BIT]		= "  TSC bit             %#"PRIx64"\n",
	[INTEL_PT_NORETCOMP_BIT]	= "  NoRETComp bit       %#"PRIx64"\n",
	[INTEL_PT_HAVE_SCHED_SWITCH]	= "  Have sched_switch   %"PRId64"\n",
	[INTEL_PT_SNAPSHOT_MODE]	= "  Snapshot mode       %"PRId64"\n",
	[INTEL_PT_PER_CPU_MMAPS]	= "  Per-cpu maps        %"PRId64"\n",
	[INTEL_PT_MTC_BIT]		= "  MTC bit             %#"PRIx64"\n",
	[INTEL_PT_TSC_CTC_N]		= "  TSC:CTC numerator   %"PRIu64"\n",
	[INTEL_PT_TSC_CTC_D]		= "  TSC:CTC denominator %"PRIu64"\n",
	[INTEL_PT_CYC_BIT]		= "  CYC bit             %#"PRIx64"\n",
};

static void intel_pt_print_info(u64 *arr, int start, int finish)
{
	int i;

	if (!dump_trace)
		return;

	for (i = start; i <= finish; i++)
		fprintf(stdout, intel_pt_info_fmts[i], arr[i]);
}

int intel_pt_process_auxtrace_info(union perf_event *event,
				   struct perf_session *session)
{
	struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
	size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS;
	struct intel_pt *pt;
	int err;

	if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
					min_sz)
		return -EINVAL;

	pt = zalloc(sizeof(struct intel_pt));
	if (!pt)
		return -ENOMEM;

	perf_config(intel_pt_perf_config, pt);

	err = auxtrace_queues__init(&pt->queues);
	if (err)
		goto err_free;

	intel_pt_log_set_name(INTEL_PT_PMU_NAME);

	pt->session = session;
	pt->machine = &session->machines.host; /* No kvm support */
	pt->auxtrace_type = auxtrace_info->type;
	pt->pmu_type = auxtrace_info->priv[INTEL_PT_PMU_TYPE];
	pt->tc.time_shift = auxtrace_info->priv[INTEL_PT_TIME_SHIFT];
	pt->tc.time_mult = auxtrace_info->priv[INTEL_PT_TIME_MULT];
	pt->tc.time_zero = auxtrace_info->priv[INTEL_PT_TIME_ZERO];
	pt->cap_user_time_zero = auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO];
	pt->tsc_bit = auxtrace_info->priv[INTEL_PT_TSC_BIT];
	pt->noretcomp_bit = auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT];
	pt->have_sched_switch = auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH];
	pt->snapshot_mode = auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE];
	pt->per_cpu_mmaps = auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS];
	intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE,
			    INTEL_PT_PER_CPU_MMAPS);

	if (auxtrace_info->header.size >= sizeof(struct auxtrace_info_event) +
					(sizeof(u64) * INTEL_PT_CYC_BIT)) {
		pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT];
		pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS];
		pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N];
		pt->tsc_ctc_ratio_d = auxtrace_info->priv[INTEL_PT_TSC_CTC_D];
		pt->cyc_bit = auxtrace_info->priv[INTEL_PT_CYC_BIT];
		intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_MTC_BIT,
				    INTEL_PT_CYC_BIT);
	}

	pt->timeless_decoding = intel_pt_timeless_decoding(pt);
	pt->have_tsc = intel_pt_have_tsc(pt);
	pt->sampling_mode = false;
	pt->est_tsc = !pt->timeless_decoding;

	pt->unknown_thread = thread__new(999999999, 999999999);
	if (!pt->unknown_thread) {
		err = -ENOMEM;
		goto err_free_queues;
	}

	/*
	 * Since this thread will not be kept in any rbtree nor in a
	 * list, initialize its list node so that at thread__put() the
	 * current thread lifetime assumption is kept and we don't segfault
	 * at list_del_init().
	 */
	INIT_LIST_HEAD(&pt->unknown_thread->node);

	err = thread__set_comm(pt->unknown_thread, "unknown", 0);
	if (err)
		goto err_delete_thread;
	if (thread__init_map_groups(pt->unknown_thread, pt->machine)) {
		err = -ENOMEM;
		goto err_delete_thread;
	}

	pt->auxtrace.process_event = intel_pt_process_event;
	pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event;
	pt->auxtrace.flush_events = intel_pt_flush;
	pt->auxtrace.free_events = intel_pt_free_events;
	pt->auxtrace.free = intel_pt_free;
	session->auxtrace = &pt->auxtrace;

	if (dump_trace)
		return 0;

	if (pt->have_sched_switch == 1) {
		pt->switch_evsel = intel_pt_find_sched_switch(session->evlist);
		if (!pt->switch_evsel) {
			pr_err("%s: missing sched_switch event\n", __func__);
			goto err_delete_thread;
		}
	} else if (pt->have_sched_switch == 2 &&
		   !intel_pt_find_switch(session->evlist)) {
		pr_err("%s: missing context_switch attribute flag\n", __func__);
		goto err_delete_thread;
	}

	if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
		pt->synth_opts = *session->itrace_synth_opts;
	} else {
		itrace_synth_opts__set_default(&pt->synth_opts);
		if (use_browser != -1) {
			pt->synth_opts.branches = false;
			pt->synth_opts.callchain = true;
		}
		if (session->itrace_synth_opts)
			pt->synth_opts.thread_stack =
				session->itrace_synth_opts->thread_stack;
	}

	if (pt->synth_opts.log)
		intel_pt_log_enable();

	/* Maximum non-turbo ratio is TSC freq / 100 MHz */
	if (pt->tc.time_mult) {
		u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000);

		pt->max_non_turbo_ratio = (tsc_freq + 50000000) / 100000000;
		intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq);
		intel_pt_log("Maximum non-turbo ratio %u\n",
			     pt->max_non_turbo_ratio);
	}

	if (pt->synth_opts.calls)
		pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
				       PERF_IP_FLAG_TRACE_END;
	if (pt->synth_opts.returns)
		pt->branches_filter |= PERF_IP_FLAG_RETURN |
				       PERF_IP_FLAG_TRACE_BEGIN;

	if (pt->synth_opts.callchain && !symbol_conf.use_callchain) {
		symbol_conf.use_callchain = true;
		if (callchain_register_param(&callchain_param) < 0) {
			symbol_conf.use_callchain = false;
			pt->synth_opts.callchain = false;
		}
	}

	err = intel_pt_synth_events(pt, session);
	if (err)
		goto err_delete_thread;

	err = auxtrace_queues__process_index(&pt->queues, session);
	if (err)
		goto err_delete_thread;

	if (pt->queues.populated)
		pt->data_queued = true;

	if (pt->timeless_decoding)
		pr_debug2("Intel PT decoding without timestamps\n");

	return 0;

err_delete_thread:
	thread__zput(pt->unknown_thread);
err_free_queues:
	intel_pt_log_disable();
	auxtrace_queues__free(&pt->queues);
	session->auxtrace = NULL;
err_free:
	free(pt);
	return err;
}
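
/*
 * Illustrative usage, assuming an Intel PT capable kernel and perf build (the
 * exact option spellings are documented with perf itself):
 *
 *	perf record -e intel_pt//u -- <workload>
 *	perf script --itrace=i100us
 *
 * The --itrace string selects which of the events synthesized above are
 * generated; here, instruction samples with a 100 microsecond period.
 */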