// SPDX-License-Identifier: GPL-2.0-only
/*
 * intel_pt.c: Intel Processor Trace support
 * Copyright (c) 2013-2015, Intel Corporation.
 */

#include <inttypes.h>
#include <stdio.h>
#include <stdbool.h>
#include <errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/zalloc.h>

#include "session.h"
#include "machine.h"
#include "memswap.h"
#include "sort.h"
#include "tool.h"
#include "event.h"
#include "evlist.h"
#include "evsel.h"
#include "map.h"
#include "color.h"
#include "thread.h"
#include "thread-stack.h"
#include "symbol.h"
#include "callchain.h"
#include "dso.h"
#include "debug.h"
#include "auxtrace.h"
#include "tsc.h"
#include "intel-pt.h"
#include "config.h"
#include "util/perf_api_probe.h"
#include "util/synthetic-events.h"
#include "time-utils.h"

#include "../arch/x86/include/uapi/asm/perf_regs.h"

#include "intel-pt-decoder/intel-pt-log.h"
#include "intel-pt-decoder/intel-pt-decoder.h"
#include "intel-pt-decoder/intel-pt-insn-decoder.h"
#include "intel-pt-decoder/intel-pt-pkt-decoder.h"

#define MAX_TIMESTAMP (~0ULL)

struct range {
	u64 start;
	u64 end;
};

struct intel_pt {
	struct auxtrace auxtrace;
	struct auxtrace_queues queues;
	struct auxtrace_heap heap;
	u32 auxtrace_type;
	struct perf_session *session;
	struct machine *machine;
	struct evsel *switch_evsel;
	struct thread *unknown_thread;
	bool timeless_decoding;
	bool sampling_mode;
	bool snapshot_mode;
	bool per_cpu_mmaps;
	bool have_tsc;
	bool data_queued;
	bool est_tsc;
	bool sync_switch;
	bool mispred_all;
	bool use_thread_stack;
	bool callstack;
	unsigned int br_stack_sz;
	unsigned int br_stack_sz_plus;
	int have_sched_switch;
	u32 pmu_type;
	u64 kernel_start;
	u64 switch_ip;
	u64 ptss_ip;
	u64 first_timestamp;

	struct perf_tsc_conversion tc;
	bool cap_user_time_zero;

	struct itrace_synth_opts synth_opts;

	bool sample_instructions;
	u64 instructions_sample_type;
	u64 instructions_id;

	bool sample_branches;
	u32 branches_filter;
	u64 branches_sample_type;
	u64 branches_id;

	bool sample_transactions;
	u64 transactions_sample_type;
	u64 transactions_id;

	bool sample_ptwrites;
	u64 ptwrites_sample_type;
	u64 ptwrites_id;

	bool sample_pwr_events;
	u64 pwr_events_sample_type;
	u64 mwait_id;
	u64 pwre_id;
	u64 exstop_id;
	u64 pwrx_id;
	u64 cbr_id;
	u64 psb_id;

	bool sample_pebs;
	struct evsel *pebs_evsel;

	u64 tsc_bit;
	u64 mtc_bit;
	u64 mtc_freq_bits;
	u32 tsc_ctc_ratio_n;
	u32 tsc_ctc_ratio_d;
	u64 cyc_bit;
	u64 noretcomp_bit;
	unsigned max_non_turbo_ratio;
	unsigned cbr2khz;

	unsigned long num_events;

	char *filter;
	struct addr_filters filts;

	struct range *time_ranges;
	unsigned int range_cnt;

	struct ip_callchain *chain;
	struct branch_stack *br_stack;

	u64 dflt_tsc_offset;
	struct rb_root vmcs_info;
};

enum switch_state {
	INTEL_PT_SS_NOT_TRACING,
	INTEL_PT_SS_UNKNOWN,
	INTEL_PT_SS_TRACING,
	INTEL_PT_SS_EXPECTING_SWITCH_EVENT,
	INTEL_PT_SS_EXPECTING_SWITCH_IP,
};

struct intel_pt_queue {
	struct intel_pt *pt;
	unsigned int queue_nr;
	struct auxtrace_buffer *buffer;
	struct auxtrace_buffer *old_buffer;
	void *decoder;
	const struct intel_pt_state *state;
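	/* Buffers for synthesized callchain and branch stack samples */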
	struct ip_callchain *chain;
	struct branch_stack *last_branch;
	union perf_event *event_buf;
	bool on_heap;
	bool stop;
	bool step_through_buffers;
	bool use_buffer_pid_tid;
	bool sync_switch;
	pid_t pid, tid;
	int cpu;
	int switch_state;
	pid_t next_tid;
	struct thread *thread;
	struct machine *guest_machine;
	struct thread *unknown_guest_thread;
	pid_t guest_machine_pid;
	bool exclude_kernel;
	bool have_sample;
	u64 time;
	u64 timestamp;
	u64 sel_timestamp;
	bool sel_start;
	unsigned int sel_idx;
	u32 flags;
	u16 insn_len;
	u64 last_insn_cnt;
	u64 ipc_insn_cnt;
	u64 ipc_cyc_cnt;
	u64 last_in_insn_cnt;
	u64 last_in_cyc_cnt;
	u64 last_br_insn_cnt;
	u64 last_br_cyc_cnt;
	unsigned int cbr_seen;
	char insn[INTEL_PT_INSN_BUF_SZ];
};

static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
			  unsigned char *buf, size_t len)
{
	struct intel_pt_pkt packet;
	size_t pos = 0;
	int ret, pkt_len, i;
	char desc[INTEL_PT_PKT_DESC_MAX];
	const char *color = PERF_COLOR_BLUE;
	enum intel_pt_pkt_ctx ctx = INTEL_PT_NO_CTX;

	color_fprintf(stdout, color,
		      ". ... Intel Processor Trace data: size %zu bytes\n",
		      len);

	while (len) {
		ret = intel_pt_get_packet(buf, len, &packet, &ctx);
		if (ret > 0)
			pkt_len = ret;
		else
			pkt_len = 1;
		printf(".");
		color_fprintf(stdout, color, "  %08x: ", pos);
		for (i = 0; i < pkt_len; i++)
			color_fprintf(stdout, color, " %02x", buf[i]);
		for (; i < 16; i++)
			color_fprintf(stdout, color, "   ");
		if (ret > 0) {
			ret = intel_pt_pkt_desc(&packet, desc,
						INTEL_PT_PKT_DESC_MAX);
			if (ret > 0)
				color_fprintf(stdout, color, " %s\n", desc);
		} else {
			color_fprintf(stdout, color, " Bad packet!\n");
		}
		pos += pkt_len;
		buf += pkt_len;
		len -= pkt_len;
	}
}

static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
				size_t len)
{
	printf(".\n");
	intel_pt_dump(pt, buf, len);
}

static void intel_pt_log_event(union perf_event *event)
{
	FILE *f = intel_pt_log_fp();

	if (!intel_pt_enable_logging || !f)
		return;

	perf_event__fprintf(event, NULL, f);
}

static void intel_pt_dump_sample(struct perf_session *session,
				 struct perf_sample *sample)
{
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);

	printf("\n");
	intel_pt_dump(pt, sample->aux_sample.data, sample->aux_sample.size);
}

static bool intel_pt_log_events(struct intel_pt *pt, u64 tm)
{
	struct perf_time_interval *range = pt->synth_opts.ptime_range;
	int n = pt->synth_opts.range_num;

	if (pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS)
		return true;

	if (pt->synth_opts.log_minus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS)
		return false;

	/* perf_time__ranges_skip_sample does not work if time is zero */
	if (!tm)
		tm = 1;

	return !n || !perf_time__ranges_skip_sample(range, n, tm);
}

static struct intel_pt_vmcs_info *intel_pt_findnew_vmcs(struct rb_root *rb_root,
							u64 vmcs,
							u64 dflt_tsc_offset)
{
	struct rb_node **p = &rb_root->rb_node;
	struct rb_node *parent = NULL;
	struct intel_pt_vmcs_info *v;

	while (*p) {
		parent = *p;
		v = rb_entry(parent, struct intel_pt_vmcs_info, rb_node);

		if (v->vmcs == vmcs)
			return v;

		if (vmcs < v->vmcs)
			p = &(*p)->rb_left;
		else
			p = &(*p)->rb_right;
	}

	v = zalloc(sizeof(*v));
	if (v) {
		v->vmcs = vmcs;
		v->tsc_offset = dflt_tsc_offset;
		v->reliable = dflt_tsc_offset;

		rb_link_node(&v->rb_node, parent, p);
		rb_insert_color(&v->rb_node, rb_root);
	}

	return v;
}

static struct intel_pt_vmcs_info *intel_pt_findnew_vmcs_info(void *data, uint64_t vmcs)
{
	struct intel_pt_queue *ptq = data;
	struct intel_pt *pt = ptq->pt;

	if (!vmcs && !pt->dflt_tsc_offset)
		return NULL;

	return intel_pt_findnew_vmcs(&pt->vmcs_info, vmcs, pt->dflt_tsc_offset);
}

static void intel_pt_free_vmcs_info(struct intel_pt *pt)
{
	struct intel_pt_vmcs_info *v;
	struct rb_node *n;

	n = rb_first(&pt->vmcs_info);
	while (n) {
		v = rb_entry(n, struct intel_pt_vmcs_info, rb_node);
		n = rb_next(n);
		rb_erase(&v->rb_node, &pt->vmcs_info);
		free(v);
	}
}

static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
				   struct auxtrace_buffer *b)
{
	bool consecutive = false;
	void *start;

	start = intel_pt_find_overlap(a->data, a->size, b->data, b->size,
				      pt->have_tsc, &consecutive,
				      pt->synth_opts.vm_time_correlation);
	if (!start)
		return -EINVAL;
	/*
	 * In the case of vm_time_correlation, the overlap might contain TSC
	 * packets that will not be fixed, and that will then no longer work for
	 * overlap detection. Avoid that by zeroing out the overlap.
	 */
	if (pt->synth_opts.vm_time_correlation)
		memset(b->data, 0, start - b->data);
	b->use_size = b->data + b->size - start;
	b->use_data = start;
	if (b->use_size && consecutive)
		b->consecutive = true;
	return 0;
}

static int intel_pt_get_buffer(struct intel_pt_queue *ptq,
			       struct auxtrace_buffer *buffer,
			       struct auxtrace_buffer *old_buffer,
			       struct intel_pt_buffer *b)
{
	bool might_overlap;

	if (!buffer->data) {
		int fd = perf_data__fd(ptq->pt->session->data);

		buffer->data = auxtrace_buffer__get_data(buffer, fd);
		if (!buffer->data)
			return -ENOMEM;
	}

	might_overlap = ptq->pt->snapshot_mode || ptq->pt->sampling_mode;
	if (might_overlap && !buffer->consecutive && old_buffer &&
	    intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer))
		return -ENOMEM;

	if (buffer->use_data) {
		b->len = buffer->use_size;
		b->buf = buffer->use_data;
	} else {
		b->len = buffer->size;
		b->buf = buffer->data;
	}
	b->ref_timestamp = buffer->reference;

	if (!old_buffer || (might_overlap && !buffer->consecutive)) {
		b->consecutive = false;
		b->trace_nr = buffer->buffer_nr + 1;
	} else {
		b->consecutive = true;
	}

	return 0;
}

/* Do not drop buffers with references - refer intel_pt_get_trace() */
static void intel_pt_lookahead_drop_buffer(struct intel_pt_queue *ptq,
					   struct auxtrace_buffer *buffer)
{
	if (!buffer || buffer == ptq->buffer || buffer == ptq->old_buffer)
		return;

	auxtrace_buffer__drop_data(buffer);
}

/* Must be serialized with respect to intel_pt_get_trace() */
static int intel_pt_lookahead(void *data, intel_pt_lookahead_cb_t cb,
			      void *cb_data)
{
	struct intel_pt_queue *ptq = data;
	struct auxtrace_buffer *buffer = ptq->buffer;
	struct auxtrace_buffer *old_buffer = ptq->old_buffer;
	struct auxtrace_queue *queue;
	int err = 0;

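	/* Walk the buffers ahead of the current position without consuming them */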
	queue = &ptq->pt->queues.queue_array[ptq->queue_nr];

	while (1) {
		struct intel_pt_buffer b = { .len = 0 };

		buffer = auxtrace_buffer__next(queue, buffer);
		if (!buffer)
			break;

		err = intel_pt_get_buffer(ptq, buffer, old_buffer, &b);
		if (err)
			break;

		if (b.len) {
			intel_pt_lookahead_drop_buffer(ptq, old_buffer);
			old_buffer = buffer;
		} else {
			intel_pt_lookahead_drop_buffer(ptq, buffer);
			continue;
		}

		err = cb(&b, cb_data);
		if (err)
			break;
	}

	if (buffer != old_buffer)
		intel_pt_lookahead_drop_buffer(ptq, buffer);
	intel_pt_lookahead_drop_buffer(ptq, old_buffer);

	return err;
}

/*
 * This function assumes data is processed sequentially only.
 * Must be serialized with respect to intel_pt_lookahead()
 */
static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
{
	struct intel_pt_queue *ptq = data;
	struct auxtrace_buffer *buffer = ptq->buffer;
	struct auxtrace_buffer *old_buffer = ptq->old_buffer;
	struct auxtrace_queue *queue;
	int err;

	if (ptq->stop) {
		b->len = 0;
		return 0;
	}

	queue = &ptq->pt->queues.queue_array[ptq->queue_nr];

	buffer = auxtrace_buffer__next(queue, buffer);
	if (!buffer) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		b->len = 0;
		return 0;
	}

	ptq->buffer = buffer;

	err = intel_pt_get_buffer(ptq, buffer, old_buffer, b);
	if (err)
		return err;

	if (ptq->step_through_buffers)
		ptq->stop = true;

	if (b->len) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		ptq->old_buffer = buffer;
	} else {
		auxtrace_buffer__drop_data(buffer);
		return intel_pt_get_trace(b, data);
	}

	return 0;
}

struct intel_pt_cache_entry {
	struct auxtrace_cache_entry entry;
	u64 insn_cnt;
	u64 byte_cnt;
	enum intel_pt_insn_op op;
	enum intel_pt_insn_branch branch;
	int length;
	int32_t rel;
	char insn[INTEL_PT_INSN_BUF_SZ];
};

static int intel_pt_config_div(const char *var, const char *value, void *data)
{
	int *d = data;
	long val;

	if (!strcmp(var, "intel-pt.cache-divisor")) {
		val = strtol(value, NULL, 0);
		if (val > 0 && val <= INT_MAX)
			*d = val;
	}

	return 0;
}

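/*
 * The divisor parsed by intel_pt_config_div() can be set via perf config,
 * e.g.:
 *   perf config intel-pt.cache-divisor=32
 */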
static int intel_pt_cache_divisor(void)
{
	static int d;

	if (d)
		return d;

	perf_config(intel_pt_config_div, &d);

	if (!d)
		d = 64;

	return d;
}

static unsigned int intel_pt_cache_size(struct dso *dso,
					struct machine *machine)
{
	off_t size;

	size = dso__data_size(dso, machine);
	size /= intel_pt_cache_divisor();
	if (size < 1000)
		return 10;
	if (size > (1 << 21))
		return 21;
	return 32 - __builtin_clz(size);
}

static struct auxtrace_cache *intel_pt_cache(struct dso *dso,
					     struct machine *machine)
{
	struct auxtrace_cache *c;
	unsigned int bits;

	if (dso->auxtrace_cache)
		return dso->auxtrace_cache;

	bits = intel_pt_cache_size(dso, machine);

	/* Ignoring cache creation failure */
	c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200);

	dso->auxtrace_cache = c;

	return c;
}

static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
			      u64 offset, u64 insn_cnt, u64 byte_cnt,
			      struct intel_pt_insn *intel_pt_insn)
{
	struct auxtrace_cache *c = intel_pt_cache(dso, machine);
	struct intel_pt_cache_entry *e;
	int err;

	if (!c)
		return -ENOMEM;

	e = auxtrace_cache__alloc_entry(c);
	if (!e)
		return -ENOMEM;

	e->insn_cnt = insn_cnt;
	e->byte_cnt = byte_cnt;
	e->op = intel_pt_insn->op;
	e->branch = intel_pt_insn->branch;
	e->length = intel_pt_insn->length;
	e->rel = intel_pt_insn->rel;
	memcpy(e->insn, intel_pt_insn->buf, INTEL_PT_INSN_BUF_SZ);

	err = auxtrace_cache__add(c, offset, &e->entry);
	if (err)
		auxtrace_cache__free_entry(c, e);

	return err;
}

static struct intel_pt_cache_entry *
intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
{
	struct auxtrace_cache *c = intel_pt_cache(dso, machine);

	if (!c)
		return NULL;

	return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
}

static void intel_pt_cache_invalidate(struct dso *dso, struct machine *machine,
				      u64 offset)
{
	struct auxtrace_cache *c = intel_pt_cache(dso, machine);

	if (!c)
		return;

	auxtrace_cache__remove(dso->auxtrace_cache, offset);
}

static inline bool intel_pt_guest_kernel_ip(uint64_t ip)
{
	/* Assumes 64-bit kernel */
	return ip & (1ULL << 63);
}

static inline u8 intel_pt_nr_cpumode(struct intel_pt_queue *ptq, uint64_t ip, bool nr)
{
	if (nr) {
		return intel_pt_guest_kernel_ip(ip) ?
		       PERF_RECORD_MISC_GUEST_KERNEL :
		       PERF_RECORD_MISC_GUEST_USER;
	}

	return ip >= ptq->pt->kernel_start ?
	       PERF_RECORD_MISC_KERNEL :
	       PERF_RECORD_MISC_USER;
}

static inline u8 intel_pt_cpumode(struct intel_pt_queue *ptq, uint64_t from_ip, uint64_t to_ip)
{
	/* No support for non-zero CS base */
	if (from_ip)
		return intel_pt_nr_cpumode(ptq, from_ip, ptq->state->from_nr);
	return intel_pt_nr_cpumode(ptq, to_ip, ptq->state->to_nr);
}

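/* Find (and cache) the guest machine used to resolve VMX non-root IPs */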
static int intel_pt_get_guest(struct intel_pt_queue *ptq)
{
	struct machines *machines = &ptq->pt->session->machines;
	struct machine *machine;
	pid_t pid = ptq->pid <= 0 ? DEFAULT_GUEST_KERNEL_ID : ptq->pid;

	if (ptq->guest_machine && pid == ptq->guest_machine_pid)
		return 0;

	ptq->guest_machine = NULL;
	thread__zput(ptq->unknown_guest_thread);

	machine = machines__find_guest(machines, pid);
	if (!machine)
		return -1;

	ptq->unknown_guest_thread = machine__idle_thread(machine);
	if (!ptq->unknown_guest_thread)
		return -1;

	ptq->guest_machine = machine;
	ptq->guest_machine_pid = pid;

	return 0;
}

static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
				   uint64_t *insn_cnt_ptr, uint64_t *ip,
				   uint64_t to_ip, uint64_t max_insn_cnt,
				   void *data)
{
	struct intel_pt_queue *ptq = data;
	struct machine *machine = ptq->pt->machine;
	struct thread *thread;
	struct addr_location al;
	unsigned char buf[INTEL_PT_INSN_BUF_SZ];
	ssize_t len;
	int x86_64;
	u8 cpumode;
	u64 offset, start_offset, start_ip;
	u64 insn_cnt = 0;
	bool one_map = true;
	bool nr;

	intel_pt_insn->length = 0;

	if (to_ip && *ip == to_ip)
		goto out_no_cache;

	nr = ptq->state->to_nr;
	cpumode = intel_pt_nr_cpumode(ptq, *ip, nr);

	if (nr) {
		if (cpumode != PERF_RECORD_MISC_GUEST_KERNEL ||
		    intel_pt_get_guest(ptq))
			return -EINVAL;
		machine = ptq->guest_machine;
		thread = ptq->unknown_guest_thread;
	} else {
		thread = ptq->thread;
		if (!thread) {
			if (cpumode != PERF_RECORD_MISC_KERNEL)
				return -EINVAL;
			thread = ptq->pt->unknown_thread;
		}
	}

	while (1) {
		if (!thread__find_map(thread, cpumode, *ip, &al) || !al.map->dso)
			return -EINVAL;

		if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
		    dso__data_status_seen(al.map->dso,
					  DSO_DATA_STATUS_SEEN_ITRACE))
			return -ENOENT;

		offset = al.map->map_ip(al.map, *ip);

		if (!to_ip && one_map) {
			struct intel_pt_cache_entry *e;

			e = intel_pt_cache_lookup(al.map->dso, machine, offset);
			if (e &&
			    (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) {
				*insn_cnt_ptr = e->insn_cnt;
				*ip += e->byte_cnt;
				intel_pt_insn->op = e->op;
				intel_pt_insn->branch = e->branch;
				intel_pt_insn->length = e->length;
				intel_pt_insn->rel = e->rel;
				memcpy(intel_pt_insn->buf, e->insn,
				       INTEL_PT_INSN_BUF_SZ);
				intel_pt_log_insn_no_data(intel_pt_insn, *ip);
				return 0;
			}
		}

		start_offset = offset;
		start_ip = *ip;

		/* Load maps to ensure dso->is_64_bit has been updated */
		map__load(al.map);

		x86_64 = al.map->dso->is_64_bit;

		while (1) {
			len = dso__data_read_offset(al.map->dso, machine,
						    offset, buf,
						    INTEL_PT_INSN_BUF_SZ);
			if (len <= 0)
				return -EINVAL;

			if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn))
				return -EINVAL;

			intel_pt_log_insn(intel_pt_insn, *ip);

			insn_cnt += 1;

			if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH)
				goto out;

			if (max_insn_cnt && insn_cnt >= max_insn_cnt)
				goto out_no_cache;

			*ip += intel_pt_insn->length;

			if (to_ip && *ip == to_ip)
				goto out_no_cache;

			if (*ip >= al.map->end)
				break;

			offset += intel_pt_insn->length;
		}
		one_map = false;
	}
out:
	*insn_cnt_ptr = insn_cnt;

	if (!one_map)
		goto out_no_cache;

	/*
	 * Didn't look up in the 'to_ip' case, so do it now to prevent duplicate
	 * entries.
	 */
	if (to_ip) {
		struct intel_pt_cache_entry *e;

		e = intel_pt_cache_lookup(al.map->dso, machine, start_offset);
		if (e)
			return 0;
	}

	/* Ignore cache errors */
	intel_pt_cache_add(al.map->dso, machine, start_offset, insn_cnt,
			   *ip - start_ip, intel_pt_insn);

	return 0;

out_no_cache:
	*insn_cnt_ptr = insn_cnt;
	return 0;
}

static bool intel_pt_match_pgd_ip(struct intel_pt *pt, uint64_t ip,
				  uint64_t offset, const char *filename)
{
	struct addr_filter *filt;
	bool have_filter = false;
	bool hit_tracestop = false;
	bool hit_filter = false;

	list_for_each_entry(filt, &pt->filts.head, list) {
		if (filt->start)
			have_filter = true;

		if ((filename && !filt->filename) ||
		    (!filename && filt->filename) ||
		    (filename && strcmp(filename, filt->filename)))
			continue;

		if (!(offset >= filt->addr && offset < filt->addr + filt->size))
			continue;

		intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s hit filter: %s offset %#"PRIx64" size %#"PRIx64"\n",
			     ip, offset, filename ? filename : "[kernel]",
			     filt->start ? "filter" : "stop",
			     filt->addr, filt->size);

		if (filt->start)
			hit_filter = true;
		else
			hit_tracestop = true;
	}

	if (!hit_tracestop && !hit_filter)
		intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s is not in a filter region\n",
			     ip, offset, filename ? filename : "[kernel]");

	return hit_tracestop || (have_filter && !hit_filter);
}

static int __intel_pt_pgd_ip(uint64_t ip, void *data)
{
	struct intel_pt_queue *ptq = data;
	struct thread *thread;
	struct addr_location al;
	u8 cpumode;
	u64 offset;

	if (ptq->state->to_nr) {
		if (intel_pt_guest_kernel_ip(ip))
			return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL);
		/* No support for decoding guest user space */
		return -EINVAL;
	} else if (ip >= ptq->pt->kernel_start) {
		return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL);
	}

	cpumode = PERF_RECORD_MISC_USER;

	thread = ptq->thread;
	if (!thread)
		return -EINVAL;

	if (!thread__find_map(thread, cpumode, ip, &al) || !al.map->dso)
		return -EINVAL;

	offset = al.map->map_ip(al.map, ip);

	return intel_pt_match_pgd_ip(ptq->pt, ip, offset,
				     al.map->dso->long_name);
}

static bool intel_pt_pgd_ip(uint64_t ip, void *data)
{
	return __intel_pt_pgd_ip(ip, data) > 0;
}

static bool intel_pt_get_config(struct intel_pt *pt,
				struct perf_event_attr *attr, u64 *config)
{
	if (attr->type == pt->pmu_type) {
		if (config)
			*config = attr->config;
		return true;
	}

	return false;
}

static bool intel_pt_exclude_kernel(struct intel_pt *pt)
{
	struct evsel *evsel;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->core.attr, NULL) &&
		    !evsel->core.attr.exclude_kernel)
			return false;
	}
	return true;
}

static bool intel_pt_return_compression(struct intel_pt *pt)
{
	struct evsel *evsel;
	u64 config;

	if (!pt->noretcomp_bit)
		return true;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->core.attr, &config) &&
		    (config & pt->noretcomp_bit))
			return false;
	}
	return true;
}

static bool intel_pt_branch_enable(struct intel_pt *pt)
{
	struct evsel *evsel;
	u64 config;

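	/*
	 * Config bit 0 ('pt') is the trace enable bit and bit 13 (0x2000,
	 * 'branch') is BranchEn, so branch tracing is considered off only
	 * when tracing is enabled with BranchEn clear.
	 */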
	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->core.attr, &config) &&
		    (config & 1) && !(config & 0x2000))
			return false;
	}
	return true;
}

static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
{
	struct evsel *evsel;
	unsigned int shift;
	u64 config;

	if (!pt->mtc_freq_bits)
		return 0;

	for (shift = 0, config = pt->mtc_freq_bits; !(config & 1); shift++)
		config >>= 1;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->core.attr, &config))
			return (config & pt->mtc_freq_bits) >> shift;
	}
	return 0;
}

static bool intel_pt_timeless_decoding(struct intel_pt *pt)
{
	struct evsel *evsel;
	bool timeless_decoding = true;
	u64 config;

	if (!pt->tsc_bit || !pt->cap_user_time_zero || pt->synth_opts.timeless_decoding)
		return true;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (!(evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
			return true;
		if (intel_pt_get_config(pt, &evsel->core.attr, &config)) {
			if (config & pt->tsc_bit)
				timeless_decoding = false;
			else
				return true;
		}
	}
	return timeless_decoding;
}

static bool intel_pt_tracing_kernel(struct intel_pt *pt)
{
	struct evsel *evsel;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->core.attr, NULL) &&
		    !evsel->core.attr.exclude_kernel)
			return true;
	}
	return false;
}

static bool intel_pt_have_tsc(struct intel_pt *pt)
{
	struct evsel *evsel;
	bool have_tsc = false;
	u64 config;

	if (!pt->tsc_bit)
		return false;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->core.attr, &config)) {
			if (config & pt->tsc_bit)
				have_tsc = true;
			else
				return false;
		}
	}
	return have_tsc;
}

static bool intel_pt_have_mtc(struct intel_pt *pt)
{
	struct evsel *evsel;
	u64 config;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->core.attr, &config) &&
		    (config & pt->mtc_bit))
			return true;
	}
	return false;
}

static bool intel_pt_sampling_mode(struct intel_pt *pt)
{
	struct evsel *evsel;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if ((evsel->core.attr.sample_type & PERF_SAMPLE_AUX) &&
		    evsel->core.attr.aux_sample_size)
			return true;
	}
	return false;
}

static u64 intel_pt_ctl(struct intel_pt *pt)
{
	struct evsel *evsel;
	u64 config;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->core.attr, &config))
			return config;
	}
	return 0;
}

static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns)
{
	u64 quot, rem;

	quot = ns / pt->tc.time_mult;
	rem = ns % pt->tc.time_mult;
	return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) /
		pt->tc.time_mult;
}

static struct ip_callchain *intel_pt_alloc_chain(struct intel_pt *pt)
{
	size_t sz = sizeof(struct ip_callchain);

	/* Add 1 to callchain_sz for callchain context */
	sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64);
	return zalloc(sz);
}

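/*
 * Mark evsels whose samples will get a callchain added late (see
 * intel_pt_add_callchain()) and allocate the shared buffer used to build it.
 */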
static int intel_pt_callchain_init(struct intel_pt *pt)
{
	struct evsel *evsel;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (!(evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN))
			evsel->synth_sample_type |= PERF_SAMPLE_CALLCHAIN;
	}

	pt->chain = intel_pt_alloc_chain(pt);
	if (!pt->chain)
		return -ENOMEM;

	return 0;
}

static void intel_pt_add_callchain(struct intel_pt *pt,
				   struct perf_sample *sample)
{
	struct thread *thread = machine__findnew_thread(pt->machine,
							sample->pid,
							sample->tid);

	thread_stack__sample_late(thread, sample->cpu, pt->chain,
				  pt->synth_opts.callchain_sz + 1, sample->ip,
				  pt->kernel_start);

	sample->callchain = pt->chain;
}

static struct branch_stack *intel_pt_alloc_br_stack(unsigned int entry_cnt)
{
	size_t sz = sizeof(struct branch_stack);

	sz += entry_cnt * sizeof(struct branch_entry);
	return zalloc(sz);
}

static int intel_pt_br_stack_init(struct intel_pt *pt)
{
	struct evsel *evsel;

	evlist__for_each_entry(pt->session->evlist, evsel) {
		if (!(evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK))
			evsel->synth_sample_type |= PERF_SAMPLE_BRANCH_STACK;
	}

	pt->br_stack = intel_pt_alloc_br_stack(pt->br_stack_sz);
	if (!pt->br_stack)
		return -ENOMEM;

	return 0;
}

static void intel_pt_add_br_stack(struct intel_pt *pt,
				  struct perf_sample *sample)
{
	struct thread *thread = machine__findnew_thread(pt->machine,
							sample->pid,
							sample->tid);

	thread_stack__br_sample_late(thread, sample->cpu, pt->br_stack,
				     pt->br_stack_sz, sample->ip,
				     pt->kernel_start);

	sample->branch_stack = pt->br_stack;
}

/* INTEL_PT_LBR_0, INTEL_PT_LBR_1 and INTEL_PT_LBR_2 */
#define LBRS_MAX (INTEL_PT_BLK_ITEM_ID_CNT * 3U)

static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
						   unsigned int queue_nr)
{
	struct intel_pt_params params = { .get_trace = 0, };
	struct perf_env *env = pt->machine->env;
	struct intel_pt_queue *ptq;

	ptq = zalloc(sizeof(struct intel_pt_queue));
	if (!ptq)
		return NULL;

	if (pt->synth_opts.callchain) {
		ptq->chain = intel_pt_alloc_chain(pt);
		if (!ptq->chain)
			goto out_free;
	}

	if (pt->synth_opts.last_branch || pt->synth_opts.other_events) {
		unsigned int entry_cnt = max(LBRS_MAX, pt->br_stack_sz);

		ptq->last_branch = intel_pt_alloc_br_stack(entry_cnt);
		if (!ptq->last_branch)
			goto out_free;
	}

	ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
	if (!ptq->event_buf)
		goto out_free;

	ptq->pt = pt;
	ptq->queue_nr = queue_nr;
	ptq->exclude_kernel = intel_pt_exclude_kernel(pt);
	ptq->pid = -1;
	ptq->tid = -1;
	ptq->cpu = -1;
	ptq->next_tid = -1;

	params.get_trace = intel_pt_get_trace;
	params.walk_insn = intel_pt_walk_next_insn;
	params.lookahead = intel_pt_lookahead;
	params.findnew_vmcs_info = intel_pt_findnew_vmcs_info;
	params.data = ptq;
	params.return_compression = intel_pt_return_compression(pt);
	params.branch_enable = intel_pt_branch_enable(pt);
	params.ctl = intel_pt_ctl(pt);
	params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
	params.mtc_period = intel_pt_mtc_period(pt);
	params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
	params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;
	params.quick = pt->synth_opts.quick;
	params.vm_time_correlation = pt->synth_opts.vm_time_correlation;
	params.vm_tm_corr_dry_run = pt->synth_opts.vm_tm_corr_dry_run;
	params.first_timestamp = pt->first_timestamp;

	if (pt->filts.cnt > 0)
		params.pgd_ip = intel_pt_pgd_ip;

	if (pt->synth_opts.instructions) {
		if (pt->synth_opts.period) {
			switch (pt->synth_opts.period_type) {
			case PERF_ITRACE_PERIOD_INSTRUCTIONS:
				params.period_type =
						INTEL_PT_PERIOD_INSTRUCTIONS;
				params.period = pt->synth_opts.period;
				break;
			case PERF_ITRACE_PERIOD_TICKS:
				params.period_type = INTEL_PT_PERIOD_TICKS;
				params.period = pt->synth_opts.period;
				break;
			case PERF_ITRACE_PERIOD_NANOSECS:
				params.period_type = INTEL_PT_PERIOD_TICKS;
				params.period = intel_pt_ns_to_ticks(pt,
							pt->synth_opts.period);
				break;
			default:
				break;
			}
		}

		if (!params.period) {
			params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS;
			params.period = 1;
		}
	}

	if (env->cpuid && !strncmp(env->cpuid, "GenuineIntel,6,92,", 18))
		params.flags |= INTEL_PT_FUP_WITH_NLIP;

	ptq->decoder = intel_pt_decoder_new(&params);
	if (!ptq->decoder)
		goto out_free;

	return ptq;

out_free:
	zfree(&ptq->event_buf);
	zfree(&ptq->last_branch);
	zfree(&ptq->chain);
	free(ptq);
	return NULL;
}

static void intel_pt_free_queue(void *priv)
{
	struct intel_pt_queue *ptq = priv;

	if (!ptq)
		return;
	thread__zput(ptq->thread);
	thread__zput(ptq->unknown_guest_thread);
	intel_pt_decoder_free(ptq->decoder);
	zfree(&ptq->event_buf);
	zfree(&ptq->last_branch);
	zfree(&ptq->chain);
	free(ptq);
}

static void intel_pt_first_timestamp(struct intel_pt *pt, u64 timestamp)
{
	unsigned int i;

	pt->first_timestamp = timestamp;

	for (i = 0; i < pt->queues.nr_queues; i++) {
		struct auxtrace_queue *queue = &pt->queues.queue_array[i];
		struct intel_pt_queue *ptq = queue->priv;

		if (ptq && ptq->decoder)
			intel_pt_set_first_timestamp(ptq->decoder, timestamp);
	}
}

static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
				     struct auxtrace_queue *queue)
{
	struct intel_pt_queue *ptq = queue->priv;

	if (queue->tid == -1 || pt->have_sched_switch) {
		ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu);
		if (ptq->tid == -1)
			ptq->pid = -1;
		thread__zput(ptq->thread);
	}

	if (!ptq->thread && ptq->tid != -1)
		ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid);

	if (ptq->thread) {
		ptq->pid = ptq->thread->pid_;
		if (queue->cpu == -1)
			ptq->cpu = ptq->thread->cpu;
	}
}

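/* Translate decoder state flags into perf's PERF_IP_FLAG_* sample flags */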
static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
{
	if (ptq->state->flags & INTEL_PT_ABORT_TX) {
		ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
	} else if (ptq->state->flags & INTEL_PT_ASYNC) {
		if (!ptq->state->to_ip)
			ptq->flags = PERF_IP_FLAG_BRANCH |
				     PERF_IP_FLAG_TRACE_END;
		else if (ptq->state->from_nr && !ptq->state->to_nr)
			ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
				     PERF_IP_FLAG_VMEXIT;
		else
			ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
				     PERF_IP_FLAG_ASYNC |
				     PERF_IP_FLAG_INTERRUPT;
		ptq->insn_len = 0;
	} else {
		if (ptq->state->from_ip)
			ptq->flags = intel_pt_insn_type(ptq->state->insn_op);
		else
			ptq->flags = PERF_IP_FLAG_BRANCH |
				     PERF_IP_FLAG_TRACE_BEGIN;
		if (ptq->state->flags & INTEL_PT_IN_TX)
			ptq->flags |= PERF_IP_FLAG_IN_TX;
		ptq->insn_len = ptq->state->insn_len;
		memcpy(ptq->insn, ptq->state->insn, INTEL_PT_INSN_BUF_SZ);
	}

	if (ptq->state->type & INTEL_PT_TRACE_BEGIN)
		ptq->flags |= PERF_IP_FLAG_TRACE_BEGIN;
	if (ptq->state->type & INTEL_PT_TRACE_END)
		ptq->flags |= PERF_IP_FLAG_TRACE_END;
}

static void intel_pt_setup_time_range(struct intel_pt *pt,
				      struct intel_pt_queue *ptq)
{
	if (!pt->range_cnt)
		return;

	ptq->sel_timestamp = pt->time_ranges[0].start;
	ptq->sel_idx = 0;

	if (ptq->sel_timestamp) {
		ptq->sel_start = true;
	} else {
		ptq->sel_timestamp = pt->time_ranges[0].end;
		ptq->sel_start = false;
	}
}

static int intel_pt_setup_queue(struct intel_pt *pt,
				struct auxtrace_queue *queue,
				unsigned int queue_nr)
{
	struct intel_pt_queue *ptq = queue->priv;

	if (list_empty(&queue->head))
		return 0;

	if (!ptq) {
		ptq = intel_pt_alloc_queue(pt, queue_nr);
		if (!ptq)
			return -ENOMEM;
		queue->priv = ptq;

		if (queue->cpu != -1)
			ptq->cpu = queue->cpu;
		ptq->tid = queue->tid;

		ptq->cbr_seen = UINT_MAX;

		if (pt->sampling_mode && !pt->snapshot_mode &&
		    pt->timeless_decoding)
			ptq->step_through_buffers = true;

		ptq->sync_switch = pt->sync_switch;

		intel_pt_setup_time_range(pt, ptq);
	}

	if (!ptq->on_heap &&
	    (!ptq->sync_switch ||
	     ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) {
		const struct intel_pt_state *state;
		int ret;

		if (pt->timeless_decoding)
			return 0;

		intel_pt_log("queue %u getting timestamp\n", queue_nr);
		intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
			     queue_nr, ptq->cpu, ptq->pid, ptq->tid);

		if (ptq->sel_start && ptq->sel_timestamp) {
			ret = intel_pt_fast_forward(ptq->decoder,
						    ptq->sel_timestamp);
			if (ret)
				return ret;
		}

		while (1) {
			state = intel_pt_decode(ptq->decoder);
			if (state->err) {
				if (state->err == INTEL_PT_ERR_NODATA) {
					intel_pt_log("queue %u has no timestamp\n",
						     queue_nr);
					return 0;
				}
				continue;
			}
			if (state->timestamp)
				break;
		}

		ptq->timestamp = state->timestamp;
		intel_pt_log("queue %u timestamp 0x%" PRIx64 "\n",
			     queue_nr, ptq->timestamp);
		ptq->state = state;
		ptq->have_sample = true;
		if (ptq->sel_start && ptq->sel_timestamp &&
		    ptq->timestamp < ptq->sel_timestamp)
			ptq->have_sample = false;
		intel_pt_sample_flags(ptq);
		ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp);
		if (ret)
			return ret;
		ptq->on_heap = true;
	}

	return 0;
}

static int intel_pt_setup_queues(struct intel_pt *pt)
{
	unsigned int i;
	int ret;

	for (i = 0; i < pt->queues.nr_queues; i++) {
		ret = intel_pt_setup_queue(pt, &pt->queues.queue_array[i], i);
		if (ret)
			return ret;
	}
	return 0;
}

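/* Honour the itrace 's' option by skipping the first 'initial_skip' samples */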
static inline bool intel_pt_skip_event(struct intel_pt *pt)
{
	return pt->synth_opts.initial_skip &&
	       pt->num_events++ < pt->synth_opts.initial_skip;
}

/*
 * Cannot count CBR as skipped because it won't go away until cbr == cbr_seen.
 * Also ensure CBR is first non-skipped event by allowing for 4 more samples
 * from this decoder state.
 */
static inline bool intel_pt_skip_cbr_event(struct intel_pt *pt)
{
	return pt->synth_opts.initial_skip &&
	       pt->num_events + 4 < pt->synth_opts.initial_skip;
}

static void intel_pt_prep_a_sample(struct intel_pt_queue *ptq,
				   union perf_event *event,
				   struct perf_sample *sample)
{
	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.size = sizeof(struct perf_event_header);

	sample->pid = ptq->pid;
	sample->tid = ptq->tid;
	sample->cpu = ptq->cpu;
	sample->insn_len = ptq->insn_len;
	memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
}

static void intel_pt_prep_b_sample(struct intel_pt *pt,
				   struct intel_pt_queue *ptq,
				   union perf_event *event,
				   struct perf_sample *sample)
{
	intel_pt_prep_a_sample(ptq, event, sample);

	if (!pt->timeless_decoding)
		sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc);

	sample->ip = ptq->state->from_ip;
	sample->addr = ptq->state->to_ip;
	sample->cpumode = intel_pt_cpumode(ptq, sample->ip, sample->addr);
	sample->period = 1;
	sample->flags = ptq->flags;

	event->sample.header.misc = sample->cpumode;
}

static int intel_pt_inject_event(union perf_event *event,
				 struct perf_sample *sample, u64 type)
{
	event->header.size = perf_event__sample_event_size(sample, type, 0);
	return perf_event__synthesize_sample(event, type, 0, sample);
}

static inline int intel_pt_opt_inject(struct intel_pt *pt,
				      union perf_event *event,
				      struct perf_sample *sample, u64 type)
{
	if (!pt->synth_opts.inject)
		return 0;

	return intel_pt_inject_event(event, sample, type);
}

static int intel_pt_deliver_synth_event(struct intel_pt *pt,
					union perf_event *event,
					struct perf_sample *sample, u64 type)
{
	int ret;

	ret = intel_pt_opt_inject(pt, event, sample, type);
	if (ret)
		return ret;

	ret = perf_session__deliver_synth_event(pt->session, event, sample);
	if (ret)
		pr_err("Intel PT: failed to deliver event, error %d\n", ret);

	return ret;
}

static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
{
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };
	struct dummy_branch_stack {
		u64 nr;
		u64 hw_idx;
		struct branch_entry entries;
	} dummy_bs;

	if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
		return 0;

	if (intel_pt_skip_event(pt))
		return 0;

	intel_pt_prep_b_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->branches_id;
	sample.stream_id = ptq->pt->branches_id;

	/*
	 * perf report cannot handle events without a branch stack when using
	 * SORT_MODE__BRANCH so make a dummy one.
	 */
	if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) {
		dummy_bs = (struct dummy_branch_stack){
			.nr = 1,
			.hw_idx = -1ULL,
			.entries = {
				.from = sample.ip,
				.to = sample.addr,
			},
		};
		sample.branch_stack = (struct branch_stack *)&dummy_bs;
	}

	if (ptq->state->flags & INTEL_PT_SAMPLE_IPC)
		sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt;
	if (sample.cyc_cnt) {
		sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt;
		ptq->last_br_insn_cnt = ptq->ipc_insn_cnt;
		ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt;
	}

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->branches_sample_type);
}

static void intel_pt_prep_sample(struct intel_pt *pt,
				 struct intel_pt_queue *ptq,
				 union perf_event *event,
				 struct perf_sample *sample)
{
	intel_pt_prep_b_sample(pt, ptq, event, sample);

	if (pt->synth_opts.callchain) {
		thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain,
				     pt->synth_opts.callchain_sz + 1,
				     sample->ip, pt->kernel_start);
		sample->callchain = ptq->chain;
	}

	if (pt->synth_opts.last_branch) {
		thread_stack__br_sample(ptq->thread, ptq->cpu, ptq->last_branch,
					pt->br_stack_sz);
		sample->branch_stack = ptq->last_branch;
	}
}

static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
{
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	if (intel_pt_skip_event(pt))
		return 0;

	intel_pt_prep_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->instructions_id;
	sample.stream_id = ptq->pt->instructions_id;
	if (pt->synth_opts.quick)
		sample.period = 1;
	else
		sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;

	if (ptq->state->flags & INTEL_PT_SAMPLE_IPC)
		sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt;
	if (sample.cyc_cnt) {
		sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt;
		ptq->last_in_insn_cnt = ptq->ipc_insn_cnt;
		ptq->last_in_cyc_cnt = ptq->ipc_cyc_cnt;
	}

	ptq->last_insn_cnt = ptq->state->tot_insn_cnt;

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->instructions_sample_type);
}

static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
{
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	if (intel_pt_skip_event(pt))
		return 0;

	intel_pt_prep_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->transactions_id;
	sample.stream_id = ptq->pt->transactions_id;

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->transactions_sample_type);
}

static void intel_pt_prep_p_sample(struct intel_pt *pt,
				   struct intel_pt_queue *ptq,
				   union perf_event *event,
				   struct perf_sample *sample)
{
	intel_pt_prep_sample(pt, ptq, event, sample);

	/*
	 * Zero IP is used to mean "trace start" but that is not the case for
	 * power or PTWRITE events with no IP, so clear the flags.
	 */
	if (!sample->ip)
		sample->flags = 0;
}

static int intel_pt_synth_ptwrite_sample(struct intel_pt_queue *ptq)
{
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };
	struct perf_synth_intel_ptwrite raw;

	if (intel_pt_skip_event(pt))
		return 0;

	intel_pt_prep_p_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->ptwrites_id;
	sample.stream_id = ptq->pt->ptwrites_id;

	raw.flags = 0;
	raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);
	raw.payload = cpu_to_le64(ptq->state->ptw_payload);

	sample.raw_size = perf_synth__raw_size(raw);
	sample.raw_data = perf_synth__raw_data(&raw);

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->ptwrites_sample_type);
}

static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq)
{
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };
	struct perf_synth_intel_cbr raw;
	u32 flags;

	if (intel_pt_skip_cbr_event(pt))
		return 0;

	ptq->cbr_seen = ptq->state->cbr;

	intel_pt_prep_p_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->cbr_id;
	sample.stream_id = ptq->pt->cbr_id;

	flags = (u16)ptq->state->cbr_payload | (pt->max_non_turbo_ratio << 16);
	raw.flags = cpu_to_le32(flags);
	raw.freq = cpu_to_le32(raw.cbr * pt->cbr2khz);
	raw.reserved3 = 0;

	sample.raw_size = perf_synth__raw_size(raw);
	sample.raw_data = perf_synth__raw_data(&raw);

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->pwr_events_sample_type);
}

static int intel_pt_synth_psb_sample(struct intel_pt_queue *ptq)
{
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };
	struct perf_synth_intel_psb raw;

	if (intel_pt_skip_event(pt))
		return 0;

	intel_pt_prep_p_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->psb_id;
	sample.stream_id = ptq->pt->psb_id;
	sample.flags = 0;

	raw.reserved = 0;
	raw.offset = ptq->state->psb_offset;

	sample.raw_size = perf_synth__raw_size(raw);
	sample.raw_data = perf_synth__raw_data(&raw);

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->pwr_events_sample_type);
}

static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq)
{
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };
	struct perf_synth_intel_mwait raw;

	if (intel_pt_skip_event(pt))
		return 0;

	intel_pt_prep_p_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->mwait_id;
	sample.stream_id = ptq->pt->mwait_id;

	raw.reserved = 0;
	raw.payload = cpu_to_le64(ptq->state->mwait_payload);

	sample.raw_size = perf_synth__raw_size(raw);
	sample.raw_data = perf_synth__raw_data(&raw);

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->pwr_events_sample_type);
}

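/*
 * For the PWRE and PWRX samples below, the raw packet payload is forwarded
 * unchanged; decoding the payload fields is left to consumers such as
 * perf script.
 */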
static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq)
{
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };
	struct perf_synth_intel_pwre raw;

	if (intel_pt_skip_event(pt))
		return 0;

	intel_pt_prep_p_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->pwre_id;
	sample.stream_id = ptq->pt->pwre_id;

	raw.reserved = 0;
	raw.payload = cpu_to_le64(ptq->state->pwre_payload);

	sample.raw_size = perf_synth__raw_size(raw);
	sample.raw_data = perf_synth__raw_data(&raw);

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->pwr_events_sample_type);
}

static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq)
{
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };
	struct perf_synth_intel_exstop raw;

	if (intel_pt_skip_event(pt))
		return 0;

	intel_pt_prep_p_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->exstop_id;
	sample.stream_id = ptq->pt->exstop_id;

	raw.flags = 0;
	raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);

	sample.raw_size = perf_synth__raw_size(raw);
	sample.raw_data = perf_synth__raw_data(&raw);

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->pwr_events_sample_type);
}

static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
{
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };
	struct perf_synth_intel_pwrx raw;

	if (intel_pt_skip_event(pt))
		return 0;

	intel_pt_prep_p_sample(pt, ptq, event, &sample);

	sample.id = ptq->pt->pwrx_id;
	sample.stream_id = ptq->pt->pwrx_id;

	raw.reserved = 0;
	raw.payload = cpu_to_le64(ptq->state->pwrx_payload);

	sample.raw_size = perf_synth__raw_size(raw);
	sample.raw_data = perf_synth__raw_data(&raw);

	return intel_pt_deliver_synth_event(pt, event, &sample,
					    pt->pwr_events_sample_type);
}

/*
 * PEBS gp_regs array indexes plus 1 so that 0 means not present. Refer
 * intel_pt_add_gp_regs().
 */
static const int pebs_gp_regs[] = {
	[PERF_REG_X86_FLAGS] = 1,
	[PERF_REG_X86_IP] = 2,
	[PERF_REG_X86_AX] = 3,
	[PERF_REG_X86_CX] = 4,
	[PERF_REG_X86_DX] = 5,
	[PERF_REG_X86_BX] = 6,
	[PERF_REG_X86_SP] = 7,
	[PERF_REG_X86_BP] = 8,
	[PERF_REG_X86_SI] = 9,
	[PERF_REG_X86_DI] = 10,
	[PERF_REG_X86_R8] = 11,
	[PERF_REG_X86_R9] = 12,
	[PERF_REG_X86_R10] = 13,
	[PERF_REG_X86_R11] = 14,
	[PERF_REG_X86_R12] = 15,
	[PERF_REG_X86_R13] = 16,
	[PERF_REG_X86_R14] = 17,
	[PERF_REG_X86_R15] = 18,
};

static u64 *intel_pt_add_gp_regs(struct regs_dump *intr_regs, u64 *pos,
				 const struct intel_pt_blk_items *items,
				 u64 regs_mask)
{
	const u64 *gp_regs = items->val[INTEL_PT_GP_REGS_POS];
	u32 mask = items->mask[INTEL_PT_GP_REGS_POS];
	u32 bit;
	int i;

	for (i = 0, bit = 1; i < PERF_REG_X86_64_MAX; i++, bit <<= 1) {
		/* Get the PEBS gp_regs array index */
		int n = pebs_gp_regs[i] - 1;

		if (n < 0)
			continue;
		/*
		 * Add only registers that were requested (i.e. 'regs_mask') and
		 * that were provided (i.e. 'mask'), and update the resulting
		 * mask (i.e. 'intr_regs->mask') accordingly.
		 */
		if (mask & 1 << n && regs_mask & bit) {
			intr_regs->mask |= bit;
			*pos++ = gp_regs[n];
		}
	}

	return pos;
}

#ifndef PERF_REG_X86_XMM0
#define PERF_REG_X86_XMM0 32
#endif

static void intel_pt_add_xmm(struct regs_dump *intr_regs, u64 *pos,
			     const struct intel_pt_blk_items *items,
			     u64 regs_mask)
{
	u32 mask = items->has_xmm & (regs_mask >> PERF_REG_X86_XMM0);
	const u64 *xmm = items->xmm;

	/*
	 * If there are any XMM registers, then there should be all of them.
	 * Nevertheless, follow the logic to add only registers that were
	 * requested (i.e. 'regs_mask') and that were provided (i.e. 'mask'),
	 * and update the resulting mask (i.e. 'intr_regs->mask') accordingly.
	 */
	intr_regs->mask |= (u64)mask << PERF_REG_X86_XMM0;

	for (; mask; mask >>= 1, xmm++) {
		if (mask & 1)
			*pos++ = *xmm;
	}
}

#define LBR_INFO_MISPRED (1ULL << 63)
#define LBR_INFO_IN_TX (1ULL << 62)
#define LBR_INFO_ABORT (1ULL << 61)
#define LBR_INFO_CYCLES 0xffff

/* Refer kernel's intel_pmu_store_pebs_lbrs() */
static u64 intel_pt_lbr_flags(u64 info)
{
	union {
		struct branch_flags flags;
		u64 result;
	} u;

	u.result = 0;
	u.flags.mispred = !!(info & LBR_INFO_MISPRED);
	u.flags.predicted = !(info & LBR_INFO_MISPRED);
	u.flags.in_tx = !!(info & LBR_INFO_IN_TX);
	u.flags.abort = !!(info & LBR_INFO_ABORT);
	u.flags.cycles = info & LBR_INFO_CYCLES;

	return u.result;
}

static void intel_pt_add_lbrs(struct branch_stack *br_stack,
			      const struct intel_pt_blk_items *items)
{
	u64 *to;
	int i;

	br_stack->nr = 0;

	to = &br_stack->entries[0].from;

	for (i = INTEL_PT_LBR_0_POS; i <= INTEL_PT_LBR_2_POS; i++) {
		u32 mask = items->mask[i];
		const u64 *from = items->val[i];

		for (; mask; mask >>= 3, from += 3) {
			if ((mask & 7) == 7) {
				*to++ = from[0];
				*to++ = from[1];
				*to++ = intel_pt_lbr_flags(from[2]);
				br_stack->nr += 1;
			}
		}
	}
}

static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
{
	const struct intel_pt_blk_items *items = &ptq->state->items;
	struct perf_sample sample = { .ip = 0, };
	union perf_event *event = ptq->event_buf;
	struct intel_pt *pt = ptq->pt;
	struct evsel *evsel = pt->pebs_evsel;
	u64 sample_type = evsel->core.attr.sample_type;
	u64 id = evsel->core.id[0];
	u8 cpumode;
	u64 regs[8 * sizeof(sample.intr_regs.mask)];

	if (intel_pt_skip_event(pt))
		return 0;

	intel_pt_prep_a_sample(ptq, event, &sample);

	sample.id = id;
	sample.stream_id = id;

	if (!evsel->core.attr.freq)
		sample.period = evsel->core.attr.sample_period;

	/* No support for non-zero CS base */
	if (items->has_ip)
		sample.ip = items->ip;
	else if (items->has_rip)
		sample.ip = items->rip;
	else
		sample.ip = ptq->state->from_ip;

	cpumode = intel_pt_cpumode(ptq, sample.ip, 0);

	event->sample.header.misc = cpumode | PERF_RECORD_MISC_EXACT_IP;

	sample.cpumode = cpumode;

	if (sample_type & PERF_SAMPLE_TIME) {
		u64 timestamp = 0;

		if (items->has_timestamp)
			timestamp = items->timestamp;
		else if (!pt->timeless_decoding)
			timestamp = ptq->timestamp;
		if (timestamp)
			sample.time = tsc_to_perf_time(timestamp, &pt->tc);
	}

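	/* Optionally attach a callchain synthesized from the thread stack */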
	if (sample_type & PERF_SAMPLE_CALLCHAIN &&
	    pt->synth_opts.callchain) {
		thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain,
				     pt->synth_opts.callchain_sz, sample.ip,
				     pt->kernel_start);
		sample.callchain = ptq->chain;
	}

	if (sample_type & PERF_SAMPLE_REGS_INTR &&
	    (items->mask[INTEL_PT_GP_REGS_POS] ||
	     items->mask[INTEL_PT_XMM_POS])) {
		u64 regs_mask = evsel->core.attr.sample_regs_intr;
		u64 *pos;

		sample.intr_regs.abi = items->is_32_bit ?
				       PERF_SAMPLE_REGS_ABI_32 :
				       PERF_SAMPLE_REGS_ABI_64;
		sample.intr_regs.regs = regs;

		pos = intel_pt_add_gp_regs(&sample.intr_regs, regs, items, regs_mask);

		intel_pt_add_xmm(&sample.intr_regs, pos, items, regs_mask);
	}

	if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
		if (items->mask[INTEL_PT_LBR_0_POS] ||
		    items->mask[INTEL_PT_LBR_1_POS] ||
		    items->mask[INTEL_PT_LBR_2_POS]) {
			intel_pt_add_lbrs(ptq->last_branch, items);
		} else if (pt->synth_opts.last_branch) {
			thread_stack__br_sample(ptq->thread, ptq->cpu,
						ptq->last_branch,
						pt->br_stack_sz);
		} else {
			ptq->last_branch->nr = 0;
		}
		sample.branch_stack = ptq->last_branch;
	}

	if (sample_type & PERF_SAMPLE_ADDR && items->has_mem_access_address)
		sample.addr = items->mem_access_address;

	if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
		/*
		 * Refer kernel's setup_pebs_adaptive_sample_data() and
		 * intel_hsw_weight().
		 */
		if (items->has_mem_access_latency) {
			u64 weight = items->mem_access_latency >> 32;

			/*
			 * Starting from SPR, the mem access latency field
			 * contains both cache latency [47:32] and instruction
			 * latency [15:0]. The cache latency is the same as the
			 * mem access latency on previous platforms.
			 *
			 * In practice, no memory access could last longer than
			 * 4G cycles. Use latency >> 32 to distinguish the
			 * different format of the mem access latency field.
			 */
			if (weight > 0) {
				sample.weight = weight & 0xffff;
				sample.ins_lat = items->mem_access_latency & 0xffff;
			} else
				sample.weight = items->mem_access_latency;
		}
		if (!sample.weight && items->has_tsx_aux_info) {
			/* Cycles last block */
			sample.weight = (u32)items->tsx_aux_info;
		}
	}

items->rax : 0; 2099 /* Refer kernel's intel_hsw_transaction() */ 2100 u64 txn = (u8)(items->tsx_aux_info >> 32); 2101 2102 /* For RTM XABORTs also log the abort code from AX */ 2103 if (txn & PERF_TXN_TRANSACTION && ax & 1) 2104 txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT; 2105 sample.transaction = txn; 2106 } 2107 2108 return intel_pt_deliver_synth_event(pt, event, &sample, sample_type); 2109 } 2110 2111 static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, 2112 pid_t pid, pid_t tid, u64 ip, u64 timestamp) 2113 { 2114 union perf_event event; 2115 char msg[MAX_AUXTRACE_ERROR_MSG]; 2116 int err; 2117 2118 if (pt->synth_opts.error_minus_flags) { 2119 if (code == INTEL_PT_ERR_OVR && 2120 pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_OVERFLOW) 2121 return 0; 2122 if (code == INTEL_PT_ERR_LOST && 2123 pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_DATA_LOST) 2124 return 0; 2125 } 2126 2127 intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG); 2128 2129 auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, 2130 code, cpu, pid, tid, ip, msg, timestamp); 2131 2132 err = perf_session__deliver_synth_event(pt->session, &event, NULL); 2133 if (err) 2134 pr_err("Intel Processor Trace: failed to deliver error event, error %d\n", 2135 err); 2136 2137 return err; 2138 } 2139 2140 static int intel_ptq_synth_error(struct intel_pt_queue *ptq, 2141 const struct intel_pt_state *state) 2142 { 2143 struct intel_pt *pt = ptq->pt; 2144 u64 tm = ptq->timestamp; 2145 2146 tm = pt->timeless_decoding ? 0 : tsc_to_perf_time(tm, &pt->tc); 2147 2148 return intel_pt_synth_error(pt, state->err, ptq->cpu, ptq->pid, 2149 ptq->tid, state->from_ip, tm); 2150 } 2151 2152 static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq) 2153 { 2154 struct auxtrace_queue *queue; 2155 pid_t tid = ptq->next_tid; 2156 int err; 2157 2158 if (tid == -1) 2159 return 0; 2160 2161 intel_pt_log("switch: cpu %d tid %d\n", ptq->cpu, tid); 2162 2163 err = machine__set_current_tid(pt->machine, ptq->cpu, -1, tid); 2164 2165 queue = &pt->queues.queue_array[ptq->queue_nr]; 2166 intel_pt_set_pid_tid_cpu(pt, queue); 2167 2168 ptq->next_tid = -1; 2169 2170 return err; 2171 } 2172 2173 static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip) 2174 { 2175 struct intel_pt *pt = ptq->pt; 2176 2177 return ip == pt->switch_ip && 2178 (ptq->flags & PERF_IP_FLAG_BRANCH) && 2179 !(ptq->flags & (PERF_IP_FLAG_CONDITIONAL | PERF_IP_FLAG_ASYNC | 2180 PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT)); 2181 } 2182 2183 #define INTEL_PT_PWR_EVT (INTEL_PT_MWAIT_OP | INTEL_PT_PWR_ENTRY | \ 2184 INTEL_PT_EX_STOP | INTEL_PT_PWR_EXIT) 2185 2186 static int intel_pt_sample(struct intel_pt_queue *ptq) 2187 { 2188 const struct intel_pt_state *state = ptq->state; 2189 struct intel_pt *pt = ptq->pt; 2190 int err; 2191 2192 if (!ptq->have_sample) 2193 return 0; 2194 2195 ptq->have_sample = false; 2196 2197 ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt; 2198 ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt; 2199 2200 /* 2201 * Do PEBS first to allow for the possibility that the PEBS timestamp 2202 * precedes the current timestamp. 
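 * (The synthesized PEBS sample takes its time from items->timestamp when
 * present, which can be earlier than ptq->timestamp; see
 * intel_pt_synth_pebs_sample() above.)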
2203 */ 2204 if (pt->sample_pebs && state->type & INTEL_PT_BLK_ITEMS) { 2205 err = intel_pt_synth_pebs_sample(ptq); 2206 if (err) 2207 return err; 2208 } 2209 2210 if (pt->sample_pwr_events) { 2211 if (state->type & INTEL_PT_PSB_EVT) { 2212 err = intel_pt_synth_psb_sample(ptq); 2213 if (err) 2214 return err; 2215 } 2216 if (ptq->state->cbr != ptq->cbr_seen) { 2217 err = intel_pt_synth_cbr_sample(ptq); 2218 if (err) 2219 return err; 2220 } 2221 if (state->type & INTEL_PT_PWR_EVT) { 2222 if (state->type & INTEL_PT_MWAIT_OP) { 2223 err = intel_pt_synth_mwait_sample(ptq); 2224 if (err) 2225 return err; 2226 } 2227 if (state->type & INTEL_PT_PWR_ENTRY) { 2228 err = intel_pt_synth_pwre_sample(ptq); 2229 if (err) 2230 return err; 2231 } 2232 if (state->type & INTEL_PT_EX_STOP) { 2233 err = intel_pt_synth_exstop_sample(ptq); 2234 if (err) 2235 return err; 2236 } 2237 if (state->type & INTEL_PT_PWR_EXIT) { 2238 err = intel_pt_synth_pwrx_sample(ptq); 2239 if (err) 2240 return err; 2241 } 2242 } 2243 } 2244 2245 if (pt->sample_instructions && (state->type & INTEL_PT_INSTRUCTION)) { 2246 err = intel_pt_synth_instruction_sample(ptq); 2247 if (err) 2248 return err; 2249 } 2250 2251 if (pt->sample_transactions && (state->type & INTEL_PT_TRANSACTION)) { 2252 err = intel_pt_synth_transaction_sample(ptq); 2253 if (err) 2254 return err; 2255 } 2256 2257 if (pt->sample_ptwrites && (state->type & INTEL_PT_PTW)) { 2258 err = intel_pt_synth_ptwrite_sample(ptq); 2259 if (err) 2260 return err; 2261 } 2262 2263 if (!(state->type & INTEL_PT_BRANCH)) 2264 return 0; 2265 2266 if (pt->use_thread_stack) { 2267 thread_stack__event(ptq->thread, ptq->cpu, ptq->flags, 2268 state->from_ip, state->to_ip, ptq->insn_len, 2269 state->trace_nr, pt->callstack, 2270 pt->br_stack_sz_plus, 2271 pt->mispred_all); 2272 } else { 2273 thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr); 2274 } 2275 2276 if (pt->sample_branches) { 2277 if (state->from_nr != state->to_nr && 2278 state->from_ip && state->to_ip) { 2279 struct intel_pt_state *st = (struct intel_pt_state *)state; 2280 u64 to_ip = st->to_ip; 2281 u64 from_ip = st->from_ip; 2282 2283 /* 2284 * perf cannot handle having different machines for ip 2285 * and addr, so create 2 branches. 
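 * The first sample is synthesized with from_ip -> 0 and the second with
 * 0 -> to_ip, so each branch refers to only one machine.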
2286 */ 2287 st->to_ip = 0; 2288 err = intel_pt_synth_branch_sample(ptq); 2289 if (err) 2290 return err; 2291 st->from_ip = 0; 2292 st->to_ip = to_ip; 2293 err = intel_pt_synth_branch_sample(ptq); 2294 st->from_ip = from_ip; 2295 } else { 2296 err = intel_pt_synth_branch_sample(ptq); 2297 } 2298 if (err) 2299 return err; 2300 } 2301 2302 if (!ptq->sync_switch) 2303 return 0; 2304 2305 if (intel_pt_is_switch_ip(ptq, state->to_ip)) { 2306 switch (ptq->switch_state) { 2307 case INTEL_PT_SS_NOT_TRACING: 2308 case INTEL_PT_SS_UNKNOWN: 2309 case INTEL_PT_SS_EXPECTING_SWITCH_IP: 2310 err = intel_pt_next_tid(pt, ptq); 2311 if (err) 2312 return err; 2313 ptq->switch_state = INTEL_PT_SS_TRACING; 2314 break; 2315 default: 2316 ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_EVENT; 2317 return 1; 2318 } 2319 } else if (!state->to_ip) { 2320 ptq->switch_state = INTEL_PT_SS_NOT_TRACING; 2321 } else if (ptq->switch_state == INTEL_PT_SS_NOT_TRACING) { 2322 ptq->switch_state = INTEL_PT_SS_UNKNOWN; 2323 } else if (ptq->switch_state == INTEL_PT_SS_UNKNOWN && 2324 state->to_ip == pt->ptss_ip && 2325 (ptq->flags & PERF_IP_FLAG_CALL)) { 2326 ptq->switch_state = INTEL_PT_SS_TRACING; 2327 } 2328 2329 return 0; 2330 } 2331 2332 static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip) 2333 { 2334 struct machine *machine = pt->machine; 2335 struct map *map; 2336 struct symbol *sym, *start; 2337 u64 ip, switch_ip = 0; 2338 const char *ptss; 2339 2340 if (ptss_ip) 2341 *ptss_ip = 0; 2342 2343 map = machine__kernel_map(machine); 2344 if (!map) 2345 return 0; 2346 2347 if (map__load(map)) 2348 return 0; 2349 2350 start = dso__first_symbol(map->dso); 2351 2352 for (sym = start; sym; sym = dso__next_symbol(sym)) { 2353 if (sym->binding == STB_GLOBAL && 2354 !strcmp(sym->name, "__switch_to")) { 2355 ip = map->unmap_ip(map, sym->start); 2356 if (ip >= map->start && ip < map->end) { 2357 switch_ip = ip; 2358 break; 2359 } 2360 } 2361 } 2362 2363 if (!switch_ip || !ptss_ip) 2364 return 0; 2365 2366 if (pt->have_sched_switch == 1) 2367 ptss = "perf_trace_sched_switch"; 2368 else 2369 ptss = "__perf_event_task_sched_out"; 2370 2371 for (sym = start; sym; sym = dso__next_symbol(sym)) { 2372 if (!strcmp(sym->name, ptss)) { 2373 ip = map->unmap_ip(map, sym->start); 2374 if (ip >= map->start && ip < map->end) { 2375 *ptss_ip = ip; 2376 break; 2377 } 2378 } 2379 } 2380 2381 return switch_ip; 2382 } 2383 2384 static void intel_pt_enable_sync_switch(struct intel_pt *pt) 2385 { 2386 unsigned int i; 2387 2388 pt->sync_switch = true; 2389 2390 for (i = 0; i < pt->queues.nr_queues; i++) { 2391 struct auxtrace_queue *queue = &pt->queues.queue_array[i]; 2392 struct intel_pt_queue *ptq = queue->priv; 2393 2394 if (ptq) 2395 ptq->sync_switch = true; 2396 } 2397 } 2398 2399 /* 2400 * To filter against time ranges, it is only necessary to look at the next start 2401 * or end time. 
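 * ptq->sel_timestamp holds that boundary and ptq->sel_start indicates
 * whether it is a start or an end; intel_pt_next_time() advances to the
 * following boundary in pt->time_ranges[].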
2402 */ 2403 static bool intel_pt_next_time(struct intel_pt_queue *ptq) 2404 { 2405 struct intel_pt *pt = ptq->pt; 2406 2407 if (ptq->sel_start) { 2408 /* Next time is an end time */ 2409 ptq->sel_start = false; 2410 ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].end; 2411 return true; 2412 } else if (ptq->sel_idx + 1 < pt->range_cnt) { 2413 /* Next time is a start time */ 2414 ptq->sel_start = true; 2415 ptq->sel_idx += 1; 2416 ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].start; 2417 return true; 2418 } 2419 2420 /* No next time */ 2421 return false; 2422 } 2423 2424 static int intel_pt_time_filter(struct intel_pt_queue *ptq, u64 *ff_timestamp) 2425 { 2426 int err; 2427 2428 while (1) { 2429 if (ptq->sel_start) { 2430 if (ptq->timestamp >= ptq->sel_timestamp) { 2431 /* After start time, so consider next time */ 2432 intel_pt_next_time(ptq); 2433 if (!ptq->sel_timestamp) { 2434 /* No end time */ 2435 return 0; 2436 } 2437 /* Check against end time */ 2438 continue; 2439 } 2440 /* Before start time, so fast forward */ 2441 ptq->have_sample = false; 2442 if (ptq->sel_timestamp > *ff_timestamp) { 2443 if (ptq->sync_switch) { 2444 intel_pt_next_tid(ptq->pt, ptq); 2445 ptq->switch_state = INTEL_PT_SS_UNKNOWN; 2446 } 2447 *ff_timestamp = ptq->sel_timestamp; 2448 err = intel_pt_fast_forward(ptq->decoder, 2449 ptq->sel_timestamp); 2450 if (err) 2451 return err; 2452 } 2453 return 0; 2454 } else if (ptq->timestamp > ptq->sel_timestamp) { 2455 /* After end time, so consider next time */ 2456 if (!intel_pt_next_time(ptq)) { 2457 /* No next time range, so stop decoding */ 2458 ptq->have_sample = false; 2459 ptq->switch_state = INTEL_PT_SS_NOT_TRACING; 2460 return 1; 2461 } 2462 /* Check against next start time */ 2463 continue; 2464 } else { 2465 /* Before end time */ 2466 return 0; 2467 } 2468 } 2469 } 2470 2471 static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) 2472 { 2473 const struct intel_pt_state *state = ptq->state; 2474 struct intel_pt *pt = ptq->pt; 2475 u64 ff_timestamp = 0; 2476 int err; 2477 2478 if (!pt->kernel_start) { 2479 pt->kernel_start = machine__kernel_start(pt->machine); 2480 if (pt->per_cpu_mmaps && 2481 (pt->have_sched_switch == 1 || pt->have_sched_switch == 3) && 2482 !pt->timeless_decoding && intel_pt_tracing_kernel(pt) && 2483 !pt->sampling_mode && !pt->synth_opts.vm_time_correlation) { 2484 pt->switch_ip = intel_pt_switch_ip(pt, &pt->ptss_ip); 2485 if (pt->switch_ip) { 2486 intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n", 2487 pt->switch_ip, pt->ptss_ip); 2488 intel_pt_enable_sync_switch(pt); 2489 } 2490 } 2491 } 2492 2493 intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n", 2494 ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid); 2495 while (1) { 2496 err = intel_pt_sample(ptq); 2497 if (err) 2498 return err; 2499 2500 state = intel_pt_decode(ptq->decoder); 2501 if (state->err) { 2502 if (state->err == INTEL_PT_ERR_NODATA) 2503 return 1; 2504 if (ptq->sync_switch && 2505 state->from_ip >= pt->kernel_start) { 2506 ptq->sync_switch = false; 2507 intel_pt_next_tid(pt, ptq); 2508 } 2509 if (pt->synth_opts.errors) { 2510 err = intel_ptq_synth_error(ptq, state); 2511 if (err) 2512 return err; 2513 } 2514 continue; 2515 } 2516 2517 ptq->state = state; 2518 ptq->have_sample = true; 2519 intel_pt_sample_flags(ptq); 2520 2521 /* Use estimated TSC upon return to user space */ 2522 if (pt->est_tsc && 2523 (state->from_ip >= pt->kernel_start || !state->from_ip) && 2524 state->to_ip && state->to_ip < pt->kernel_start) { 2525 intel_pt_log("TSC 
%"PRIx64" est. TSC %"PRIx64"\n", 2526 state->timestamp, state->est_timestamp); 2527 ptq->timestamp = state->est_timestamp; 2528 /* Use estimated TSC in unknown switch state */ 2529 } else if (ptq->sync_switch && 2530 ptq->switch_state == INTEL_PT_SS_UNKNOWN && 2531 intel_pt_is_switch_ip(ptq, state->to_ip) && 2532 ptq->next_tid == -1) { 2533 intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n", 2534 state->timestamp, state->est_timestamp); 2535 ptq->timestamp = state->est_timestamp; 2536 } else if (state->timestamp > ptq->timestamp) { 2537 ptq->timestamp = state->timestamp; 2538 } 2539 2540 if (ptq->sel_timestamp) { 2541 err = intel_pt_time_filter(ptq, &ff_timestamp); 2542 if (err) 2543 return err; 2544 } 2545 2546 if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) { 2547 *timestamp = ptq->timestamp; 2548 return 0; 2549 } 2550 } 2551 return 0; 2552 } 2553 2554 static inline int intel_pt_update_queues(struct intel_pt *pt) 2555 { 2556 if (pt->queues.new_data) { 2557 pt->queues.new_data = false; 2558 return intel_pt_setup_queues(pt); 2559 } 2560 return 0; 2561 } 2562 2563 static int intel_pt_process_queues(struct intel_pt *pt, u64 timestamp) 2564 { 2565 unsigned int queue_nr; 2566 u64 ts; 2567 int ret; 2568 2569 while (1) { 2570 struct auxtrace_queue *queue; 2571 struct intel_pt_queue *ptq; 2572 2573 if (!pt->heap.heap_cnt) 2574 return 0; 2575 2576 if (pt->heap.heap_array[0].ordinal >= timestamp) 2577 return 0; 2578 2579 queue_nr = pt->heap.heap_array[0].queue_nr; 2580 queue = &pt->queues.queue_array[queue_nr]; 2581 ptq = queue->priv; 2582 2583 intel_pt_log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n", 2584 queue_nr, pt->heap.heap_array[0].ordinal, 2585 timestamp); 2586 2587 auxtrace_heap__pop(&pt->heap); 2588 2589 if (pt->heap.heap_cnt) { 2590 ts = pt->heap.heap_array[0].ordinal + 1; 2591 if (ts > timestamp) 2592 ts = timestamp; 2593 } else { 2594 ts = timestamp; 2595 } 2596 2597 intel_pt_set_pid_tid_cpu(pt, queue); 2598 2599 ret = intel_pt_run_decoder(ptq, &ts); 2600 2601 if (ret < 0) { 2602 auxtrace_heap__add(&pt->heap, queue_nr, ts); 2603 return ret; 2604 } 2605 2606 if (!ret) { 2607 ret = auxtrace_heap__add(&pt->heap, queue_nr, ts); 2608 if (ret < 0) 2609 return ret; 2610 } else { 2611 ptq->on_heap = false; 2612 } 2613 } 2614 2615 return 0; 2616 } 2617 2618 static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid, 2619 u64 time_) 2620 { 2621 struct auxtrace_queues *queues = &pt->queues; 2622 unsigned int i; 2623 u64 ts = 0; 2624 2625 for (i = 0; i < queues->nr_queues; i++) { 2626 struct auxtrace_queue *queue = &pt->queues.queue_array[i]; 2627 struct intel_pt_queue *ptq = queue->priv; 2628 2629 if (ptq && (tid == -1 || ptq->tid == tid)) { 2630 ptq->time = time_; 2631 intel_pt_set_pid_tid_cpu(pt, queue); 2632 intel_pt_run_decoder(ptq, &ts); 2633 } 2634 } 2635 return 0; 2636 } 2637 2638 static void intel_pt_sample_set_pid_tid_cpu(struct intel_pt_queue *ptq, 2639 struct auxtrace_queue *queue, 2640 struct perf_sample *sample) 2641 { 2642 struct machine *m = ptq->pt->machine; 2643 2644 ptq->pid = sample->pid; 2645 ptq->tid = sample->tid; 2646 ptq->cpu = queue->cpu; 2647 2648 intel_pt_log("queue %u cpu %d pid %d tid %d\n", 2649 ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid); 2650 2651 thread__zput(ptq->thread); 2652 2653 if (ptq->tid == -1) 2654 return; 2655 2656 if (ptq->pid == -1) { 2657 ptq->thread = machine__find_thread(m, -1, ptq->tid); 2658 if (ptq->thread) 2659 ptq->pid = ptq->thread->pid_; 2660 return; 2661 } 2662 2663 ptq->thread = 
machine__findnew_thread(m, ptq->pid, ptq->tid); 2664 } 2665 2666 static int intel_pt_process_timeless_sample(struct intel_pt *pt, 2667 struct perf_sample *sample) 2668 { 2669 struct auxtrace_queue *queue; 2670 struct intel_pt_queue *ptq; 2671 u64 ts = 0; 2672 2673 queue = auxtrace_queues__sample_queue(&pt->queues, sample, pt->session); 2674 if (!queue) 2675 return -EINVAL; 2676 2677 ptq = queue->priv; 2678 if (!ptq) 2679 return 0; 2680 2681 ptq->stop = false; 2682 ptq->time = sample->time; 2683 intel_pt_sample_set_pid_tid_cpu(ptq, queue, sample); 2684 intel_pt_run_decoder(ptq, &ts); 2685 return 0; 2686 } 2687 2688 static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample) 2689 { 2690 return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu, 2691 sample->pid, sample->tid, 0, sample->time); 2692 } 2693 2694 static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu) 2695 { 2696 unsigned i, j; 2697 2698 if (cpu < 0 || !pt->queues.nr_queues) 2699 return NULL; 2700 2701 if ((unsigned)cpu >= pt->queues.nr_queues) 2702 i = pt->queues.nr_queues - 1; 2703 else 2704 i = cpu; 2705 2706 if (pt->queues.queue_array[i].cpu == cpu) 2707 return pt->queues.queue_array[i].priv; 2708 2709 for (j = 0; i > 0; j++) { 2710 if (pt->queues.queue_array[--i].cpu == cpu) 2711 return pt->queues.queue_array[i].priv; 2712 } 2713 2714 for (; j < pt->queues.nr_queues; j++) { 2715 if (pt->queues.queue_array[j].cpu == cpu) 2716 return pt->queues.queue_array[j].priv; 2717 } 2718 2719 return NULL; 2720 } 2721 2722 static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid, 2723 u64 timestamp) 2724 { 2725 struct intel_pt_queue *ptq; 2726 int err; 2727 2728 if (!pt->sync_switch) 2729 return 1; 2730 2731 ptq = intel_pt_cpu_to_ptq(pt, cpu); 2732 if (!ptq || !ptq->sync_switch) 2733 return 1; 2734 2735 switch (ptq->switch_state) { 2736 case INTEL_PT_SS_NOT_TRACING: 2737 break; 2738 case INTEL_PT_SS_UNKNOWN: 2739 case INTEL_PT_SS_TRACING: 2740 ptq->next_tid = tid; 2741 ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_IP; 2742 return 0; 2743 case INTEL_PT_SS_EXPECTING_SWITCH_EVENT: 2744 if (!ptq->on_heap) { 2745 ptq->timestamp = perf_time_to_tsc(timestamp, 2746 &pt->tc); 2747 err = auxtrace_heap__add(&pt->heap, ptq->queue_nr, 2748 ptq->timestamp); 2749 if (err) 2750 return err; 2751 ptq->on_heap = true; 2752 } 2753 ptq->switch_state = INTEL_PT_SS_TRACING; 2754 break; 2755 case INTEL_PT_SS_EXPECTING_SWITCH_IP: 2756 intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu); 2757 break; 2758 default: 2759 break; 2760 } 2761 2762 ptq->next_tid = -1; 2763 2764 return 1; 2765 } 2766 2767 static int intel_pt_process_switch(struct intel_pt *pt, 2768 struct perf_sample *sample) 2769 { 2770 pid_t tid; 2771 int cpu, ret; 2772 struct evsel *evsel = evlist__id2evsel(pt->session->evlist, sample->id); 2773 2774 if (evsel != pt->switch_evsel) 2775 return 0; 2776 2777 tid = evsel__intval(evsel, sample, "next_pid"); 2778 cpu = sample->cpu; 2779 2780 intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", 2781 cpu, tid, sample->time, perf_time_to_tsc(sample->time, 2782 &pt->tc)); 2783 2784 ret = intel_pt_sync_switch(pt, cpu, tid, sample->time); 2785 if (ret <= 0) 2786 return ret; 2787 2788 return machine__set_current_tid(pt->machine, cpu, -1, tid); 2789 } 2790 2791 static int intel_pt_context_switch_in(struct intel_pt *pt, 2792 struct perf_sample *sample) 2793 { 2794 pid_t pid = sample->pid; 2795 pid_t tid = sample->tid; 2796 int cpu = sample->cpu; 2797 2798 if (pt->sync_switch) { 
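/*
 * A switch-in on this CPU means any expected switch has happened: clear the
 * pending tid and move the "expecting switch" states back to TRACING.
 */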
2799 struct intel_pt_queue *ptq; 2800 2801 ptq = intel_pt_cpu_to_ptq(pt, cpu); 2802 if (ptq && ptq->sync_switch) { 2803 ptq->next_tid = -1; 2804 switch (ptq->switch_state) { 2805 case INTEL_PT_SS_NOT_TRACING: 2806 case INTEL_PT_SS_UNKNOWN: 2807 case INTEL_PT_SS_TRACING: 2808 break; 2809 case INTEL_PT_SS_EXPECTING_SWITCH_EVENT: 2810 case INTEL_PT_SS_EXPECTING_SWITCH_IP: 2811 ptq->switch_state = INTEL_PT_SS_TRACING; 2812 break; 2813 default: 2814 break; 2815 } 2816 } 2817 } 2818 2819 /* 2820 * If the current tid has not been updated yet, ensure it is now that 2821 * a "switch in" event has occurred. 2822 */ 2823 if (machine__get_current_tid(pt->machine, cpu) == tid) 2824 return 0; 2825 2826 return machine__set_current_tid(pt->machine, cpu, pid, tid); 2827 } 2828 2829 static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event, 2830 struct perf_sample *sample) 2831 { 2832 bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; 2833 pid_t pid, tid; 2834 int cpu, ret; 2835 2836 cpu = sample->cpu; 2837 2838 if (pt->have_sched_switch == 3) { 2839 if (!out) 2840 return intel_pt_context_switch_in(pt, sample); 2841 if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) { 2842 pr_err("Expecting CPU-wide context switch event\n"); 2843 return -EINVAL; 2844 } 2845 pid = event->context_switch.next_prev_pid; 2846 tid = event->context_switch.next_prev_tid; 2847 } else { 2848 if (out) 2849 return 0; 2850 pid = sample->pid; 2851 tid = sample->tid; 2852 } 2853 2854 if (tid == -1) 2855 intel_pt_log("context_switch event has no tid\n"); 2856 2857 ret = intel_pt_sync_switch(pt, cpu, tid, sample->time); 2858 if (ret <= 0) 2859 return ret; 2860 2861 return machine__set_current_tid(pt->machine, cpu, pid, tid); 2862 } 2863 2864 static int intel_pt_process_itrace_start(struct intel_pt *pt, 2865 union perf_event *event, 2866 struct perf_sample *sample) 2867 { 2868 if (!pt->per_cpu_mmaps) 2869 return 0; 2870 2871 intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", 2872 sample->cpu, event->itrace_start.pid, 2873 event->itrace_start.tid, sample->time, 2874 perf_time_to_tsc(sample->time, &pt->tc)); 2875 2876 return machine__set_current_tid(pt->machine, sample->cpu, 2877 event->itrace_start.pid, 2878 event->itrace_start.tid); 2879 } 2880 2881 static int intel_pt_find_map(struct thread *thread, u8 cpumode, u64 addr, 2882 struct addr_location *al) 2883 { 2884 if (!al->map || addr < al->map->start || addr >= al->map->end) { 2885 if (!thread__find_map(thread, cpumode, addr, al)) 2886 return -1; 2887 } 2888 2889 return 0; 2890 } 2891 2892 /* Invalidate all instruction cache entries that overlap the text poke */ 2893 static int intel_pt_text_poke(struct intel_pt *pt, union perf_event *event) 2894 { 2895 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 2896 u64 addr = event->text_poke.addr + event->text_poke.new_len - 1; 2897 /* Assume text poke begins in a basic block no more than 4096 bytes */ 2898 int cnt = 4096 + event->text_poke.new_len; 2899 struct thread *thread = pt->unknown_thread; 2900 struct addr_location al = { .map = NULL }; 2901 struct machine *machine = pt->machine; 2902 struct intel_pt_cache_entry *e; 2903 u64 offset; 2904 2905 if (!event->text_poke.new_len) 2906 return 0; 2907 2908 for (; cnt; cnt--, addr--) { 2909 if (intel_pt_find_map(thread, cpumode, addr, &al)) { 2910 if (addr < event->text_poke.addr) 2911 return 0; 2912 continue; 2913 } 2914 2915 if (!al.map->dso || !al.map->dso->auxtrace_cache) 2916 continue; 2917 2918 offset = 
al.map->map_ip(al.map, addr); 2919 2920 e = intel_pt_cache_lookup(al.map->dso, machine, offset); 2921 if (!e) 2922 continue; 2923 2924 if (addr + e->byte_cnt + e->length <= event->text_poke.addr) { 2925 /* 2926 * No overlap. Working backwards there cannot be another 2927 * basic block that overlaps the text poke if there is a 2928 * branch instruction before the text poke address. 2929 */ 2930 if (e->branch != INTEL_PT_BR_NO_BRANCH) 2931 return 0; 2932 } else { 2933 intel_pt_cache_invalidate(al.map->dso, machine, offset); 2934 intel_pt_log("Invalidated instruction cache for %s at %#"PRIx64"\n", 2935 al.map->dso->long_name, addr); 2936 } 2937 } 2938 2939 return 0; 2940 } 2941 2942 static int intel_pt_process_event(struct perf_session *session, 2943 union perf_event *event, 2944 struct perf_sample *sample, 2945 struct perf_tool *tool) 2946 { 2947 struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, 2948 auxtrace); 2949 u64 timestamp; 2950 int err = 0; 2951 2952 if (dump_trace) 2953 return 0; 2954 2955 if (!tool->ordered_events) { 2956 pr_err("Intel Processor Trace requires ordered events\n"); 2957 return -EINVAL; 2958 } 2959 2960 if (sample->time && sample->time != (u64)-1) 2961 timestamp = perf_time_to_tsc(sample->time, &pt->tc); 2962 else 2963 timestamp = 0; 2964 2965 if (timestamp || pt->timeless_decoding) { 2966 err = intel_pt_update_queues(pt); 2967 if (err) 2968 return err; 2969 } 2970 2971 if (pt->timeless_decoding) { 2972 if (pt->sampling_mode) { 2973 if (sample->aux_sample.size) 2974 err = intel_pt_process_timeless_sample(pt, 2975 sample); 2976 } else if (event->header.type == PERF_RECORD_EXIT) { 2977 err = intel_pt_process_timeless_queues(pt, 2978 event->fork.tid, 2979 sample->time); 2980 } 2981 } else if (timestamp) { 2982 if (!pt->first_timestamp) 2983 intel_pt_first_timestamp(pt, timestamp); 2984 err = intel_pt_process_queues(pt, timestamp); 2985 } 2986 if (err) 2987 return err; 2988 2989 if (event->header.type == PERF_RECORD_SAMPLE) { 2990 if (pt->synth_opts.add_callchain && !sample->callchain) 2991 intel_pt_add_callchain(pt, sample); 2992 if (pt->synth_opts.add_last_branch && !sample->branch_stack) 2993 intel_pt_add_br_stack(pt, sample); 2994 } 2995 2996 if (event->header.type == PERF_RECORD_AUX && 2997 (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) && 2998 pt->synth_opts.errors) { 2999 err = intel_pt_lost(pt, sample); 3000 if (err) 3001 return err; 3002 } 3003 3004 if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE) 3005 err = intel_pt_process_switch(pt, sample); 3006 else if (event->header.type == PERF_RECORD_ITRACE_START) 3007 err = intel_pt_process_itrace_start(pt, event, sample); 3008 else if (event->header.type == PERF_RECORD_SWITCH || 3009 event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) 3010 err = intel_pt_context_switch(pt, event, sample); 3011 3012 if (!err && event->header.type == PERF_RECORD_TEXT_POKE) 3013 err = intel_pt_text_poke(pt, event); 3014 3015 if (intel_pt_enable_logging && intel_pt_log_events(pt, sample->time)) { 3016 intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ", 3017 event->header.type, sample->cpu, sample->time, timestamp); 3018 intel_pt_log_event(event); 3019 } 3020 3021 return err; 3022 } 3023 3024 static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool) 3025 { 3026 struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, 3027 auxtrace); 3028 int ret; 3029 3030 if (dump_trace) 3031 return 0; 3032 3033 if (!tool->ordered_events) 3034 return -EINVAL; 3035 3036 ret = 
intel_pt_update_queues(pt); 3037 if (ret < 0) 3038 return ret; 3039 3040 if (pt->timeless_decoding) 3041 return intel_pt_process_timeless_queues(pt, -1, 3042 MAX_TIMESTAMP - 1); 3043 3044 return intel_pt_process_queues(pt, MAX_TIMESTAMP); 3045 } 3046 3047 static void intel_pt_free_events(struct perf_session *session) 3048 { 3049 struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, 3050 auxtrace); 3051 struct auxtrace_queues *queues = &pt->queues; 3052 unsigned int i; 3053 3054 for (i = 0; i < queues->nr_queues; i++) { 3055 intel_pt_free_queue(queues->queue_array[i].priv); 3056 queues->queue_array[i].priv = NULL; 3057 } 3058 intel_pt_log_disable(); 3059 auxtrace_queues__free(queues); 3060 } 3061 3062 static void intel_pt_free(struct perf_session *session) 3063 { 3064 struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, 3065 auxtrace); 3066 3067 auxtrace_heap__free(&pt->heap); 3068 intel_pt_free_events(session); 3069 session->auxtrace = NULL; 3070 intel_pt_free_vmcs_info(pt); 3071 thread__put(pt->unknown_thread); 3072 addr_filters__exit(&pt->filts); 3073 zfree(&pt->chain); 3074 zfree(&pt->filter); 3075 zfree(&pt->time_ranges); 3076 free(pt); 3077 } 3078 3079 static bool intel_pt_evsel_is_auxtrace(struct perf_session *session, 3080 struct evsel *evsel) 3081 { 3082 struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, 3083 auxtrace); 3084 3085 return evsel->core.attr.type == pt->pmu_type; 3086 } 3087 3088 static int intel_pt_process_auxtrace_event(struct perf_session *session, 3089 union perf_event *event, 3090 struct perf_tool *tool __maybe_unused) 3091 { 3092 struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, 3093 auxtrace); 3094 3095 if (!pt->data_queued) { 3096 struct auxtrace_buffer *buffer; 3097 off_t data_offset; 3098 int fd = perf_data__fd(session->data); 3099 int err; 3100 3101 if (perf_data__is_pipe(session->data)) { 3102 data_offset = 0; 3103 } else { 3104 data_offset = lseek(fd, 0, SEEK_CUR); 3105 if (data_offset == -1) 3106 return -errno; 3107 } 3108 3109 err = auxtrace_queues__add_event(&pt->queues, session, event, 3110 data_offset, &buffer); 3111 if (err) 3112 return err; 3113 3114 /* Dump here now we have copied a piped trace out of the pipe */ 3115 if (dump_trace) { 3116 if (auxtrace_buffer__get_data(buffer, fd)) { 3117 intel_pt_dump_event(pt, buffer->data, 3118 buffer->size); 3119 auxtrace_buffer__put_data(buffer); 3120 } 3121 } 3122 } 3123 3124 return 0; 3125 } 3126 3127 static int intel_pt_queue_data(struct perf_session *session, 3128 struct perf_sample *sample, 3129 union perf_event *event, u64 data_offset) 3130 { 3131 struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, 3132 auxtrace); 3133 u64 timestamp; 3134 3135 if (event) { 3136 return auxtrace_queues__add_event(&pt->queues, session, event, 3137 data_offset, NULL); 3138 } 3139 3140 if (sample->time && sample->time != (u64)-1) 3141 timestamp = perf_time_to_tsc(sample->time, &pt->tc); 3142 else 3143 timestamp = 0; 3144 3145 return auxtrace_queues__add_sample(&pt->queues, session, sample, 3146 data_offset, timestamp); 3147 } 3148 3149 struct intel_pt_synth { 3150 struct perf_tool dummy_tool; 3151 struct perf_session *session; 3152 }; 3153 3154 static int intel_pt_event_synth(struct perf_tool *tool, 3155 union perf_event *event, 3156 struct perf_sample *sample __maybe_unused, 3157 struct machine *machine __maybe_unused) 3158 { 3159 struct intel_pt_synth *intel_pt_synth = 3160 container_of(tool, struct intel_pt_synth, dummy_tool); 
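/* Forward the synthesized attribute event into the session; see intel_pt_synth_event() below. */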
3161 3162 return perf_session__deliver_synth_event(intel_pt_synth->session, event, 3163 NULL); 3164 } 3165 3166 static int intel_pt_synth_event(struct perf_session *session, const char *name, 3167 struct perf_event_attr *attr, u64 id) 3168 { 3169 struct intel_pt_synth intel_pt_synth; 3170 int err; 3171 3172 pr_debug("Synthesizing '%s' event with id %" PRIu64 " sample type %#" PRIx64 "\n", 3173 name, id, (u64)attr->sample_type); 3174 3175 memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth)); 3176 intel_pt_synth.session = session; 3177 3178 err = perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1, 3179 &id, intel_pt_event_synth); 3180 if (err) 3181 pr_err("%s: failed to synthesize '%s' event type\n", 3182 __func__, name); 3183 3184 return err; 3185 } 3186 3187 static void intel_pt_set_event_name(struct evlist *evlist, u64 id, 3188 const char *name) 3189 { 3190 struct evsel *evsel; 3191 3192 evlist__for_each_entry(evlist, evsel) { 3193 if (evsel->core.id && evsel->core.id[0] == id) { 3194 if (evsel->name) 3195 zfree(&evsel->name); 3196 evsel->name = strdup(name); 3197 break; 3198 } 3199 } 3200 } 3201 3202 static struct evsel *intel_pt_evsel(struct intel_pt *pt, 3203 struct evlist *evlist) 3204 { 3205 struct evsel *evsel; 3206 3207 evlist__for_each_entry(evlist, evsel) { 3208 if (evsel->core.attr.type == pt->pmu_type && evsel->core.ids) 3209 return evsel; 3210 } 3211 3212 return NULL; 3213 } 3214 3215 static int intel_pt_synth_events(struct intel_pt *pt, 3216 struct perf_session *session) 3217 { 3218 struct evlist *evlist = session->evlist; 3219 struct evsel *evsel = intel_pt_evsel(pt, evlist); 3220 struct perf_event_attr attr; 3221 u64 id; 3222 int err; 3223 3224 if (!evsel) { 3225 pr_debug("There are no selected events with Intel Processor Trace data\n"); 3226 return 0; 3227 } 3228 3229 memset(&attr, 0, sizeof(struct perf_event_attr)); 3230 attr.size = sizeof(struct perf_event_attr); 3231 attr.type = PERF_TYPE_HARDWARE; 3232 attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK; 3233 attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | 3234 PERF_SAMPLE_PERIOD; 3235 if (pt->timeless_decoding) 3236 attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; 3237 else 3238 attr.sample_type |= PERF_SAMPLE_TIME; 3239 if (!pt->per_cpu_mmaps) 3240 attr.sample_type &= ~(u64)PERF_SAMPLE_CPU; 3241 attr.exclude_user = evsel->core.attr.exclude_user; 3242 attr.exclude_kernel = evsel->core.attr.exclude_kernel; 3243 attr.exclude_hv = evsel->core.attr.exclude_hv; 3244 attr.exclude_host = evsel->core.attr.exclude_host; 3245 attr.exclude_guest = evsel->core.attr.exclude_guest; 3246 attr.sample_id_all = evsel->core.attr.sample_id_all; 3247 attr.read_format = evsel->core.attr.read_format; 3248 3249 id = evsel->core.id[0] + 1000000000; 3250 if (!id) 3251 id = 1; 3252 3253 if (pt->synth_opts.branches) { 3254 attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; 3255 attr.sample_period = 1; 3256 attr.sample_type |= PERF_SAMPLE_ADDR; 3257 err = intel_pt_synth_event(session, "branches", &attr, id); 3258 if (err) 3259 return err; 3260 pt->sample_branches = true; 3261 pt->branches_sample_type = attr.sample_type; 3262 pt->branches_id = id; 3263 id += 1; 3264 attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR; 3265 } 3266 3267 if (pt->synth_opts.callchain) 3268 attr.sample_type |= PERF_SAMPLE_CALLCHAIN; 3269 if (pt->synth_opts.last_branch) { 3270 attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; 3271 /* 3272 * We don't use the hardware index, but the sample generation 3273 * code uses the new format branch_stack with 
this field, 3274 * so the event attributes must indicate that it's present. 3275 */ 3276 attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX; 3277 } 3278 3279 if (pt->synth_opts.instructions) { 3280 attr.config = PERF_COUNT_HW_INSTRUCTIONS; 3281 if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS) 3282 attr.sample_period = 3283 intel_pt_ns_to_ticks(pt, pt->synth_opts.period); 3284 else 3285 attr.sample_period = pt->synth_opts.period; 3286 err = intel_pt_synth_event(session, "instructions", &attr, id); 3287 if (err) 3288 return err; 3289 pt->sample_instructions = true; 3290 pt->instructions_sample_type = attr.sample_type; 3291 pt->instructions_id = id; 3292 id += 1; 3293 } 3294 3295 attr.sample_type &= ~(u64)PERF_SAMPLE_PERIOD; 3296 attr.sample_period = 1; 3297 3298 if (pt->synth_opts.transactions) { 3299 attr.config = PERF_COUNT_HW_INSTRUCTIONS; 3300 err = intel_pt_synth_event(session, "transactions", &attr, id); 3301 if (err) 3302 return err; 3303 pt->sample_transactions = true; 3304 pt->transactions_sample_type = attr.sample_type; 3305 pt->transactions_id = id; 3306 intel_pt_set_event_name(evlist, id, "transactions"); 3307 id += 1; 3308 } 3309 3310 attr.type = PERF_TYPE_SYNTH; 3311 attr.sample_type |= PERF_SAMPLE_RAW; 3312 3313 if (pt->synth_opts.ptwrites) { 3314 attr.config = PERF_SYNTH_INTEL_PTWRITE; 3315 err = intel_pt_synth_event(session, "ptwrite", &attr, id); 3316 if (err) 3317 return err; 3318 pt->sample_ptwrites = true; 3319 pt->ptwrites_sample_type = attr.sample_type; 3320 pt->ptwrites_id = id; 3321 intel_pt_set_event_name(evlist, id, "ptwrite"); 3322 id += 1; 3323 } 3324 3325 if (pt->synth_opts.pwr_events) { 3326 pt->sample_pwr_events = true; 3327 pt->pwr_events_sample_type = attr.sample_type; 3328 3329 attr.config = PERF_SYNTH_INTEL_CBR; 3330 err = intel_pt_synth_event(session, "cbr", &attr, id); 3331 if (err) 3332 return err; 3333 pt->cbr_id = id; 3334 intel_pt_set_event_name(evlist, id, "cbr"); 3335 id += 1; 3336 3337 attr.config = PERF_SYNTH_INTEL_PSB; 3338 err = intel_pt_synth_event(session, "psb", &attr, id); 3339 if (err) 3340 return err; 3341 pt->psb_id = id; 3342 intel_pt_set_event_name(evlist, id, "psb"); 3343 id += 1; 3344 } 3345 3346 if (pt->synth_opts.pwr_events && (evsel->core.attr.config & 0x10)) { 3347 attr.config = PERF_SYNTH_INTEL_MWAIT; 3348 err = intel_pt_synth_event(session, "mwait", &attr, id); 3349 if (err) 3350 return err; 3351 pt->mwait_id = id; 3352 intel_pt_set_event_name(evlist, id, "mwait"); 3353 id += 1; 3354 3355 attr.config = PERF_SYNTH_INTEL_PWRE; 3356 err = intel_pt_synth_event(session, "pwre", &attr, id); 3357 if (err) 3358 return err; 3359 pt->pwre_id = id; 3360 intel_pt_set_event_name(evlist, id, "pwre"); 3361 id += 1; 3362 3363 attr.config = PERF_SYNTH_INTEL_EXSTOP; 3364 err = intel_pt_synth_event(session, "exstop", &attr, id); 3365 if (err) 3366 return err; 3367 pt->exstop_id = id; 3368 intel_pt_set_event_name(evlist, id, "exstop"); 3369 id += 1; 3370 3371 attr.config = PERF_SYNTH_INTEL_PWRX; 3372 err = intel_pt_synth_event(session, "pwrx", &attr, id); 3373 if (err) 3374 return err; 3375 pt->pwrx_id = id; 3376 intel_pt_set_event_name(evlist, id, "pwrx"); 3377 id += 1; 3378 } 3379 3380 return 0; 3381 } 3382 3383 static void intel_pt_setup_pebs_events(struct intel_pt *pt) 3384 { 3385 struct evsel *evsel; 3386 3387 if (!pt->synth_opts.other_events) 3388 return; 3389 3390 evlist__for_each_entry(pt->session->evlist, evsel) { 3391 if (evsel->core.attr.aux_output && evsel->core.id) { 3392 pt->sample_pebs = true; 3393 pt->pebs_evsel = 
evsel; 3394 return; 3395 } 3396 } 3397 } 3398 3399 static struct evsel *intel_pt_find_sched_switch(struct evlist *evlist) 3400 { 3401 struct evsel *evsel; 3402 3403 evlist__for_each_entry_reverse(evlist, evsel) { 3404 const char *name = evsel__name(evsel); 3405 3406 if (!strcmp(name, "sched:sched_switch")) 3407 return evsel; 3408 } 3409 3410 return NULL; 3411 } 3412 3413 static bool intel_pt_find_switch(struct evlist *evlist) 3414 { 3415 struct evsel *evsel; 3416 3417 evlist__for_each_entry(evlist, evsel) { 3418 if (evsel->core.attr.context_switch) 3419 return true; 3420 } 3421 3422 return false; 3423 } 3424 3425 static int intel_pt_perf_config(const char *var, const char *value, void *data) 3426 { 3427 struct intel_pt *pt = data; 3428 3429 if (!strcmp(var, "intel-pt.mispred-all")) 3430 pt->mispred_all = perf_config_bool(var, value); 3431 3432 return 0; 3433 } 3434 3435 /* Find least TSC which converts to ns or later */ 3436 static u64 intel_pt_tsc_start(u64 ns, struct intel_pt *pt) 3437 { 3438 u64 tsc, tm; 3439 3440 tsc = perf_time_to_tsc(ns, &pt->tc); 3441 3442 while (1) { 3443 tm = tsc_to_perf_time(tsc, &pt->tc); 3444 if (tm < ns) 3445 break; 3446 tsc -= 1; 3447 } 3448 3449 while (tm < ns) 3450 tm = tsc_to_perf_time(++tsc, &pt->tc); 3451 3452 return tsc; 3453 } 3454 3455 /* Find greatest TSC which converts to ns or earlier */ 3456 static u64 intel_pt_tsc_end(u64 ns, struct intel_pt *pt) 3457 { 3458 u64 tsc, tm; 3459 3460 tsc = perf_time_to_tsc(ns, &pt->tc); 3461 3462 while (1) { 3463 tm = tsc_to_perf_time(tsc, &pt->tc); 3464 if (tm > ns) 3465 break; 3466 tsc += 1; 3467 } 3468 3469 while (tm > ns) 3470 tm = tsc_to_perf_time(--tsc, &pt->tc); 3471 3472 return tsc; 3473 } 3474 3475 static int intel_pt_setup_time_ranges(struct intel_pt *pt, 3476 struct itrace_synth_opts *opts) 3477 { 3478 struct perf_time_interval *p = opts->ptime_range; 3479 int n = opts->range_num; 3480 int i; 3481 3482 if (!n || !p || pt->timeless_decoding) 3483 return 0; 3484 3485 pt->time_ranges = calloc(n, sizeof(struct range)); 3486 if (!pt->time_ranges) 3487 return -ENOMEM; 3488 3489 pt->range_cnt = n; 3490 3491 intel_pt_log("%s: %u range(s)\n", __func__, n); 3492 3493 for (i = 0; i < n; i++) { 3494 struct range *r = &pt->time_ranges[i]; 3495 u64 ts = p[i].start; 3496 u64 te = p[i].end; 3497 3498 /* 3499 * Take care to ensure the TSC range matches the perf-time range 3500 * when converted back to perf-time. 3501 */ 3502 r->start = ts ? intel_pt_tsc_start(ts, pt) : 0; 3503 r->end = te ? 
intel_pt_tsc_end(te, pt) : 0; 3504 3505 intel_pt_log("range %d: perf time interval: %"PRIu64" to %"PRIu64"\n", 3506 i, ts, te); 3507 intel_pt_log("range %d: TSC time interval: %#"PRIx64" to %#"PRIx64"\n", 3508 i, r->start, r->end); 3509 } 3510 3511 return 0; 3512 } 3513 3514 static int intel_pt_parse_vm_tm_corr_arg(struct intel_pt *pt, char **args) 3515 { 3516 struct intel_pt_vmcs_info *vmcs_info; 3517 u64 tsc_offset, vmcs; 3518 char *p = *args; 3519 3520 errno = 0; 3521 3522 p = skip_spaces(p); 3523 if (!*p) 3524 return 1; 3525 3526 tsc_offset = strtoull(p, &p, 0); 3527 if (errno) 3528 return -errno; 3529 p = skip_spaces(p); 3530 if (*p != ':') { 3531 pt->dflt_tsc_offset = tsc_offset; 3532 *args = p; 3533 return 0; 3534 } 3535 while (1) { 3536 vmcs = strtoull(p, &p, 0); 3537 if (errno) 3538 return -errno; 3539 if (!vmcs) 3540 return -EINVAL; 3541 vmcs_info = intel_pt_findnew_vmcs(&pt->vmcs_info, vmcs, tsc_offset); 3542 if (!vmcs_info) 3543 return -ENOMEM; 3544 p = skip_spaces(p); 3545 if (*p != ',') 3546 break; 3547 p += 1; 3548 } 3549 *args = p; 3550 return 0; 3551 } 3552 3553 static int intel_pt_parse_vm_tm_corr_args(struct intel_pt *pt) 3554 { 3555 char *args = pt->synth_opts.vm_tm_corr_args; 3556 int ret; 3557 3558 if (!args) 3559 return 0; 3560 3561 do { 3562 ret = intel_pt_parse_vm_tm_corr_arg(pt, &args); 3563 } while (!ret); 3564 3565 if (ret < 0) { 3566 pr_err("Failed to parse VM Time Correlation options\n"); 3567 return ret; 3568 } 3569 3570 return 0; 3571 } 3572 3573 static const char * const intel_pt_info_fmts[] = { 3574 [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n", 3575 [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n", 3576 [INTEL_PT_TIME_MULT] = " Time Multiplier %"PRIu64"\n", 3577 [INTEL_PT_TIME_ZERO] = " Time Zero %"PRIu64"\n", 3578 [INTEL_PT_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n", 3579 [INTEL_PT_TSC_BIT] = " TSC bit %#"PRIx64"\n", 3580 [INTEL_PT_NORETCOMP_BIT] = " NoRETComp bit %#"PRIx64"\n", 3581 [INTEL_PT_HAVE_SCHED_SWITCH] = " Have sched_switch %"PRId64"\n", 3582 [INTEL_PT_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n", 3583 [INTEL_PT_PER_CPU_MMAPS] = " Per-cpu maps %"PRId64"\n", 3584 [INTEL_PT_MTC_BIT] = " MTC bit %#"PRIx64"\n", 3585 [INTEL_PT_TSC_CTC_N] = " TSC:CTC numerator %"PRIu64"\n", 3586 [INTEL_PT_TSC_CTC_D] = " TSC:CTC denominator %"PRIu64"\n", 3587 [INTEL_PT_CYC_BIT] = " CYC bit %#"PRIx64"\n", 3588 [INTEL_PT_MAX_NONTURBO_RATIO] = " Max non-turbo ratio %"PRIu64"\n", 3589 [INTEL_PT_FILTER_STR_LEN] = " Filter string len. %"PRIu64"\n", 3590 }; 3591 3592 static void intel_pt_print_info(__u64 *arr, int start, int finish) 3593 { 3594 int i; 3595 3596 if (!dump_trace) 3597 return; 3598 3599 for (i = start; i <= finish; i++) 3600 fprintf(stdout, intel_pt_info_fmts[i], arr[i]); 3601 } 3602 3603 static void intel_pt_print_info_str(const char *name, const char *str) 3604 { 3605 if (!dump_trace) 3606 return; 3607 3608 fprintf(stdout, " %-20s%s\n", name, str ?
str : ""); 3609 } 3610 3611 static bool intel_pt_has(struct perf_record_auxtrace_info *auxtrace_info, int pos) 3612 { 3613 return auxtrace_info->header.size >= 3614 sizeof(struct perf_record_auxtrace_info) + (sizeof(u64) * (pos + 1)); 3615 } 3616 3617 int intel_pt_process_auxtrace_info(union perf_event *event, 3618 struct perf_session *session) 3619 { 3620 struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; 3621 size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS; 3622 struct intel_pt *pt; 3623 void *info_end; 3624 __u64 *info; 3625 int err; 3626 3627 if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) + 3628 min_sz) 3629 return -EINVAL; 3630 3631 pt = zalloc(sizeof(struct intel_pt)); 3632 if (!pt) 3633 return -ENOMEM; 3634 3635 pt->vmcs_info = RB_ROOT; 3636 3637 addr_filters__init(&pt->filts); 3638 3639 err = perf_config(intel_pt_perf_config, pt); 3640 if (err) 3641 goto err_free; 3642 3643 err = auxtrace_queues__init(&pt->queues); 3644 if (err) 3645 goto err_free; 3646 3647 intel_pt_log_set_name(INTEL_PT_PMU_NAME); 3648 3649 if (session->itrace_synth_opts->set) { 3650 pt->synth_opts = *session->itrace_synth_opts; 3651 } else { 3652 struct itrace_synth_opts *opts = session->itrace_synth_opts; 3653 3654 itrace_synth_opts__set_default(&pt->synth_opts, opts->default_no_sample); 3655 if (!opts->default_no_sample && !opts->inject) { 3656 pt->synth_opts.branches = false; 3657 pt->synth_opts.callchain = true; 3658 pt->synth_opts.add_callchain = true; 3659 } 3660 pt->synth_opts.thread_stack = opts->thread_stack; 3661 } 3662 3663 pt->session = session; 3664 pt->machine = &session->machines.host; /* No kvm support */ 3665 pt->auxtrace_type = auxtrace_info->type; 3666 pt->pmu_type = auxtrace_info->priv[INTEL_PT_PMU_TYPE]; 3667 pt->tc.time_shift = auxtrace_info->priv[INTEL_PT_TIME_SHIFT]; 3668 pt->tc.time_mult = auxtrace_info->priv[INTEL_PT_TIME_MULT]; 3669 pt->tc.time_zero = auxtrace_info->priv[INTEL_PT_TIME_ZERO]; 3670 pt->cap_user_time_zero = auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO]; 3671 pt->tsc_bit = auxtrace_info->priv[INTEL_PT_TSC_BIT]; 3672 pt->noretcomp_bit = auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT]; 3673 pt->have_sched_switch = auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH]; 3674 pt->snapshot_mode = auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE]; 3675 pt->per_cpu_mmaps = auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS]; 3676 intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE, 3677 INTEL_PT_PER_CPU_MMAPS); 3678 3679 if (intel_pt_has(auxtrace_info, INTEL_PT_CYC_BIT)) { 3680 pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT]; 3681 pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS]; 3682 pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N]; 3683 pt->tsc_ctc_ratio_d = auxtrace_info->priv[INTEL_PT_TSC_CTC_D]; 3684 pt->cyc_bit = auxtrace_info->priv[INTEL_PT_CYC_BIT]; 3685 intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_MTC_BIT, 3686 INTEL_PT_CYC_BIT); 3687 } 3688 3689 if (intel_pt_has(auxtrace_info, INTEL_PT_MAX_NONTURBO_RATIO)) { 3690 pt->max_non_turbo_ratio = 3691 auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO]; 3692 intel_pt_print_info(&auxtrace_info->priv[0], 3693 INTEL_PT_MAX_NONTURBO_RATIO, 3694 INTEL_PT_MAX_NONTURBO_RATIO); 3695 } 3696 3697 info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1; 3698 info_end = (void *)info + auxtrace_info->header.size; 3699 3700 if (intel_pt_has(auxtrace_info, INTEL_PT_FILTER_STR_LEN)) { 3701 size_t len; 3702 3703 len = auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN]; 
3704 intel_pt_print_info(&auxtrace_info->priv[0], 3705 INTEL_PT_FILTER_STR_LEN, 3706 INTEL_PT_FILTER_STR_LEN); 3707 if (len) { 3708 const char *filter = (const char *)info; 3709 3710 len = roundup(len + 1, 8); 3711 info += len >> 3; 3712 if ((void *)info > info_end) { 3713 pr_err("%s: bad filter string length\n", __func__); 3714 err = -EINVAL; 3715 goto err_free_queues; 3716 } 3717 pt->filter = memdup(filter, len); 3718 if (!pt->filter) { 3719 err = -ENOMEM; 3720 goto err_free_queues; 3721 } 3722 if (session->header.needs_swap) 3723 mem_bswap_64(pt->filter, len); 3724 if (pt->filter[len - 1]) { 3725 pr_err("%s: filter string not null terminated\n", __func__); 3726 err = -EINVAL; 3727 goto err_free_queues; 3728 } 3729 err = addr_filters__parse_bare_filter(&pt->filts, 3730 filter); 3731 if (err) 3732 goto err_free_queues; 3733 } 3734 intel_pt_print_info_str("Filter string", pt->filter); 3735 } 3736 3737 pt->timeless_decoding = intel_pt_timeless_decoding(pt); 3738 if (pt->timeless_decoding && !pt->tc.time_mult) 3739 pt->tc.time_mult = 1; 3740 pt->have_tsc = intel_pt_have_tsc(pt); 3741 pt->sampling_mode = intel_pt_sampling_mode(pt); 3742 pt->est_tsc = !pt->timeless_decoding; 3743 3744 if (pt->synth_opts.vm_time_correlation) { 3745 if (pt->timeless_decoding) { 3746 pr_err("Intel PT has no time information for VM Time Correlation\n"); 3747 err = -EINVAL; 3748 goto err_free_queues; 3749 } 3750 if (session->itrace_synth_opts->ptime_range) { 3751 pr_err("Time ranges cannot be specified with VM Time Correlation\n"); 3752 err = -EINVAL; 3753 goto err_free_queues; 3754 } 3755 /* Currently TSC Offset is calculated using MTC packets */ 3756 if (!intel_pt_have_mtc(pt)) { 3757 pr_err("MTC packets must have been enabled for VM Time Correlation\n"); 3758 err = -EINVAL; 3759 goto err_free_queues; 3760 } 3761 err = intel_pt_parse_vm_tm_corr_args(pt); 3762 if (err) 3763 goto err_free_queues; 3764 } 3765 3766 pt->unknown_thread = thread__new(999999999, 999999999); 3767 if (!pt->unknown_thread) { 3768 err = -ENOMEM; 3769 goto err_free_queues; 3770 } 3771 3772 /* 3773 * Since this thread will not be kept in any rbtree nor in a 3774 * list, initialize its list node so that at thread__put() the 3775 * current thread lifetime assumption is kept and we don't segfault 3776 * at list_del_init().
3777 */ 3778 INIT_LIST_HEAD(&pt->unknown_thread->node); 3779 3780 err = thread__set_comm(pt->unknown_thread, "unknown", 0); 3781 if (err) 3782 goto err_delete_thread; 3783 if (thread__init_maps(pt->unknown_thread, pt->machine)) { 3784 err = -ENOMEM; 3785 goto err_delete_thread; 3786 } 3787 3788 pt->auxtrace.process_event = intel_pt_process_event; 3789 pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event; 3790 pt->auxtrace.queue_data = intel_pt_queue_data; 3791 pt->auxtrace.dump_auxtrace_sample = intel_pt_dump_sample; 3792 pt->auxtrace.flush_events = intel_pt_flush; 3793 pt->auxtrace.free_events = intel_pt_free_events; 3794 pt->auxtrace.free = intel_pt_free; 3795 pt->auxtrace.evsel_is_auxtrace = intel_pt_evsel_is_auxtrace; 3796 session->auxtrace = &pt->auxtrace; 3797 3798 if (dump_trace) 3799 return 0; 3800 3801 if (pt->have_sched_switch == 1) { 3802 pt->switch_evsel = intel_pt_find_sched_switch(session->evlist); 3803 if (!pt->switch_evsel) { 3804 pr_err("%s: missing sched_switch event\n", __func__); 3805 err = -EINVAL; 3806 goto err_delete_thread; 3807 } 3808 } else if (pt->have_sched_switch == 2 && 3809 !intel_pt_find_switch(session->evlist)) { 3810 pr_err("%s: missing context_switch attribute flag\n", __func__); 3811 err = -EINVAL; 3812 goto err_delete_thread; 3813 } 3814 3815 if (pt->synth_opts.log) 3816 intel_pt_log_enable(); 3817 3818 /* Maximum non-turbo ratio is TSC freq / 100 MHz */ 3819 if (pt->tc.time_mult) { 3820 u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000); 3821 3822 if (!pt->max_non_turbo_ratio) 3823 pt->max_non_turbo_ratio = 3824 (tsc_freq + 50000000) / 100000000; 3825 intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq); 3826 intel_pt_log("Maximum non-turbo ratio %u\n", 3827 pt->max_non_turbo_ratio); 3828 pt->cbr2khz = tsc_freq / pt->max_non_turbo_ratio / 1000; 3829 } 3830 3831 err = intel_pt_setup_time_ranges(pt, session->itrace_synth_opts); 3832 if (err) 3833 goto err_delete_thread; 3834 3835 if (pt->synth_opts.calls) 3836 pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | 3837 PERF_IP_FLAG_TRACE_END; 3838 if (pt->synth_opts.returns) 3839 pt->branches_filter |= PERF_IP_FLAG_RETURN | 3840 PERF_IP_FLAG_TRACE_BEGIN; 3841 3842 if ((pt->synth_opts.callchain || pt->synth_opts.add_callchain) && 3843 !symbol_conf.use_callchain) { 3844 symbol_conf.use_callchain = true; 3845 if (callchain_register_param(&callchain_param) < 0) { 3846 symbol_conf.use_callchain = false; 3847 pt->synth_opts.callchain = false; 3848 pt->synth_opts.add_callchain = false; 3849 } 3850 } 3851 3852 if (pt->synth_opts.add_callchain) { 3853 err = intel_pt_callchain_init(pt); 3854 if (err) 3855 goto err_delete_thread; 3856 } 3857 3858 if (pt->synth_opts.last_branch || pt->synth_opts.add_last_branch) { 3859 pt->br_stack_sz = pt->synth_opts.last_branch_sz; 3860 pt->br_stack_sz_plus = pt->br_stack_sz; 3861 } 3862 3863 if (pt->synth_opts.add_last_branch) { 3864 err = intel_pt_br_stack_init(pt); 3865 if (err) 3866 goto err_delete_thread; 3867 /* 3868 * Additional branch stack size to cater for tracing from the 3869 * actual sample ip to where the sample time is recorded. 3870 * Measured at about 200 branches, but generously set to 1024. 3871 * If kernel space is not being traced, then add just 1 for the 3872 * branch to kernel space. 
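 * br_stack_sz_plus is the branch stack size passed to thread_stack__event()
 * in intel_pt_sample().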
3873 */ 3874 if (intel_pt_tracing_kernel(pt)) 3875 pt->br_stack_sz_plus += 1024; 3876 else 3877 pt->br_stack_sz_plus += 1; 3878 } 3879 3880 pt->use_thread_stack = pt->synth_opts.callchain || 3881 pt->synth_opts.add_callchain || 3882 pt->synth_opts.thread_stack || 3883 pt->synth_opts.last_branch || 3884 pt->synth_opts.add_last_branch; 3885 3886 pt->callstack = pt->synth_opts.callchain || 3887 pt->synth_opts.add_callchain || 3888 pt->synth_opts.thread_stack; 3889 3890 err = intel_pt_synth_events(pt, session); 3891 if (err) 3892 goto err_delete_thread; 3893 3894 intel_pt_setup_pebs_events(pt); 3895 3896 if (pt->sampling_mode || list_empty(&session->auxtrace_index)) 3897 err = auxtrace_queue_data(session, true, true); 3898 else 3899 err = auxtrace_queues__process_index(&pt->queues, session); 3900 if (err) 3901 goto err_delete_thread; 3902 3903 if (pt->queues.populated) 3904 pt->data_queued = true; 3905 3906 if (pt->timeless_decoding) 3907 pr_debug2("Intel PT decoding without timestamps\n"); 3908 3909 return 0; 3910 3911 err_delete_thread: 3912 zfree(&pt->chain); 3913 thread__zput(pt->unknown_thread); 3914 err_free_queues: 3915 intel_pt_log_disable(); 3916 auxtrace_queues__free(&pt->queues); 3917 session->auxtrace = NULL; 3918 err_free: 3919 addr_filters__exit(&pt->filts); 3920 zfree(&pt->filter); 3921 zfree(&pt->time_ranges); 3922 free(pt); 3923 return err; 3924 } 3925