1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * intel_pt.c: Intel Processor Trace support 4 * Copyright (c) 2013-2015, Intel Corporation. 5 */ 6 #include "../../../util/intel-pt.h" 7 8 #include <errno.h> 9 #include <stdbool.h> 10 11 #include <linux/bitops.h> 12 #include <linux/err.h> 13 #include <linux/kernel.h> 14 #include <linux/log2.h> 15 #include <linux/types.h> 16 #include <linux/zalloc.h> 17 18 #include <api/fs/fs.h> 19 #include <internal/lib.h> // page_size 20 #include <subcmd/parse-options.h> 21 22 #include "../../../util/auxtrace.h" 23 #include "../../../util/config.h" 24 #include "../../../util/cpumap.h" 25 #include "../../../util/debug.h" 26 #include "../../../util/event.h" 27 #include "../../../util/evlist.h" 28 #include "../../../util/evsel.h" 29 #include "../../../util/evsel_config.h" 30 #include "../../../util/mmap.h" 31 #include "../../../util/parse-events.h" 32 #include "../../../util/perf_api_probe.h" 33 #include "../../../util/pmu.h" 34 #include "../../../util/pmus.h" 35 #include "../../../util/record.h" 36 #include "../../../util/session.h" 37 #include "../../../util/target.h" 38 #include "../../../util/tsc.h" 39 #include "cpuid.h" 40 41 #define KiB(x) ((x) * 1024) 42 #define MiB(x) ((x) * 1024 * 1024) 43 #define KiB_MASK(x) (KiB(x) - 1) 44 #define MiB_MASK(x) (MiB(x) - 1) 45 46 #define INTEL_PT_PSB_PERIOD_NEAR 256 47 48 struct intel_pt_snapshot_ref { 49 void *ref_buf; 50 size_t ref_offset; 51 bool wrapped; 52 }; 53 54 struct intel_pt_recording { 55 struct auxtrace_record itr; 56 struct perf_pmu *intel_pt_pmu; 57 int have_sched_switch; 58 struct evlist *evlist; 59 bool all_switch_events; 60 bool snapshot_mode; 61 bool snapshot_init_done; 62 size_t snapshot_size; 63 size_t snapshot_ref_buf_size; 64 int snapshot_ref_cnt; 65 struct intel_pt_snapshot_ref *snapshot_refs; 66 size_t priv_size; 67 }; 68 69 static int intel_pt_parse_terms_with_default(const struct perf_pmu *pmu, 70 const char *str, 71 u64 *config) 72 { 73 struct parse_events_terms terms; 74 struct perf_event_attr attr = { .size = 0, }; 75 int err; 76 77 parse_events_terms__init(&terms); 78 err = parse_events_terms(&terms, str); 79 if (err) 80 goto out_free; 81 82 attr.config = *config; 83 err = perf_pmu__config_terms(pmu, &attr, &terms, /*zero=*/true, /*apply_hardcoded=*/false, 84 /*err=*/NULL); 85 if (err) 86 goto out_free; 87 88 *config = attr.config; 89 out_free: 90 parse_events_terms__exit(&terms); 91 return err; 92 } 93 94 static int intel_pt_parse_terms(const struct perf_pmu *pmu, const char *str, u64 *config) 95 { 96 *config = 0; 97 return intel_pt_parse_terms_with_default(pmu, str, config); 98 } 99 100 static u64 intel_pt_masked_bits(u64 mask, u64 bits) 101 { 102 const u64 top_bit = 1ULL << 63; 103 u64 res = 0; 104 int i; 105 106 for (i = 0; i < 64; i++) { 107 if (mask & top_bit) { 108 res <<= 1; 109 if (bits & top_bit) 110 res |= 1; 111 } 112 mask <<= 1; 113 bits <<= 1; 114 } 115 116 return res; 117 } 118 119 static int intel_pt_read_config(struct perf_pmu *intel_pt_pmu, const char *str, 120 struct evlist *evlist, u64 *res) 121 { 122 struct evsel *evsel; 123 u64 mask; 124 125 *res = 0; 126 127 mask = perf_pmu__format_bits(intel_pt_pmu, str); 128 if (!mask) 129 return -EINVAL; 130 131 evlist__for_each_entry(evlist, evsel) { 132 if (evsel->core.attr.type == intel_pt_pmu->type) { 133 *res = intel_pt_masked_bits(mask, evsel->core.attr.config); 134 return 0; 135 } 136 } 137 138 return -EINVAL; 139 } 140 141 static size_t intel_pt_psb_period(struct perf_pmu *intel_pt_pmu, 142 struct evlist *evlist) 143 { 144 u64 val; 145 int err, topa_multiple_entries; 146 size_t psb_period; 147 148 if (perf_pmu__scan_file(intel_pt_pmu, "caps/topa_multiple_entries", 149 "%d", &topa_multiple_entries) != 1) 150 topa_multiple_entries = 0; 151 152 /* 153 * Use caps/topa_multiple_entries to indicate early hardware that had 154 * extra frequent PSBs. 155 */ 156 if (!topa_multiple_entries) { 157 psb_period = 256; 158 goto out; 159 } 160 161 err = intel_pt_read_config(intel_pt_pmu, "psb_period", evlist, &val); 162 if (err) 163 val = 0; 164 165 psb_period = 1 << (val + 11); 166 out: 167 pr_debug2("%s psb_period %zu\n", intel_pt_pmu->name, psb_period); 168 return psb_period; 169 } 170 171 static int intel_pt_pick_bit(int bits, int target) 172 { 173 int pos, pick = -1; 174 175 for (pos = 0; bits; bits >>= 1, pos++) { 176 if (bits & 1) { 177 if (pos <= target || pick < 0) 178 pick = pos; 179 if (pos >= target) 180 break; 181 } 182 } 183 184 return pick; 185 } 186 187 static u64 intel_pt_default_config(const struct perf_pmu *intel_pt_pmu) 188 { 189 char buf[256]; 190 int mtc, mtc_periods = 0, mtc_period; 191 int psb_cyc, psb_periods, psb_period; 192 int pos = 0; 193 u64 config; 194 char c; 195 int dirfd; 196 197 dirfd = perf_pmu__event_source_devices_fd(); 198 199 pos += scnprintf(buf + pos, sizeof(buf) - pos, "tsc"); 200 201 if (perf_pmu__scan_file_at(intel_pt_pmu, dirfd, "caps/mtc", "%d", 202 &mtc) != 1) 203 mtc = 1; 204 205 if (mtc) { 206 if (perf_pmu__scan_file_at(intel_pt_pmu, dirfd, "caps/mtc_periods", "%x", 207 &mtc_periods) != 1) 208 mtc_periods = 0; 209 if (mtc_periods) { 210 mtc_period = intel_pt_pick_bit(mtc_periods, 3); 211 pos += scnprintf(buf + pos, sizeof(buf) - pos, 212 ",mtc,mtc_period=%d", mtc_period); 213 } 214 } 215 216 if (perf_pmu__scan_file_at(intel_pt_pmu, dirfd, "caps/psb_cyc", "%d", 217 &psb_cyc) != 1) 218 psb_cyc = 1; 219 220 if (psb_cyc && mtc_periods) { 221 if (perf_pmu__scan_file_at(intel_pt_pmu, dirfd, "caps/psb_periods", "%x", 222 &psb_periods) != 1) 223 psb_periods = 0; 224 if (psb_periods) { 225 psb_period = intel_pt_pick_bit(psb_periods, 3); 226 pos += scnprintf(buf + pos, sizeof(buf) - pos, 227 ",psb_period=%d", psb_period); 228 } 229 } 230 231 if (perf_pmu__scan_file_at(intel_pt_pmu, dirfd, "format/pt", "%c", &c) == 1 && 232 perf_pmu__scan_file_at(intel_pt_pmu, dirfd, "format/branch", "%c", &c) == 1) 233 pos += scnprintf(buf + pos, sizeof(buf) - pos, ",pt,branch"); 234 235 pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf); 236 237 intel_pt_parse_terms(intel_pt_pmu, buf, &config); 238 239 close(dirfd); 240 return config; 241 } 242 243 static int intel_pt_parse_snapshot_options(struct auxtrace_record *itr, 244 struct record_opts *opts, 245 const char *str) 246 { 247 struct intel_pt_recording *ptr = 248 container_of(itr, struct intel_pt_recording, itr); 249 unsigned long long snapshot_size = 0; 250 char *endptr; 251 252 if (str) { 253 snapshot_size = strtoull(str, &endptr, 0); 254 if (*endptr || snapshot_size > SIZE_MAX) 255 return -1; 256 } 257 258 opts->auxtrace_snapshot_mode = true; 259 opts->auxtrace_snapshot_size = snapshot_size; 260 261 ptr->snapshot_size = snapshot_size; 262 263 return 0; 264 } 265 266 void intel_pt_pmu_default_config(const struct perf_pmu *intel_pt_pmu, 267 struct perf_event_attr *attr) 268 { 269 static u64 config; 270 static bool initialized; 271 272 if (!initialized) { 273 config = intel_pt_default_config(intel_pt_pmu); 274 initialized = true; 275 } 276 attr->config = config; 277 } 278 279 static const char *intel_pt_find_filter(struct evlist *evlist, 280 struct perf_pmu *intel_pt_pmu) 281 { 282 struct evsel *evsel; 283 284 evlist__for_each_entry(evlist, evsel) { 285 if (evsel->core.attr.type == intel_pt_pmu->type) 286 return evsel->filter; 287 } 288 289 return NULL; 290 } 291 292 static size_t intel_pt_filter_bytes(const char *filter) 293 { 294 size_t len = filter ? strlen(filter) : 0; 295 296 return len ? roundup(len + 1, 8) : 0; 297 } 298 299 static size_t 300 intel_pt_info_priv_size(struct auxtrace_record *itr, struct evlist *evlist) 301 { 302 struct intel_pt_recording *ptr = 303 container_of(itr, struct intel_pt_recording, itr); 304 const char *filter = intel_pt_find_filter(evlist, ptr->intel_pt_pmu); 305 306 ptr->priv_size = (INTEL_PT_AUXTRACE_PRIV_MAX * sizeof(u64)) + 307 intel_pt_filter_bytes(filter); 308 ptr->priv_size += sizeof(u64); /* Cap Event Trace */ 309 310 return ptr->priv_size; 311 } 312 313 static void intel_pt_tsc_ctc_ratio(u32 *n, u32 *d) 314 { 315 unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0; 316 317 cpuid(0x15, 0, &eax, &ebx, &ecx, &edx); 318 *n = ebx; 319 *d = eax; 320 } 321 322 static int intel_pt_info_fill(struct auxtrace_record *itr, 323 struct perf_session *session, 324 struct perf_record_auxtrace_info *auxtrace_info, 325 size_t priv_size) 326 { 327 struct intel_pt_recording *ptr = 328 container_of(itr, struct intel_pt_recording, itr); 329 struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu; 330 struct perf_event_mmap_page *pc; 331 struct perf_tsc_conversion tc = { .time_mult = 0, }; 332 bool cap_user_time_zero = false, per_cpu_mmaps; 333 u64 tsc_bit, mtc_bit, mtc_freq_bits, cyc_bit, noretcomp_bit; 334 u32 tsc_ctc_ratio_n, tsc_ctc_ratio_d; 335 unsigned long max_non_turbo_ratio; 336 size_t filter_str_len; 337 const char *filter; 338 int event_trace; 339 __u64 *info; 340 int err; 341 342 if (priv_size != ptr->priv_size) 343 return -EINVAL; 344 345 intel_pt_parse_terms(intel_pt_pmu, "tsc", &tsc_bit); 346 intel_pt_parse_terms(intel_pt_pmu, "noretcomp", &noretcomp_bit); 347 intel_pt_parse_terms(intel_pt_pmu, "mtc", &mtc_bit); 348 mtc_freq_bits = perf_pmu__format_bits(intel_pt_pmu, "mtc_period"); 349 intel_pt_parse_terms(intel_pt_pmu, "cyc", &cyc_bit); 350 351 intel_pt_tsc_ctc_ratio(&tsc_ctc_ratio_n, &tsc_ctc_ratio_d); 352 353 if (perf_pmu__scan_file(intel_pt_pmu, "max_nonturbo_ratio", 354 "%lu", &max_non_turbo_ratio) != 1) 355 max_non_turbo_ratio = 0; 356 if (perf_pmu__scan_file(intel_pt_pmu, "caps/event_trace", 357 "%d", &event_trace) != 1) 358 event_trace = 0; 359 360 filter = intel_pt_find_filter(session->evlist, ptr->intel_pt_pmu); 361 filter_str_len = filter ? strlen(filter) : 0; 362 363 if (!session->evlist->core.nr_mmaps) 364 return -EINVAL; 365 366 pc = session->evlist->mmap[0].core.base; 367 if (pc) { 368 err = perf_read_tsc_conversion(pc, &tc); 369 if (err) { 370 if (err != -EOPNOTSUPP) 371 return err; 372 } else { 373 cap_user_time_zero = tc.time_mult != 0; 374 } 375 if (!cap_user_time_zero) 376 ui__warning("Intel Processor Trace: TSC not available\n"); 377 } 378 379 per_cpu_mmaps = !perf_cpu_map__is_any_cpu_or_is_empty(session->evlist->core.user_requested_cpus); 380 381 auxtrace_info->type = PERF_AUXTRACE_INTEL_PT; 382 auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type; 383 auxtrace_info->priv[INTEL_PT_TIME_SHIFT] = tc.time_shift; 384 auxtrace_info->priv[INTEL_PT_TIME_MULT] = tc.time_mult; 385 auxtrace_info->priv[INTEL_PT_TIME_ZERO] = tc.time_zero; 386 auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO] = cap_user_time_zero; 387 auxtrace_info->priv[INTEL_PT_TSC_BIT] = tsc_bit; 388 auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT] = noretcomp_bit; 389 auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH] = ptr->have_sched_switch; 390 auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE] = ptr->snapshot_mode; 391 auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS] = per_cpu_mmaps; 392 auxtrace_info->priv[INTEL_PT_MTC_BIT] = mtc_bit; 393 auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS] = mtc_freq_bits; 394 auxtrace_info->priv[INTEL_PT_TSC_CTC_N] = tsc_ctc_ratio_n; 395 auxtrace_info->priv[INTEL_PT_TSC_CTC_D] = tsc_ctc_ratio_d; 396 auxtrace_info->priv[INTEL_PT_CYC_BIT] = cyc_bit; 397 auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO] = max_non_turbo_ratio; 398 auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] = filter_str_len; 399 400 info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1; 401 402 if (filter_str_len) { 403 size_t len = intel_pt_filter_bytes(filter); 404 405 strncpy((char *)info, filter, len); 406 info += len >> 3; 407 } 408 409 *info++ = event_trace; 410 411 return 0; 412 } 413 414 #ifdef HAVE_LIBTRACEEVENT 415 static int intel_pt_track_switches(struct evlist *evlist) 416 { 417 const char *sched_switch = "sched:sched_switch"; 418 struct evsel *evsel; 419 int err; 420 421 if (!evlist__can_select_event(evlist, sched_switch)) 422 return -EPERM; 423 424 evsel = evlist__add_sched_switch(evlist, true); 425 if (IS_ERR(evsel)) { 426 err = PTR_ERR(evsel); 427 pr_debug2("%s: failed to create %s, error = %d\n", 428 __func__, sched_switch, err); 429 return err; 430 } 431 432 evsel->immediate = true; 433 434 return 0; 435 } 436 #endif 437 438 static bool intel_pt_exclude_guest(void) 439 { 440 int pt_mode; 441 442 if (sysfs__read_int("module/kvm_intel/parameters/pt_mode", &pt_mode)) 443 pt_mode = 0; 444 445 return pt_mode == 1; 446 } 447 448 static void intel_pt_valid_str(char *str, size_t len, u64 valid) 449 { 450 unsigned int val, last = 0, state = 1; 451 int p = 0; 452 453 str[0] = '\0'; 454 455 for (val = 0; val <= 64; val++, valid >>= 1) { 456 if (valid & 1) { 457 last = val; 458 switch (state) { 459 case 0: 460 p += scnprintf(str + p, len - p, ","); 461 /* Fall through */ 462 case 1: 463 p += scnprintf(str + p, len - p, "%u", val); 464 state = 2; 465 break; 466 case 2: 467 state = 3; 468 break; 469 case 3: 470 state = 4; 471 break; 472 default: 473 break; 474 } 475 } else { 476 switch (state) { 477 case 3: 478 p += scnprintf(str + p, len - p, ",%u", last); 479 state = 0; 480 break; 481 case 4: 482 p += scnprintf(str + p, len - p, "-%u", last); 483 state = 0; 484 break; 485 default: 486 break; 487 } 488 if (state != 1) 489 state = 0; 490 } 491 } 492 } 493 494 static int intel_pt_val_config_term(struct perf_pmu *intel_pt_pmu, int dirfd, 495 const char *caps, const char *name, 496 const char *supported, u64 config) 497 { 498 char valid_str[256]; 499 unsigned int shift; 500 unsigned long long valid; 501 u64 bits; 502 int ok; 503 504 if (perf_pmu__scan_file_at(intel_pt_pmu, dirfd, caps, "%llx", &valid) != 1) 505 valid = 0; 506 507 if (supported && 508 perf_pmu__scan_file_at(intel_pt_pmu, dirfd, supported, "%d", &ok) == 1 && !ok) 509 valid = 0; 510 511 valid |= 1; 512 513 bits = perf_pmu__format_bits(intel_pt_pmu, name); 514 515 config &= bits; 516 517 for (shift = 0; bits && !(bits & 1); shift++) 518 bits >>= 1; 519 520 config >>= shift; 521 522 if (config > 63) 523 goto out_err; 524 525 if (valid & (1 << config)) 526 return 0; 527 out_err: 528 intel_pt_valid_str(valid_str, sizeof(valid_str), valid); 529 pr_err("Invalid %s for %s. Valid values are: %s\n", 530 name, INTEL_PT_PMU_NAME, valid_str); 531 return -EINVAL; 532 } 533 534 static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu, 535 struct evsel *evsel) 536 { 537 int err, dirfd; 538 char c; 539 540 if (!evsel) 541 return 0; 542 543 dirfd = perf_pmu__event_source_devices_fd(); 544 if (dirfd < 0) 545 return dirfd; 546 547 /* 548 * If supported, force pass-through config term (pt=1) even if user 549 * sets pt=0, which avoids senseless kernel errors. 550 */ 551 if (perf_pmu__scan_file_at(intel_pt_pmu, dirfd, "format/pt", "%c", &c) == 1 && 552 !(evsel->core.attr.config & 1)) { 553 pr_warning("pt=0 doesn't make sense, forcing pt=1\n"); 554 evsel->core.attr.config |= 1; 555 } 556 557 err = intel_pt_val_config_term(intel_pt_pmu, dirfd, "caps/cycle_thresholds", 558 "cyc_thresh", "caps/psb_cyc", 559 evsel->core.attr.config); 560 if (err) 561 goto out; 562 563 err = intel_pt_val_config_term(intel_pt_pmu, dirfd, "caps/mtc_periods", 564 "mtc_period", "caps/mtc", 565 evsel->core.attr.config); 566 if (err) 567 goto out; 568 569 err = intel_pt_val_config_term(intel_pt_pmu, dirfd, "caps/psb_periods", 570 "psb_period", "caps/psb_cyc", 571 evsel->core.attr.config); 572 573 out: 574 close(dirfd); 575 return err; 576 } 577 578 static void intel_pt_min_max_sample_sz(struct evlist *evlist, 579 size_t *min_sz, size_t *max_sz) 580 { 581 struct evsel *evsel; 582 583 evlist__for_each_entry(evlist, evsel) { 584 size_t sz = evsel->core.attr.aux_sample_size; 585 586 if (!sz) 587 continue; 588 if (min_sz && (sz < *min_sz || !*min_sz)) 589 *min_sz = sz; 590 if (max_sz && sz > *max_sz) 591 *max_sz = sz; 592 } 593 } 594 595 /* 596 * Currently, there is not enough information to disambiguate different PEBS 597 * events, so only allow one. 598 */ 599 static bool intel_pt_too_many_aux_output(struct evlist *evlist) 600 { 601 struct evsel *evsel; 602 int aux_output_cnt = 0; 603 604 evlist__for_each_entry(evlist, evsel) 605 aux_output_cnt += !!evsel->core.attr.aux_output; 606 607 if (aux_output_cnt > 1) { 608 pr_err(INTEL_PT_PMU_NAME " supports at most one event with aux-output\n"); 609 return true; 610 } 611 612 return false; 613 } 614 615 static int intel_pt_recording_options(struct auxtrace_record *itr, 616 struct evlist *evlist, 617 struct record_opts *opts) 618 { 619 struct intel_pt_recording *ptr = 620 container_of(itr, struct intel_pt_recording, itr); 621 struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu; 622 bool have_timing_info, need_immediate = false; 623 struct evsel *evsel, *intel_pt_evsel = NULL; 624 const struct perf_cpu_map *cpus = evlist->core.user_requested_cpus; 625 bool privileged = perf_event_paranoid_check(-1); 626 u64 tsc_bit; 627 int err; 628 629 ptr->evlist = evlist; 630 ptr->snapshot_mode = opts->auxtrace_snapshot_mode; 631 632 evlist__for_each_entry(evlist, evsel) { 633 if (evsel->core.attr.type == intel_pt_pmu->type) { 634 if (intel_pt_evsel) { 635 pr_err("There may be only one " INTEL_PT_PMU_NAME " event\n"); 636 return -EINVAL; 637 } 638 evsel->core.attr.freq = 0; 639 evsel->core.attr.sample_period = 1; 640 evsel->core.attr.exclude_guest = intel_pt_exclude_guest(); 641 evsel->no_aux_samples = true; 642 evsel->needs_auxtrace_mmap = true; 643 intel_pt_evsel = evsel; 644 opts->full_auxtrace = true; 645 } 646 } 647 648 if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) { 649 pr_err("Snapshot mode (-S option) requires " INTEL_PT_PMU_NAME " PMU event (-e " INTEL_PT_PMU_NAME ")\n"); 650 return -EINVAL; 651 } 652 653 if (opts->auxtrace_snapshot_mode && opts->auxtrace_sample_mode) { 654 pr_err("Snapshot mode (" INTEL_PT_PMU_NAME " PMU) and sample trace cannot be used together\n"); 655 return -EINVAL; 656 } 657 658 if (opts->use_clockid) { 659 pr_err("Cannot use clockid (-k option) with " INTEL_PT_PMU_NAME "\n"); 660 return -EINVAL; 661 } 662 663 if (intel_pt_too_many_aux_output(evlist)) 664 return -EINVAL; 665 666 if (!opts->full_auxtrace) 667 return 0; 668 669 if (opts->auxtrace_sample_mode) 670 evsel__set_config_if_unset(intel_pt_evsel, "psb_period", 0); 671 672 err = intel_pt_validate_config(intel_pt_pmu, intel_pt_evsel); 673 if (err) 674 return err; 675 676 /* Set default sizes for snapshot mode */ 677 if (opts->auxtrace_snapshot_mode) { 678 size_t psb_period = intel_pt_psb_period(intel_pt_pmu, evlist); 679 680 if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) { 681 if (privileged) { 682 opts->auxtrace_mmap_pages = MiB(4) / page_size; 683 } else { 684 opts->auxtrace_mmap_pages = KiB(128) / page_size; 685 if (opts->mmap_pages == UINT_MAX) 686 opts->mmap_pages = KiB(256) / page_size; 687 } 688 } else if (!opts->auxtrace_mmap_pages && !privileged && 689 opts->mmap_pages == UINT_MAX) { 690 opts->mmap_pages = KiB(256) / page_size; 691 } 692 if (!opts->auxtrace_snapshot_size) 693 opts->auxtrace_snapshot_size = 694 opts->auxtrace_mmap_pages * (size_t)page_size; 695 if (!opts->auxtrace_mmap_pages) { 696 size_t sz = opts->auxtrace_snapshot_size; 697 698 sz = round_up(sz, page_size) / page_size; 699 opts->auxtrace_mmap_pages = roundup_pow_of_two(sz); 700 } 701 if (opts->auxtrace_snapshot_size > 702 opts->auxtrace_mmap_pages * (size_t)page_size) { 703 pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n", 704 opts->auxtrace_snapshot_size, 705 opts->auxtrace_mmap_pages * (size_t)page_size); 706 return -EINVAL; 707 } 708 if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) { 709 pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n"); 710 return -EINVAL; 711 } 712 pr_debug2("Intel PT snapshot size: %zu\n", 713 opts->auxtrace_snapshot_size); 714 if (psb_period && 715 opts->auxtrace_snapshot_size <= psb_period + 716 INTEL_PT_PSB_PERIOD_NEAR) 717 ui__warning("Intel PT snapshot size (%zu) may be too small for PSB period (%zu)\n", 718 opts->auxtrace_snapshot_size, psb_period); 719 } 720 721 /* Set default sizes for sample mode */ 722 if (opts->auxtrace_sample_mode) { 723 size_t psb_period = intel_pt_psb_period(intel_pt_pmu, evlist); 724 size_t min_sz = 0, max_sz = 0; 725 726 intel_pt_min_max_sample_sz(evlist, &min_sz, &max_sz); 727 if (!opts->auxtrace_mmap_pages && !privileged && 728 opts->mmap_pages == UINT_MAX) 729 opts->mmap_pages = KiB(256) / page_size; 730 if (!opts->auxtrace_mmap_pages) { 731 size_t sz = round_up(max_sz, page_size) / page_size; 732 733 opts->auxtrace_mmap_pages = roundup_pow_of_two(sz); 734 } 735 if (max_sz > opts->auxtrace_mmap_pages * (size_t)page_size) { 736 pr_err("Sample size %zu must not be greater than AUX area tracing mmap size %zu\n", 737 max_sz, 738 opts->auxtrace_mmap_pages * (size_t)page_size); 739 return -EINVAL; 740 } 741 pr_debug2("Intel PT min. sample size: %zu max. sample size: %zu\n", 742 min_sz, max_sz); 743 if (psb_period && 744 min_sz <= psb_period + INTEL_PT_PSB_PERIOD_NEAR) 745 ui__warning("Intel PT sample size (%zu) may be too small for PSB period (%zu)\n", 746 min_sz, psb_period); 747 } 748 749 /* Set default sizes for full trace mode */ 750 if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) { 751 if (privileged) { 752 opts->auxtrace_mmap_pages = MiB(4) / page_size; 753 } else { 754 opts->auxtrace_mmap_pages = KiB(128) / page_size; 755 if (opts->mmap_pages == UINT_MAX) 756 opts->mmap_pages = KiB(256) / page_size; 757 } 758 } 759 760 /* Validate auxtrace_mmap_pages */ 761 if (opts->auxtrace_mmap_pages) { 762 size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size; 763 size_t min_sz; 764 765 if (opts->auxtrace_snapshot_mode || opts->auxtrace_sample_mode) 766 min_sz = KiB(4); 767 else 768 min_sz = KiB(8); 769 770 if (sz < min_sz || !is_power_of_2(sz)) { 771 pr_err("Invalid mmap size for Intel Processor Trace: must be at least %zuKiB and a power of 2\n", 772 min_sz / 1024); 773 return -EINVAL; 774 } 775 } 776 777 if (!opts->auxtrace_snapshot_mode && !opts->auxtrace_sample_mode) { 778 size_t aw = opts->auxtrace_mmap_pages * (size_t)page_size / 4; 779 u32 aux_watermark = aw > UINT_MAX ? UINT_MAX : aw; 780 781 intel_pt_evsel->core.attr.aux_watermark = aux_watermark; 782 } 783 784 intel_pt_parse_terms(intel_pt_pmu, "tsc", &tsc_bit); 785 786 if (opts->full_auxtrace && (intel_pt_evsel->core.attr.config & tsc_bit)) 787 have_timing_info = true; 788 else 789 have_timing_info = false; 790 791 /* 792 * Per-cpu recording needs sched_switch events to distinguish different 793 * threads. 794 */ 795 if (have_timing_info && !perf_cpu_map__is_any_cpu_or_is_empty(cpus) && 796 !record_opts__no_switch_events(opts)) { 797 if (perf_can_record_switch_events()) { 798 bool cpu_wide = !target__none(&opts->target) && 799 !target__has_task(&opts->target); 800 801 if (ptr->all_switch_events && !cpu_wide && perf_can_record_cpu_wide()) { 802 struct evsel *switch_evsel; 803 804 switch_evsel = evlist__add_dummy_on_all_cpus(evlist); 805 if (!switch_evsel) 806 return -ENOMEM; 807 808 switch_evsel->core.attr.context_switch = 1; 809 switch_evsel->immediate = true; 810 811 evsel__set_sample_bit(switch_evsel, TID); 812 evsel__set_sample_bit(switch_evsel, TIME); 813 evsel__set_sample_bit(switch_evsel, CPU); 814 evsel__reset_sample_bit(switch_evsel, BRANCH_STACK); 815 816 opts->record_switch_events = false; 817 ptr->have_sched_switch = 3; 818 } else { 819 opts->record_switch_events = true; 820 need_immediate = true; 821 if (cpu_wide) 822 ptr->have_sched_switch = 3; 823 else 824 ptr->have_sched_switch = 2; 825 } 826 } else { 827 #ifdef HAVE_LIBTRACEEVENT 828 err = intel_pt_track_switches(evlist); 829 if (err == -EPERM) 830 pr_debug2("Unable to select sched:sched_switch\n"); 831 else if (err) 832 return err; 833 else 834 ptr->have_sched_switch = 1; 835 #endif 836 } 837 } 838 839 if (have_timing_info && !intel_pt_evsel->core.attr.exclude_kernel && 840 perf_can_record_text_poke_events() && perf_can_record_cpu_wide()) 841 opts->text_poke = true; 842 843 if (intel_pt_evsel) { 844 /* 845 * To obtain the auxtrace buffer file descriptor, the auxtrace 846 * event must come first. 847 */ 848 evlist__to_front(evlist, intel_pt_evsel); 849 /* 850 * In the case of per-cpu mmaps, we need the CPU on the 851 * AUX event. 852 */ 853 if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) 854 evsel__set_sample_bit(intel_pt_evsel, CPU); 855 } 856 857 /* Add dummy event to keep tracking */ 858 if (opts->full_auxtrace) { 859 bool need_system_wide_tracking; 860 struct evsel *tracking_evsel; 861 862 /* 863 * User space tasks can migrate between CPUs, so when tracing 864 * selected CPUs, sideband for all CPUs is still needed. 865 */ 866 need_system_wide_tracking = opts->target.cpu_list && 867 !intel_pt_evsel->core.attr.exclude_user; 868 869 tracking_evsel = evlist__add_aux_dummy(evlist, need_system_wide_tracking); 870 if (!tracking_evsel) 871 return -ENOMEM; 872 873 evlist__set_tracking_event(evlist, tracking_evsel); 874 875 if (need_immediate) 876 tracking_evsel->immediate = true; 877 878 /* In per-cpu case, always need the time of mmap events etc */ 879 if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) { 880 evsel__set_sample_bit(tracking_evsel, TIME); 881 /* And the CPU for switch events */ 882 evsel__set_sample_bit(tracking_evsel, CPU); 883 } 884 evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK); 885 } 886 887 /* 888 * Warn the user when we do not have enough information to decode i.e. 889 * per-cpu with no sched_switch (except workload-only). 890 */ 891 if (!ptr->have_sched_switch && !perf_cpu_map__is_any_cpu_or_is_empty(cpus) && 892 !target__none(&opts->target) && 893 !intel_pt_evsel->core.attr.exclude_user) 894 ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n"); 895 896 return 0; 897 } 898 899 static int intel_pt_snapshot_start(struct auxtrace_record *itr) 900 { 901 struct intel_pt_recording *ptr = 902 container_of(itr, struct intel_pt_recording, itr); 903 struct evsel *evsel; 904 905 evlist__for_each_entry(ptr->evlist, evsel) { 906 if (evsel->core.attr.type == ptr->intel_pt_pmu->type) 907 return evsel__disable(evsel); 908 } 909 return -EINVAL; 910 } 911 912 static int intel_pt_snapshot_finish(struct auxtrace_record *itr) 913 { 914 struct intel_pt_recording *ptr = 915 container_of(itr, struct intel_pt_recording, itr); 916 struct evsel *evsel; 917 918 evlist__for_each_entry(ptr->evlist, evsel) { 919 if (evsel->core.attr.type == ptr->intel_pt_pmu->type) 920 return evsel__enable(evsel); 921 } 922 return -EINVAL; 923 } 924 925 static int intel_pt_alloc_snapshot_refs(struct intel_pt_recording *ptr, int idx) 926 { 927 const size_t sz = sizeof(struct intel_pt_snapshot_ref); 928 int cnt = ptr->snapshot_ref_cnt, new_cnt = cnt * 2; 929 struct intel_pt_snapshot_ref *refs; 930 931 if (!new_cnt) 932 new_cnt = 16; 933 934 while (new_cnt <= idx) 935 new_cnt *= 2; 936 937 refs = calloc(new_cnt, sz); 938 if (!refs) 939 return -ENOMEM; 940 941 memcpy(refs, ptr->snapshot_refs, cnt * sz); 942 943 ptr->snapshot_refs = refs; 944 ptr->snapshot_ref_cnt = new_cnt; 945 946 return 0; 947 } 948 949 static void intel_pt_free_snapshot_refs(struct intel_pt_recording *ptr) 950 { 951 int i; 952 953 for (i = 0; i < ptr->snapshot_ref_cnt; i++) 954 zfree(&ptr->snapshot_refs[i].ref_buf); 955 zfree(&ptr->snapshot_refs); 956 } 957 958 static void intel_pt_recording_free(struct auxtrace_record *itr) 959 { 960 struct intel_pt_recording *ptr = 961 container_of(itr, struct intel_pt_recording, itr); 962 963 intel_pt_free_snapshot_refs(ptr); 964 free(ptr); 965 } 966 967 static int intel_pt_alloc_snapshot_ref(struct intel_pt_recording *ptr, int idx, 968 size_t snapshot_buf_size) 969 { 970 size_t ref_buf_size = ptr->snapshot_ref_buf_size; 971 void *ref_buf; 972 973 ref_buf = zalloc(ref_buf_size); 974 if (!ref_buf) 975 return -ENOMEM; 976 977 ptr->snapshot_refs[idx].ref_buf = ref_buf; 978 ptr->snapshot_refs[idx].ref_offset = snapshot_buf_size - ref_buf_size; 979 980 return 0; 981 } 982 983 static size_t intel_pt_snapshot_ref_buf_size(struct intel_pt_recording *ptr, 984 size_t snapshot_buf_size) 985 { 986 const size_t max_size = 256 * 1024; 987 size_t buf_size = 0, psb_period; 988 989 if (ptr->snapshot_size <= 64 * 1024) 990 return 0; 991 992 psb_period = intel_pt_psb_period(ptr->intel_pt_pmu, ptr->evlist); 993 if (psb_period) 994 buf_size = psb_period * 2; 995 996 if (!buf_size || buf_size > max_size) 997 buf_size = max_size; 998 999 if (buf_size >= snapshot_buf_size) 1000 return 0; 1001 1002 if (buf_size >= ptr->snapshot_size / 2) 1003 return 0; 1004 1005 return buf_size; 1006 } 1007 1008 static int intel_pt_snapshot_init(struct intel_pt_recording *ptr, 1009 size_t snapshot_buf_size) 1010 { 1011 if (ptr->snapshot_init_done) 1012 return 0; 1013 1014 ptr->snapshot_init_done = true; 1015 1016 ptr->snapshot_ref_buf_size = intel_pt_snapshot_ref_buf_size(ptr, 1017 snapshot_buf_size); 1018 1019 return 0; 1020 } 1021 1022 /** 1023 * intel_pt_compare_buffers - compare bytes in a buffer to a circular buffer. 1024 * @buf1: first buffer 1025 * @compare_size: number of bytes to compare 1026 * @buf2: second buffer (a circular buffer) 1027 * @offs2: offset in second buffer 1028 * @buf2_size: size of second buffer 1029 * 1030 * The comparison allows for the possibility that the bytes to compare in the 1031 * circular buffer are not contiguous. It is assumed that @compare_size <= 1032 * @buf2_size. This function returns %false if the bytes are identical, %true 1033 * otherwise. 1034 */ 1035 static bool intel_pt_compare_buffers(void *buf1, size_t compare_size, 1036 void *buf2, size_t offs2, size_t buf2_size) 1037 { 1038 size_t end2 = offs2 + compare_size, part_size; 1039 1040 if (end2 <= buf2_size) 1041 return memcmp(buf1, buf2 + offs2, compare_size); 1042 1043 part_size = end2 - buf2_size; 1044 if (memcmp(buf1, buf2 + offs2, part_size)) 1045 return true; 1046 1047 compare_size -= part_size; 1048 1049 return memcmp(buf1 + part_size, buf2, compare_size); 1050 } 1051 1052 static bool intel_pt_compare_ref(void *ref_buf, size_t ref_offset, 1053 size_t ref_size, size_t buf_size, 1054 void *data, size_t head) 1055 { 1056 size_t ref_end = ref_offset + ref_size; 1057 1058 if (ref_end > buf_size) { 1059 if (head > ref_offset || head < ref_end - buf_size) 1060 return true; 1061 } else if (head > ref_offset && head < ref_end) { 1062 return true; 1063 } 1064 1065 return intel_pt_compare_buffers(ref_buf, ref_size, data, ref_offset, 1066 buf_size); 1067 } 1068 1069 static void intel_pt_copy_ref(void *ref_buf, size_t ref_size, size_t buf_size, 1070 void *data, size_t head) 1071 { 1072 if (head >= ref_size) { 1073 memcpy(ref_buf, data + head - ref_size, ref_size); 1074 } else { 1075 memcpy(ref_buf, data, head); 1076 ref_size -= head; 1077 memcpy(ref_buf + head, data + buf_size - ref_size, ref_size); 1078 } 1079 } 1080 1081 static bool intel_pt_wrapped(struct intel_pt_recording *ptr, int idx, 1082 struct auxtrace_mmap *mm, unsigned char *data, 1083 u64 head) 1084 { 1085 struct intel_pt_snapshot_ref *ref = &ptr->snapshot_refs[idx]; 1086 bool wrapped; 1087 1088 wrapped = intel_pt_compare_ref(ref->ref_buf, ref->ref_offset, 1089 ptr->snapshot_ref_buf_size, mm->len, 1090 data, head); 1091 1092 intel_pt_copy_ref(ref->ref_buf, ptr->snapshot_ref_buf_size, mm->len, 1093 data, head); 1094 1095 return wrapped; 1096 } 1097 1098 static bool intel_pt_first_wrap(u64 *data, size_t buf_size) 1099 { 1100 int i, a, b; 1101 1102 b = buf_size >> 3; 1103 a = b - 512; 1104 if (a < 0) 1105 a = 0; 1106 1107 for (i = a; i < b; i++) { 1108 if (data[i]) 1109 return true; 1110 } 1111 1112 return false; 1113 } 1114 1115 static int intel_pt_find_snapshot(struct auxtrace_record *itr, int idx, 1116 struct auxtrace_mmap *mm, unsigned char *data, 1117 u64 *head, u64 *old) 1118 { 1119 struct intel_pt_recording *ptr = 1120 container_of(itr, struct intel_pt_recording, itr); 1121 bool wrapped; 1122 int err; 1123 1124 pr_debug3("%s: mmap index %d old head %zu new head %zu\n", 1125 __func__, idx, (size_t)*old, (size_t)*head); 1126 1127 err = intel_pt_snapshot_init(ptr, mm->len); 1128 if (err) 1129 goto out_err; 1130 1131 if (idx >= ptr->snapshot_ref_cnt) { 1132 err = intel_pt_alloc_snapshot_refs(ptr, idx); 1133 if (err) 1134 goto out_err; 1135 } 1136 1137 if (ptr->snapshot_ref_buf_size) { 1138 if (!ptr->snapshot_refs[idx].ref_buf) { 1139 err = intel_pt_alloc_snapshot_ref(ptr, idx, mm->len); 1140 if (err) 1141 goto out_err; 1142 } 1143 wrapped = intel_pt_wrapped(ptr, idx, mm, data, *head); 1144 } else { 1145 wrapped = ptr->snapshot_refs[idx].wrapped; 1146 if (!wrapped && intel_pt_first_wrap((u64 *)data, mm->len)) { 1147 ptr->snapshot_refs[idx].wrapped = true; 1148 wrapped = true; 1149 } 1150 } 1151 1152 /* 1153 * In full trace mode 'head' continually increases. However in snapshot 1154 * mode 'head' is an offset within the buffer. Here 'old' and 'head' 1155 * are adjusted to match the full trace case which expects that 'old' is 1156 * always less than 'head'. 1157 */ 1158 if (wrapped) { 1159 *old = *head; 1160 *head += mm->len; 1161 } else { 1162 if (mm->mask) 1163 *old &= mm->mask; 1164 else 1165 *old %= mm->len; 1166 if (*old > *head) 1167 *head += mm->len; 1168 } 1169 1170 pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n", 1171 __func__, wrapped ? "" : "not ", (size_t)*old, (size_t)*head); 1172 1173 return 0; 1174 1175 out_err: 1176 pr_err("%s: failed, error %d\n", __func__, err); 1177 return err; 1178 } 1179 1180 static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused) 1181 { 1182 return rdtsc(); 1183 } 1184 1185 static int intel_pt_perf_config(const char *var, const char *value, void *data) 1186 { 1187 struct intel_pt_recording *ptr = data; 1188 1189 if (!strcmp(var, "intel-pt.all-switch-events")) 1190 ptr->all_switch_events = perf_config_bool(var, value); 1191 1192 return 0; 1193 } 1194 1195 struct auxtrace_record *intel_pt_recording_init(int *err) 1196 { 1197 struct perf_pmu *intel_pt_pmu = perf_pmus__find(INTEL_PT_PMU_NAME); 1198 struct intel_pt_recording *ptr; 1199 1200 if (!intel_pt_pmu) 1201 return NULL; 1202 1203 if (setenv("JITDUMP_USE_ARCH_TIMESTAMP", "1", 1)) { 1204 *err = -errno; 1205 return NULL; 1206 } 1207 1208 ptr = zalloc(sizeof(struct intel_pt_recording)); 1209 if (!ptr) { 1210 *err = -ENOMEM; 1211 return NULL; 1212 } 1213 1214 perf_config(intel_pt_perf_config, ptr); 1215 1216 ptr->intel_pt_pmu = intel_pt_pmu; 1217 ptr->itr.recording_options = intel_pt_recording_options; 1218 ptr->itr.info_priv_size = intel_pt_info_priv_size; 1219 ptr->itr.info_fill = intel_pt_info_fill; 1220 ptr->itr.free = intel_pt_recording_free; 1221 ptr->itr.snapshot_start = intel_pt_snapshot_start; 1222 ptr->itr.snapshot_finish = intel_pt_snapshot_finish; 1223 ptr->itr.find_snapshot = intel_pt_find_snapshot; 1224 ptr->itr.parse_snapshot_options = intel_pt_parse_snapshot_options; 1225 ptr->itr.reference = intel_pt_reference; 1226 ptr->itr.read_finish = auxtrace_record__read_finish; 1227 /* 1228 * Decoding starts at a PSB packet. Minimum PSB period is 2K so 4K 1229 * should give at least 1 PSB per sample. 1230 */ 1231 ptr->itr.default_aux_sample_size = 4096; 1232 return &ptr->itr; 1233 } 1234