/*
 * Intel(R) Processor Trace PMU driver for perf
 * Copyright (c) 2013-2014, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * Intel PT is specified in the Intel Architecture Instruction Set Extensions
 * Programming Reference:
 * http://software.intel.com/en-us/intel-isa-extensions
 */

#undef DEBUG

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/types.h>
#include <linux/slab.h>
#include <linux/device.h>

#include <asm/perf_event.h>
#include <asm/insn.h>
#include <asm/io.h>
#include <asm/intel_pt.h>

#include "../perf_event.h"
#include "pt.h"

static DEFINE_PER_CPU(struct pt, pt_ctx);

static struct pt_pmu pt_pmu;

enum cpuid_regs {
	CR_EAX = 0,
	CR_ECX,
	CR_EDX,
	CR_EBX
};

/*
 * Capabilities of Intel PT hardware, such as number of address bits or
 * supported output schemes, are cached and exported to userspace as "caps"
 * attribute group of pt pmu device
 * (/sys/bus/event_source/devices/intel_pt/caps/) so that userspace can store
 * relevant bits together with intel_pt traces.
 *
 * These are necessary for both trace decoding (payloads_lip, contains address
 * width encoded in IP-related packets), and event configuration (bitmasks with
 * permitted values for certain bit fields).
 */
#define PT_CAP(_n, _l, _r, _m)						\
	[PT_CAP_ ## _n] = { .name = __stringify(_n), .leaf = _l,	\
			    .reg = _r, .mask = _m }

static struct pt_cap_desc {
	const char	*name;
	u32		leaf;
	u8		reg;
	u32		mask;
} pt_caps[] = {
	PT_CAP(max_subleaf,		0, CR_EAX, 0xffffffff),
	PT_CAP(cr3_filtering,		0, CR_EBX, BIT(0)),
	PT_CAP(psb_cyc,			0, CR_EBX, BIT(1)),
	PT_CAP(ip_filtering,		0, CR_EBX, BIT(2)),
	PT_CAP(mtc,			0, CR_EBX, BIT(3)),
	PT_CAP(ptwrite,			0, CR_EBX, BIT(4)),
	PT_CAP(power_event_trace,	0, CR_EBX, BIT(5)),
	PT_CAP(topa_output,		0, CR_ECX, BIT(0)),
	PT_CAP(topa_multiple_entries,	0, CR_ECX, BIT(1)),
	PT_CAP(single_range_output,	0, CR_ECX, BIT(2)),
	PT_CAP(payloads_lip,		0, CR_ECX, BIT(31)),
	PT_CAP(num_address_ranges,	1, CR_EAX, 0x3),
	PT_CAP(mtc_periods,		1, CR_EAX, 0xffff0000),
	PT_CAP(cycle_thresholds,	1, CR_EBX, 0xffff),
	PT_CAP(psb_periods,		1, CR_EBX, 0xffff0000),
};

static u32 pt_cap_get(enum pt_capabilities cap)
{
	struct pt_cap_desc *cd = &pt_caps[cap];
	u32 c = pt_pmu.caps[cd->leaf * PT_CPUID_REGS_NUM + cd->reg];
	unsigned int shift = __ffs(cd->mask);

	return (c & cd->mask) >> shift;
}

static ssize_t pt_cap_show(struct device *cdev,
			   struct device_attribute *attr,
			   char *buf)
{
	struct dev_ext_attribute *ea =
		container_of(attr, struct dev_ext_attribute, attr);
	enum pt_capabilities cap = (long)ea->var;

	return snprintf(buf, PAGE_SIZE, "%x\n", pt_cap_get(cap));
}

static struct attribute_group pt_cap_group = {
	.name	= "caps",
};

PMU_FORMAT_ATTR(cyc,		"config:1"	);
PMU_FORMAT_ATTR(mtc,		"config:9"	);
PMU_FORMAT_ATTR(tsc,		"config:10"	);
PMU_FORMAT_ATTR(noretcomp,	"config:11"	);
PMU_FORMAT_ATTR(mtc_period,	"config:14-17"	);
PMU_FORMAT_ATTR(cyc_thresh,	"config:19-22"	);
PMU_FORMAT_ATTR(psb_period,	"config:24-27"	);

static struct attribute *pt_formats_attr[] = {
	&format_attr_cyc.attr,
	&format_attr_mtc.attr,
	&format_attr_tsc.attr,
	&format_attr_noretcomp.attr,
	&format_attr_mtc_period.attr,
	&format_attr_cyc_thresh.attr,
	&format_attr_psb_period.attr,
	NULL,
};

static struct attribute_group pt_format_group = {
	.name	= "format",
	.attrs	= pt_formats_attr,
};

static ssize_t
pt_timing_attr_show(struct device *dev, struct device_attribute *attr,
		    char *page)
{
	struct perf_pmu_events_attr *pmu_attr =
		container_of(attr, struct perf_pmu_events_attr, attr);

	switch (pmu_attr->id) {
	case 0:
		return sprintf(page, "%lu\n", pt_pmu.max_nonturbo_ratio);
	case 1:
		return sprintf(page, "%u:%u\n",
			       pt_pmu.tsc_art_num,
			       pt_pmu.tsc_art_den);
	default:
		break;
	}

	return -EINVAL;
}

PMU_EVENT_ATTR(max_nonturbo_ratio, timing_attr_max_nonturbo_ratio, 0,
	       pt_timing_attr_show);
PMU_EVENT_ATTR(tsc_art_ratio, timing_attr_tsc_art_ratio, 1,
	       pt_timing_attr_show);

static struct attribute *pt_timing_attr[] = {
	&timing_attr_max_nonturbo_ratio.attr.attr,
	&timing_attr_tsc_art_ratio.attr.attr,
	NULL,
};

static struct attribute_group pt_timing_group = {
	.attrs	= pt_timing_attr,
};

static const struct attribute_group *pt_attr_groups[] = {
	&pt_cap_group,
	&pt_format_group,
	&pt_timing_group,
	NULL,
};

static int __init pt_pmu_hw_init(void)
{
	struct dev_ext_attribute *de_attrs;
	struct attribute **attrs;
	size_t size;
	u64 reg;
	int ret;
	long i;

	rdmsrl(MSR_PLATFORM_INFO, reg);
	pt_pmu.max_nonturbo_ratio = (reg & 0xff00) >> 8;

	/*
	 * if available, read in TSC to core crystal clock ratio,
	 * otherwise, zero for numerator stands for "not enumerated"
	 * as per SDM
	 */
	if (boot_cpu_data.cpuid_level >= CPUID_TSC_LEAF) {
		u32 eax, ebx, ecx, edx;

		cpuid(CPUID_TSC_LEAF, &eax, &ebx, &ecx, &edx);

		pt_pmu.tsc_art_num = ebx;
		pt_pmu.tsc_art_den = eax;
	}

	if (boot_cpu_has(X86_FEATURE_VMX)) {
		/*
		 * Intel SDM, 36.5 "Tracing post-VMXON" says that
		 * "IA32_VMX_MISC[bit 14]" being 1 means PT can trace
		 * post-VMXON.
		 */
		rdmsrl(MSR_IA32_VMX_MISC, reg);
		if (reg & BIT(14))
			pt_pmu.vmx = true;
	}

	attrs = NULL;

	for (i = 0; i < PT_CPUID_LEAVES; i++) {
		cpuid_count(20, i,
			    &pt_pmu.caps[CR_EAX + i*PT_CPUID_REGS_NUM],
			    &pt_pmu.caps[CR_EBX + i*PT_CPUID_REGS_NUM],
			    &pt_pmu.caps[CR_ECX + i*PT_CPUID_REGS_NUM],
			    &pt_pmu.caps[CR_EDX + i*PT_CPUID_REGS_NUM]);
	}

	ret = -ENOMEM;
	size = sizeof(struct attribute *) * (ARRAY_SIZE(pt_caps)+1);
	attrs = kzalloc(size, GFP_KERNEL);
	if (!attrs)
		goto fail;

	size = sizeof(struct dev_ext_attribute) * (ARRAY_SIZE(pt_caps)+1);
	de_attrs = kzalloc(size, GFP_KERNEL);
	if (!de_attrs)
		goto fail;

	for (i = 0; i < ARRAY_SIZE(pt_caps); i++) {
		struct dev_ext_attribute *de_attr = de_attrs + i;

		de_attr->attr.attr.name = pt_caps[i].name;

		sysfs_attr_init(&de_attr->attr.attr);

		de_attr->attr.attr.mode	= S_IRUGO;
		de_attr->attr.show	= pt_cap_show;
		de_attr->var		= (void *)i;

		attrs[i] = &de_attr->attr.attr;
	}

	pt_cap_group.attrs = attrs;

	return 0;

fail:
	kfree(attrs);

	return ret;
}

#define RTIT_CTL_CYC_PSB (RTIT_CTL_CYCLEACC	| \
			  RTIT_CTL_CYC_THRESH	| \
			  RTIT_CTL_PSB_FREQ)

#define RTIT_CTL_MTC	(RTIT_CTL_MTC_EN	| \
			 RTIT_CTL_MTC_RANGE)

#define RTIT_CTL_PTW	(RTIT_CTL_PTW_EN	| \
			 RTIT_CTL_FUP_ON_PTW)

#define PT_CONFIG_MASK (RTIT_CTL_TSC_EN		| \
			RTIT_CTL_DISRETC	| \
			RTIT_CTL_CYC_PSB	| \
			RTIT_CTL_MTC		| \
			RTIT_CTL_PWR_EVT_EN	| \
			RTIT_CTL_FUP_ON_PTW	| \
			RTIT_CTL_PTW_EN)

static bool pt_event_valid(struct perf_event *event)
{
	u64 config = event->attr.config;
	u64 allowed, requested;

	if ((config & PT_CONFIG_MASK) != config)
		return false;

	if (config & RTIT_CTL_CYC_PSB) {
		if (!pt_cap_get(PT_CAP_psb_cyc))
			return false;

		allowed = pt_cap_get(PT_CAP_psb_periods);
		requested = (config & RTIT_CTL_PSB_FREQ) >>
			RTIT_CTL_PSB_FREQ_OFFSET;
		if (requested && (!(allowed & BIT(requested))))
			return false;

		allowed = pt_cap_get(PT_CAP_cycle_thresholds);
		requested = (config & RTIT_CTL_CYC_THRESH) >>
			RTIT_CTL_CYC_THRESH_OFFSET;
		if (requested && (!(allowed & BIT(requested))))
			return false;
	}

	if (config & RTIT_CTL_MTC) {
		/*
		 * In the unlikely case that CPUID lists valid mtc periods,
		 * but not the mtc capability, drop out here.
		 *
		 * Spec says that setting mtc period bits while mtc bit in
		 * CPUID is 0 will #GP, so better safe than sorry.
		 */
		if (!pt_cap_get(PT_CAP_mtc))
			return false;

		allowed = pt_cap_get(PT_CAP_mtc_periods);
		if (!allowed)
			return false;

		requested = (config & RTIT_CTL_MTC_RANGE) >>
			RTIT_CTL_MTC_RANGE_OFFSET;

		if (!(allowed & BIT(requested)))
			return false;
	}

	if (config & RTIT_CTL_PWR_EVT_EN &&
	    !pt_cap_get(PT_CAP_power_event_trace))
		return false;

	if (config & RTIT_CTL_PTW) {
		if (!pt_cap_get(PT_CAP_ptwrite))
			return false;

		/* FUPonPTW without PTW doesn't make sense */
		if ((config & RTIT_CTL_FUP_ON_PTW) &&
		    !(config & RTIT_CTL_PTW_EN))
			return false;
	}

	return true;
}

/*
 * PT configuration helpers
 * These all are cpu affine and operate on a local PT
 */

/* Address ranges and their corresponding msr configuration registers */
static const struct pt_address_range {
	unsigned long	msr_a;
	unsigned long	msr_b;
	unsigned int	reg_off;
} pt_address_ranges[] = {
	{
		.msr_a	 = MSR_IA32_RTIT_ADDR0_A,
		.msr_b	 = MSR_IA32_RTIT_ADDR0_B,
		.reg_off = RTIT_CTL_ADDR0_OFFSET,
	},
	{
		.msr_a	 = MSR_IA32_RTIT_ADDR1_A,
		.msr_b	 = MSR_IA32_RTIT_ADDR1_B,
		.reg_off = RTIT_CTL_ADDR1_OFFSET,
	},
	{
		.msr_a	 = MSR_IA32_RTIT_ADDR2_A,
		.msr_b	 = MSR_IA32_RTIT_ADDR2_B,
		.reg_off = RTIT_CTL_ADDR2_OFFSET,
	},
	{
		.msr_a	 = MSR_IA32_RTIT_ADDR3_A,
		.msr_b	 = MSR_IA32_RTIT_ADDR3_B,
		.reg_off = RTIT_CTL_ADDR3_OFFSET,
	}
};

static u64 pt_config_filters(struct perf_event *event)
{
	struct pt_filters *filters = event->hw.addr_filters;
	struct pt *pt = this_cpu_ptr(&pt_ctx);
	unsigned int range = 0;
	u64 rtit_ctl = 0;

	if (!filters)
		return 0;

	perf_event_addr_filters_sync(event);

	for (range = 0; range < filters->nr_filters; range++) {
		struct pt_filter *filter = &filters->filter[range];

		/*
		 * Note, if the range has zero start/end addresses due
		 * to its dynamic object not being loaded yet, we just
		 * go ahead and program a zeroed range, which will simply
		 * produce no data. Note^2: if executable code at 0x0
		 * is a concern, we can set up an "invalid" configuration
		 * such as msr_b < msr_a.
		 */

		/* avoid redundant msr writes */
		if (pt->filters.filter[range].msr_a != filter->msr_a) {
			wrmsrl(pt_address_ranges[range].msr_a, filter->msr_a);
			pt->filters.filter[range].msr_a = filter->msr_a;
		}

		if (pt->filters.filter[range].msr_b != filter->msr_b) {
			wrmsrl(pt_address_ranges[range].msr_b, filter->msr_b);
			pt->filters.filter[range].msr_b = filter->msr_b;
		}

		rtit_ctl |= filter->config << pt_address_ranges[range].reg_off;
	}

	return rtit_ctl;
}

static void pt_config(struct perf_event *event)
{
	u64 reg;

	if (!event->hw.itrace_started) {
		event->hw.itrace_started = 1;
		wrmsrl(MSR_IA32_RTIT_STATUS, 0);
	}

	reg = pt_config_filters(event);
	reg |= RTIT_CTL_TOPA | RTIT_CTL_BRANCH_EN | RTIT_CTL_TRACEEN;

	if (!event->attr.exclude_kernel)
		reg |= RTIT_CTL_OS;
	if (!event->attr.exclude_user)
		reg |= RTIT_CTL_USR;

	reg |= (event->attr.config & PT_CONFIG_MASK);

	event->hw.config = reg;
	wrmsrl(MSR_IA32_RTIT_CTL, reg);
}

static void pt_config_stop(struct perf_event *event)
{
	u64 ctl = READ_ONCE(event->hw.config);

	/* may be already stopped by a PMI */
	if (!(ctl & RTIT_CTL_TRACEEN))
		return;

	ctl &= ~RTIT_CTL_TRACEEN;
	wrmsrl(MSR_IA32_RTIT_CTL, ctl);

	WRITE_ONCE(event->hw.config, ctl);

	/*
	 * A wrmsr that disables trace generation serializes other PT
	 * registers and causes all data packets to be written to memory,
	 * but a fence is required for the data to become globally visible.
	 *
	 * The below WMB, separating data store and aux_head store matches
	 * the consumer's RMB that separates aux_head load and data load.
	 */
	wmb();
}

static void pt_config_buffer(void *buf, unsigned int topa_idx,
			     unsigned int output_off)
{
	u64 reg;

	wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, virt_to_phys(buf));

	reg = 0x7f | ((u64)topa_idx << 7) | ((u64)output_off << 32);

	wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
}

/*
 * Keep ToPA table-related metadata on the same page as the actual table,
 * taking up a few words from the top
 */

#define TENTS_PER_PAGE (((PAGE_SIZE - 40) / sizeof(struct topa_entry)) - 1)

/**
 * struct topa - page-sized ToPA table with metadata at the top
 * @table:	actual ToPA table entries, as understood by PT hardware
 * @list:	linkage to struct pt_buffer's list of tables
 * @phys:	physical address of this page
 * @offset:	offset of the first entry in this table in the buffer
 * @size:	total size of all entries in this table
 * @last:	index of the last initialized entry in this table
 */
struct topa {
	struct topa_entry	table[TENTS_PER_PAGE];
	struct list_head	list;
	u64			phys;
	u64			offset;
	size_t			size;
	int			last;
};

/* make -1 stand for the last table entry */
#define TOPA_ENTRY(t, i) ((i) == -1 ? &(t)->table[(t)->last] : &(t)->table[(i)])

/**
 * topa_alloc() - allocate page-sized ToPA table
 * @cpu:	CPU on which to allocate.
 * @gfp:	Allocation flags.
 *
 * Return:	On success, return the pointer to ToPA table page.
 */
static struct topa *topa_alloc(int cpu, gfp_t gfp)
{
	int node = cpu_to_node(cpu);
	struct topa *topa;
	struct page *p;

	p = alloc_pages_node(node, gfp | __GFP_ZERO, 0);
	if (!p)
		return NULL;

	topa = page_address(p);
	topa->last = 0;
	topa->phys = page_to_phys(p);

	/*
	 * In case of single-entry ToPA, always put the self-referencing END
	 * link as the 2nd entry in the table
	 */
	if (!pt_cap_get(PT_CAP_topa_multiple_entries)) {
		TOPA_ENTRY(topa, 1)->base = topa->phys >> TOPA_SHIFT;
		TOPA_ENTRY(topa, 1)->end = 1;
	}

	return topa;
}

/**
 * topa_free() - free a page-sized ToPA table
 * @topa:	Table to deallocate.
 */
static void topa_free(struct topa *topa)
{
	free_page((unsigned long)topa);
}

/**
 * topa_insert_table() - insert a ToPA table into a buffer
 * @buf:	PT buffer that's being extended.
 * @topa:	New topa table to be inserted.
 *
 * If it's the first table in this buffer, set up buffer's pointers
 * accordingly; otherwise, add an END=1 link entry to @topa to the current
 * "last" table and adjust the last table pointer to @topa.
 */
static void topa_insert_table(struct pt_buffer *buf, struct topa *topa)
{
	struct topa *last = buf->last;

	list_add_tail(&topa->list, &buf->tables);

	if (!buf->first) {
		buf->first = buf->last = buf->cur = topa;
		return;
	}

	topa->offset = last->offset + last->size;
	buf->last = topa;

	if (!pt_cap_get(PT_CAP_topa_multiple_entries))
		return;

	BUG_ON(last->last != TENTS_PER_PAGE - 1);

	TOPA_ENTRY(last, -1)->base = topa->phys >> TOPA_SHIFT;
	TOPA_ENTRY(last, -1)->end = 1;
}

/**
 * topa_table_full() - check if a ToPA table is filled up
 * @topa:	ToPA table.
 */
static bool topa_table_full(struct topa *topa)
{
	/* single-entry ToPA is a special case */
	if (!pt_cap_get(PT_CAP_topa_multiple_entries))
		return !!topa->last;

	return topa->last == TENTS_PER_PAGE - 1;
}

/**
 * topa_insert_pages() - create a list of ToPA tables
 * @buf:	PT buffer being initialized.
 * @gfp:	Allocation flags.
 *
 * This initializes a list of ToPA tables with entries from
 * the data_pages provided by rb_alloc_aux().
 *
 * Return:	0 on success or error code.
 */
static int topa_insert_pages(struct pt_buffer *buf, gfp_t gfp)
{
	struct topa *topa = buf->last;
	int order = 0;
	struct page *p;

	p = virt_to_page(buf->data_pages[buf->nr_pages]);
	if (PagePrivate(p))
		order = page_private(p);

	if (topa_table_full(topa)) {
		topa = topa_alloc(buf->cpu, gfp);
		if (!topa)
			return -ENOMEM;

		topa_insert_table(buf, topa);
	}

	TOPA_ENTRY(topa, -1)->base = page_to_phys(p) >> TOPA_SHIFT;
	TOPA_ENTRY(topa, -1)->size = order;
	if (!buf->snapshot && !pt_cap_get(PT_CAP_topa_multiple_entries)) {
		TOPA_ENTRY(topa, -1)->intr = 1;
		TOPA_ENTRY(topa, -1)->stop = 1;
	}

	topa->last++;
	topa->size += sizes(order);

	buf->nr_pages += 1ul << order;

	return 0;
}

/**
 * pt_topa_dump() - print ToPA tables and their entries
 * @buf:	PT buffer.
 */
static void pt_topa_dump(struct pt_buffer *buf)
{
	struct topa *topa;

	list_for_each_entry(topa, &buf->tables, list) {
		int i;

		pr_debug("# table @%p (%016Lx), off %llx size %zx\n", topa->table,
			 topa->phys, topa->offset, topa->size);
		for (i = 0; i < TENTS_PER_PAGE; i++) {
			pr_debug("# entry @%p (%lx sz %u %c%c%c) raw=%16llx\n",
				 &topa->table[i],
				 (unsigned long)topa->table[i].base << TOPA_SHIFT,
				 sizes(topa->table[i].size),
				 topa->table[i].end ?  'E' : ' ',
				 topa->table[i].intr ? 'I' : ' ',
				 topa->table[i].stop ? 'S' : ' ',
				 *(u64 *)&topa->table[i]);
			if ((pt_cap_get(PT_CAP_topa_multiple_entries) &&
			     topa->table[i].stop) ||
			    topa->table[i].end)
				break;
		}
	}
}

/**
 * pt_buffer_advance() - advance to the next output region
 * @buf:	PT buffer.
 *
 * Advance the current pointers in the buffer to the next ToPA entry.
 */
static void pt_buffer_advance(struct pt_buffer *buf)
{
	buf->output_off = 0;
	buf->cur_idx++;

	if (buf->cur_idx == buf->cur->last) {
		if (buf->cur == buf->last)
			buf->cur = buf->first;
		else
			buf->cur = list_entry(buf->cur->list.next, struct topa,
					      list);
		buf->cur_idx = 0;
	}
}

/**
 * pt_update_head() - calculate current offsets and sizes
 * @pt:		Per-cpu pt context.
 *
 * Update buffer's current write pointer position and data size.
 */
static void pt_update_head(struct pt *pt)
{
	struct pt_buffer *buf = perf_get_aux(&pt->handle);
	u64 topa_idx, base, old;

	/* offset of the first region in this table from the beginning of buf */
	base = buf->cur->offset + buf->output_off;

	/* offset of the current output region within this table */
	for (topa_idx = 0; topa_idx < buf->cur_idx; topa_idx++)
		base += sizes(buf->cur->table[topa_idx].size);

	if (buf->snapshot) {
		local_set(&buf->data_size, base);
	} else {
		old = (local64_xchg(&buf->head, base) &
		       ((buf->nr_pages << PAGE_SHIFT) - 1));
		if (base < old)
			base += buf->nr_pages << PAGE_SHIFT;

		local_add(base - old, &buf->data_size);
	}
}

/**
 * pt_buffer_region() - obtain current output region's address
 * @buf:	PT buffer.
 */
static void *pt_buffer_region(struct pt_buffer *buf)
{
	return phys_to_virt(buf->cur->table[buf->cur_idx].base << TOPA_SHIFT);
}

/**
 * pt_buffer_region_size() - obtain current output region's size
 * @buf:	PT buffer.
 */
static size_t pt_buffer_region_size(struct pt_buffer *buf)
{
	return sizes(buf->cur->table[buf->cur_idx].size);
}

/**
 * pt_handle_status() - take care of possible status conditions
 * @pt:		Per-cpu pt context.
 */
static void pt_handle_status(struct pt *pt)
{
	struct pt_buffer *buf = perf_get_aux(&pt->handle);
	int advance = 0;
	u64 status;

	rdmsrl(MSR_IA32_RTIT_STATUS, status);

	if (status & RTIT_STATUS_ERROR) {
		pr_err_ratelimited("ToPA ERROR encountered, trying to recover\n");
		pt_topa_dump(buf);
		status &= ~RTIT_STATUS_ERROR;
	}

	if (status & RTIT_STATUS_STOPPED) {
		status &= ~RTIT_STATUS_STOPPED;

		/*
		 * On systems that only do single-entry ToPA, hitting STOP
		 * means we are already losing data; need to let the decoder
		 * know.
		 */
		if (!pt_cap_get(PT_CAP_topa_multiple_entries) ||
		    buf->output_off == sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) {
			local_inc(&buf->lost);
			advance++;
		}
	}

	/*
	 * Also on single-entry ToPA implementations, interrupt will come
	 * before the output reaches its output region's boundary.
	 */
	if (!pt_cap_get(PT_CAP_topa_multiple_entries) && !buf->snapshot &&
	    pt_buffer_region_size(buf) - buf->output_off <= TOPA_PMI_MARGIN) {
		void *head = pt_buffer_region(buf);

		/* everything within this margin needs to be zeroed out */
		memset(head + buf->output_off, 0,
		       pt_buffer_region_size(buf) -
		       buf->output_off);
		advance++;
	}

	if (advance)
		pt_buffer_advance(buf);

	wrmsrl(MSR_IA32_RTIT_STATUS, status);
}

/**
 * pt_read_offset() - translate registers into buffer pointers
 * @buf:	PT buffer.
 *
 * Set buffer's output pointers from MSR values.
 */
static void pt_read_offset(struct pt_buffer *buf)
{
	u64 offset, base_topa;

	rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, base_topa);
	buf->cur = phys_to_virt(base_topa);

	rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, offset);
	/* offset within current output region */
	buf->output_off = offset >> 32;
	/* index of current output region within this table */
	buf->cur_idx = (offset & 0xffffff80) >> 7;
}

/**
 * pt_topa_next_entry() - obtain index of the first page in the next ToPA entry
 * @buf:	PT buffer.
 * @pg:		Page offset in the buffer.
 *
 * When advancing to the next output region (ToPA entry), given a page offset
 * into the buffer, we need to find the offset of the first page in the next
 * region.
 */
static unsigned int pt_topa_next_entry(struct pt_buffer *buf, unsigned int pg)
{
	struct topa_entry *te = buf->topa_index[pg];

	/* one region */
	if (buf->first == buf->last && buf->first->last == 1)
		return pg;

	do {
		pg++;
		pg &= buf->nr_pages - 1;
	} while (buf->topa_index[pg] == te);

	return pg;
}

/**
 * pt_buffer_reset_markers() - place interrupt and stop bits in the buffer
 * @buf:	PT buffer.
 * @handle:	Current output handle.
 *
 * Place INT and STOP marks to prevent overwriting old data that the consumer
 * hasn't yet collected and waking up the consumer after a certain fraction of
 * the buffer has filled up. Only needed and sensible for non-snapshot counters.
 *
 * This obviously relies on buf::head to figure out buffer markers, so it has
 * to be called after pt_buffer_reset_offsets() and before the hardware tracing
 * is enabled.
 */
static int pt_buffer_reset_markers(struct pt_buffer *buf,
				   struct perf_output_handle *handle)
{
	unsigned long head = local64_read(&buf->head);
	unsigned long idx, npages, wakeup;

	/* can't stop in the middle of an output region */
	if (buf->output_off + handle->size + 1 <
	    sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size))
		return -EINVAL;

	/* single entry ToPA is handled by marking all regions STOP=1 INT=1 */
	if (!pt_cap_get(PT_CAP_topa_multiple_entries))
		return 0;

	/* clear STOP and INT from current entry */
	buf->topa_index[buf->stop_pos]->stop = 0;
	buf->topa_index[buf->stop_pos]->intr = 0;
	buf->topa_index[buf->intr_pos]->intr = 0;

	/* how many pages till the STOP marker */
	npages = handle->size >> PAGE_SHIFT;

	/* if it's on a page boundary, fill up one more page */
	if (!offset_in_page(head + handle->size + 1))
		npages++;

	idx = (head >> PAGE_SHIFT) + npages;
	idx &= buf->nr_pages - 1;
	buf->stop_pos = idx;

	wakeup = handle->wakeup >> PAGE_SHIFT;

	/* in the worst case, wake up the consumer one page before hard stop */
	idx = (head >> PAGE_SHIFT) + npages - 1;
	if (idx > wakeup)
		idx = wakeup;

	idx &= buf->nr_pages - 1;
	buf->intr_pos = idx;

	buf->topa_index[buf->stop_pos]->stop = 1;
	buf->topa_index[buf->stop_pos]->intr = 1;
	buf->topa_index[buf->intr_pos]->intr = 1;

	return 0;
}

/**
 * pt_buffer_setup_topa_index() - build topa_index[] table of regions
 * @buf:	PT buffer.
 *
 * topa_index[] references output regions indexed by offset into the
 * buffer for purposes of quick reverse lookup.
 */
static void pt_buffer_setup_topa_index(struct pt_buffer *buf)
{
	struct topa *cur = buf->first, *prev = buf->last;
	struct topa_entry *te_cur = TOPA_ENTRY(cur, 0),
		*te_prev = TOPA_ENTRY(prev, prev->last - 1);
	int pg = 0, idx = 0;

	while (pg < buf->nr_pages) {
		int tidx;

		/* pages within one topa entry */
		for (tidx = 0; tidx < 1 << te_cur->size; tidx++, pg++)
			buf->topa_index[pg] = te_prev;

		te_prev = te_cur;

		if (idx == cur->last - 1) {
			/* advance to next topa table */
			idx = 0;
			cur = list_entry(cur->list.next, struct topa, list);
		} else {
			idx++;
		}
		te_cur = TOPA_ENTRY(cur, idx);
	}
}

/**
 * pt_buffer_reset_offsets() - adjust buffer's write pointers from aux_head
 * @buf:	PT buffer.
 * @head:	Write pointer (aux_head) from AUX buffer.
 *
 * Find the ToPA table and entry corresponding to given @head and set buffer's
 * "current" pointers accordingly. This is done after we have obtained the
 * current aux_head position from a successful call to perf_aux_output_begin()
 * to make sure the hardware is writing to the right place.
 *
 * This function modifies buf::{cur,cur_idx,output_off} that will be programmed
 * into PT msrs when the tracing is enabled and buf::head and buf::data_size,
 * which are used to determine INT and STOP markers' locations by a subsequent
 * call to pt_buffer_reset_markers().
 */
static void pt_buffer_reset_offsets(struct pt_buffer *buf, unsigned long head)
{
	int pg;

	if (buf->snapshot)
		head &= (buf->nr_pages << PAGE_SHIFT) - 1;

	pg = (head >> PAGE_SHIFT) & (buf->nr_pages - 1);
	pg = pt_topa_next_entry(buf, pg);

	buf->cur = (struct topa *)((unsigned long)buf->topa_index[pg] & PAGE_MASK);
	buf->cur_idx = ((unsigned long)buf->topa_index[pg] -
			(unsigned long)buf->cur) / sizeof(struct topa_entry);
	buf->output_off = head & (sizes(buf->cur->table[buf->cur_idx].size) - 1);

	local64_set(&buf->head, head);
	local_set(&buf->data_size, 0);
}

/**
 * pt_buffer_fini_topa() - deallocate ToPA structure of a buffer
 * @buf:	PT buffer.
 */
static void pt_buffer_fini_topa(struct pt_buffer *buf)
{
	struct topa *topa, *iter;

	list_for_each_entry_safe(topa, iter, &buf->tables, list) {
		/*
		 * right now, this is in free_aux() path only, so
		 * no need to unlink this table from the list
		 */
		topa_free(topa);
	}
}

/**
 * pt_buffer_init_topa() - initialize ToPA table for pt buffer
 * @buf:	PT buffer.
 * @nr_pages:	Number of AUX pages to cover with ToPA entries.
 * @gfp:	Allocation flags.
 */
static int pt_buffer_init_topa(struct pt_buffer *buf, unsigned long nr_pages,
			       gfp_t gfp)
{
	struct topa *topa;
	int err;

	topa = topa_alloc(buf->cpu, gfp);
	if (!topa)
		return -ENOMEM;

	topa_insert_table(buf, topa);

	while (buf->nr_pages < nr_pages) {
		err = topa_insert_pages(buf, gfp);
		if (err) {
			pt_buffer_fini_topa(buf);
			return -ENOMEM;
		}
	}

	pt_buffer_setup_topa_index(buf);

	/* link last table to the first one, unless we're double buffering */
	if (pt_cap_get(PT_CAP_topa_multiple_entries)) {
		TOPA_ENTRY(buf->last, -1)->base = buf->first->phys >> TOPA_SHIFT;
		TOPA_ENTRY(buf->last, -1)->end = 1;
	}

	pt_topa_dump(buf);
	return 0;
}

/**
 * pt_buffer_setup_aux() - set up topa tables for a PT buffer
 * @cpu:	Cpu on which to allocate, -1 means current.
 * @pages:	Array of pointers to buffer pages passed from perf core.
 * @nr_pages:	Number of pages in the buffer.
 * @snapshot:	If this is a snapshot/overwrite counter.
 *
 * This is a pmu::setup_aux callback that sets up ToPA tables and all the
 * bookkeeping for an AUX buffer.
 *
 * Return:	Our private PT buffer structure.
 */
static void *
pt_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool snapshot)
{
	struct pt_buffer *buf;
	int node, ret;

	if (!nr_pages)
		return NULL;

	if (cpu == -1)
		cpu = raw_smp_processor_id();
	node = cpu_to_node(cpu);

	buf = kzalloc_node(offsetof(struct pt_buffer, topa_index[nr_pages]),
			   GFP_KERNEL, node);
	if (!buf)
		return NULL;

	buf->cpu = cpu;
	buf->snapshot = snapshot;
	buf->data_pages = pages;

	INIT_LIST_HEAD(&buf->tables);

	ret = pt_buffer_init_topa(buf, nr_pages, GFP_KERNEL);
	if (ret) {
		kfree(buf);
		return NULL;
	}

	return buf;
}

/**
 * pt_buffer_free_aux() - perf AUX deallocation path callback
 * @data:	PT buffer.
 */
static void pt_buffer_free_aux(void *data)
{
	struct pt_buffer *buf = data;

	pt_buffer_fini_topa(buf);
	kfree(buf);
}

static int pt_addr_filters_init(struct perf_event *event)
{
	struct pt_filters *filters;
	int node = event->cpu == -1 ? -1 : cpu_to_node(event->cpu);

	if (!pt_cap_get(PT_CAP_num_address_ranges))
		return 0;

	filters = kzalloc_node(sizeof(struct pt_filters), GFP_KERNEL, node);
	if (!filters)
		return -ENOMEM;

	if (event->parent)
		memcpy(filters, event->parent->hw.addr_filters,
		       sizeof(*filters));

	event->hw.addr_filters = filters;

	return 0;
}

static void pt_addr_filters_fini(struct perf_event *event)
{
	kfree(event->hw.addr_filters);
	event->hw.addr_filters = NULL;
}

static inline bool valid_kernel_ip(unsigned long ip)
{
	return virt_addr_valid(ip) && kernel_ip(ip);
}

static int pt_event_addr_filters_validate(struct list_head *filters)
{
	struct perf_addr_filter *filter;
	int range = 0;

	list_for_each_entry(filter, filters, entry) {
		/* PT doesn't support single address triggers */
		if (!filter->range || !filter->size)
			return -EOPNOTSUPP;

		if (!filter->inode) {
			if (!valid_kernel_ip(filter->offset))
				return -EINVAL;

			if (!valid_kernel_ip(filter->offset + filter->size))
				return -EINVAL;
		}

		if (++range > pt_cap_get(PT_CAP_num_address_ranges))
			return -EOPNOTSUPP;
	}

	return 0;
}

static void pt_event_addr_filters_sync(struct perf_event *event)
{
	struct perf_addr_filters_head *head = perf_event_addr_filters(event);
	unsigned long msr_a, msr_b, *offs = event->addr_filters_offs;
	struct pt_filters *filters = event->hw.addr_filters;
	struct perf_addr_filter *filter;
	int range = 0;

	if (!filters)
		return;

	list_for_each_entry(filter, &head->list, entry) {
		if (filter->inode && !offs[range]) {
			msr_a = msr_b = 0;
		} else {
			/* apply the offset */
			msr_a = filter->offset + offs[range];
			msr_b = filter->size + msr_a - 1;
		}

		filters->filter[range].msr_a  = msr_a;
		filters->filter[range].msr_b  = msr_b;
		filters->filter[range].config = filter->filter ? 1 : 2;
		range++;
	}

	filters->nr_filters = range;
}

/**
 * intel_pt_interrupt() - PT PMI handler
 */
void intel_pt_interrupt(void)
{
	struct pt *pt = this_cpu_ptr(&pt_ctx);
	struct pt_buffer *buf;
	struct perf_event *event = pt->handle.event;

	/*
	 * There may be a dangling PT bit in the interrupt status register
	 * after PT has been disabled by pt_event_stop(). Make sure we don't
	 * do anything (particularly, re-enable) for this event here.
	 */
	if (!READ_ONCE(pt->handle_nmi))
		return;

	/*
	 * If VMX is on and PT does not support it, don't touch anything.
	 */
	if (READ_ONCE(pt->vmx_on))
		return;

	if (!event)
		return;

	pt_config_stop(event);

	buf = perf_get_aux(&pt->handle);
	if (!buf)
		return;

	pt_read_offset(buf);

	pt_handle_status(pt);

	pt_update_head(pt);

	perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0),
			    local_xchg(&buf->lost, 0));

	if (!event->hw.state) {
		int ret;

		buf = perf_aux_output_begin(&pt->handle, event);
		if (!buf) {
			event->hw.state = PERF_HES_STOPPED;
			return;
		}

		pt_buffer_reset_offsets(buf, pt->handle.head);
		/* snapshot counters don't use PMI, so it's safe */
		ret = pt_buffer_reset_markers(buf, &pt->handle);
		if (ret) {
			perf_aux_output_end(&pt->handle, 0, true);
			return;
		}

		pt_config_buffer(buf->cur->table, buf->cur_idx,
				 buf->output_off);
		pt_config(event);
	}
}

void intel_pt_handle_vmx(int on)
{
	struct pt *pt = this_cpu_ptr(&pt_ctx);
	struct perf_event *event;
	unsigned long flags;

	/* PT plays nice with VMX, do nothing */
	if (pt_pmu.vmx)
		return;

	/*
	 * VMXON will clear RTIT_CTL.TraceEn; we need to make
	 * sure to not try to set it while VMX is on. Disable
	 * interrupts to avoid racing with pmu callbacks;
	 * concurrent PMI should be handled fine.
	 */
	local_irq_save(flags);
	WRITE_ONCE(pt->vmx_on, on);

	if (on) {
		/* prevent pt_config_stop() from writing RTIT_CTL */
		event = pt->handle.event;
		if (event)
			event->hw.config = 0;
	}
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(intel_pt_handle_vmx);

/*
 * PMU callbacks
 */

static void pt_event_start(struct perf_event *event, int mode)
{
	struct hw_perf_event *hwc = &event->hw;
	struct pt *pt = this_cpu_ptr(&pt_ctx);
	struct pt_buffer *buf;

	if (READ_ONCE(pt->vmx_on))
		return;

	buf = perf_aux_output_begin(&pt->handle, event);
	if (!buf)
		goto fail_stop;

	pt_buffer_reset_offsets(buf, pt->handle.head);
	if (!buf->snapshot) {
		if (pt_buffer_reset_markers(buf, &pt->handle))
			goto fail_end_stop;
	}

	WRITE_ONCE(pt->handle_nmi, 1);
	hwc->state = 0;

	pt_config_buffer(buf->cur->table, buf->cur_idx,
			 buf->output_off);
	pt_config(event);

	return;

fail_end_stop:
	perf_aux_output_end(&pt->handle, 0, true);
fail_stop:
	hwc->state = PERF_HES_STOPPED;
}

static void pt_event_stop(struct perf_event *event, int mode)
{
	struct pt *pt = this_cpu_ptr(&pt_ctx);

	/*
	 * Protect against the PMI racing with disabling wrmsr,
	 * see comment in intel_pt_interrupt().
	 */
	WRITE_ONCE(pt->handle_nmi, 0);

	pt_config_stop(event);

	if (event->hw.state == PERF_HES_STOPPED)
		return;

	event->hw.state = PERF_HES_STOPPED;

	if (mode & PERF_EF_UPDATE) {
		struct pt_buffer *buf = perf_get_aux(&pt->handle);

		if (!buf)
			return;

		if (WARN_ON_ONCE(pt->handle.event != event))
			return;

		pt_read_offset(buf);

		pt_handle_status(pt);

		pt_update_head(pt);

		if (buf->snapshot)
			pt->handle.head =
				local_xchg(&buf->data_size,
					   buf->nr_pages << PAGE_SHIFT);
		perf_aux_output_end(&pt->handle, local_xchg(&buf->data_size, 0),
				    local_xchg(&buf->lost, 0));
	}
}

static void pt_event_del(struct perf_event *event, int mode)
{
	pt_event_stop(event, PERF_EF_UPDATE);
}

static int pt_event_add(struct perf_event *event, int mode)
{
	struct pt *pt = this_cpu_ptr(&pt_ctx);
	struct hw_perf_event *hwc = &event->hw;
	int ret = -EBUSY;

	if (pt->handle.event)
		goto fail;

	if (mode & PERF_EF_START) {
		pt_event_start(event, 0);
		ret = -EINVAL;
		if (hwc->state == PERF_HES_STOPPED)
			goto fail;
	} else {
		hwc->state = PERF_HES_STOPPED;
	}

	ret = 0;
fail:
	return ret;
}

static void pt_event_read(struct perf_event *event)
{
}

static void pt_event_destroy(struct perf_event *event)
{
	pt_addr_filters_fini(event);
	x86_del_exclusive(x86_lbr_exclusive_pt);
}

static int pt_event_init(struct perf_event *event)
{
	if (event->attr.type != pt_pmu.pmu.type)
		return -ENOENT;

	if (!pt_event_valid(event))
		return -EINVAL;

	if (x86_add_exclusive(x86_lbr_exclusive_pt))
		return -EBUSY;

	if (pt_addr_filters_init(event)) {
		x86_del_exclusive(x86_lbr_exclusive_pt);
		return -ENOMEM;
	}

	event->destroy = pt_event_destroy;

	return 0;
}

void cpu_emergency_stop_pt(void)
{
	struct pt *pt = this_cpu_ptr(&pt_ctx);

	if (pt->handle.event)
		pt_event_stop(pt->handle.event, PERF_EF_UPDATE);
}

static __init int pt_init(void)
{
	int ret, cpu, prior_warn = 0;

	BUILD_BUG_ON(sizeof(struct topa) > PAGE_SIZE);

	if (!boot_cpu_has(X86_FEATURE_INTEL_PT))
		return -ENODEV;

	get_online_cpus();
	for_each_online_cpu(cpu) {
		u64 ctl;

		ret = rdmsrl_safe_on_cpu(cpu, MSR_IA32_RTIT_CTL, &ctl);
		if (!ret && (ctl & RTIT_CTL_TRACEEN))
			prior_warn++;
	}
	put_online_cpus();

	if (prior_warn) {
		x86_add_exclusive(x86_lbr_exclusive_pt);
		pr_warn("PT is enabled at boot time, doing nothing\n");

		return -EBUSY;
	}

	ret = pt_pmu_hw_init();
	if (ret)
		return ret;

	if (!pt_cap_get(PT_CAP_topa_output)) {
		pr_warn("ToPA output is not supported on this CPU\n");
		return -ENODEV;
	}

	if (!pt_cap_get(PT_CAP_topa_multiple_entries))
		pt_pmu.pmu.capabilities =
			PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_SW_DOUBLEBUF;

	pt_pmu.pmu.capabilities		|= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
	pt_pmu.pmu.attr_groups		 = pt_attr_groups;
	pt_pmu.pmu.task_ctx_nr		 = perf_sw_context;
	pt_pmu.pmu.event_init		 = pt_event_init;
	pt_pmu.pmu.add			 = pt_event_add;
	pt_pmu.pmu.del			 = pt_event_del;
	pt_pmu.pmu.start		 = pt_event_start;
	pt_pmu.pmu.stop			 = pt_event_stop;
	pt_pmu.pmu.read			 = pt_event_read;
	pt_pmu.pmu.setup_aux		 = pt_buffer_setup_aux;
	pt_pmu.pmu.free_aux		 = pt_buffer_free_aux;
	pt_pmu.pmu.addr_filters_sync	 = pt_event_addr_filters_sync;
	pt_pmu.pmu.addr_filters_validate = pt_event_addr_filters_validate;
	pt_pmu.pmu.nr_addr_filters	 =
		pt_cap_get(PT_CAP_num_address_ranges);

	ret = perf_pmu_register(&pt_pmu.pmu, "intel_pt", -1);

	return ret;
}
arch_initcall(pt_init);
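
/*
 * Illustrative sketch (not used by the driver): the "format" attribute group
 * registered above describes how perf_event_attr::config bits map onto
 * RTIT_CTL fields, and pt_pmu_hw_init() exports the cached CPUID capability
 * bits under /sys/bus/event_source/devices/intel_pt/caps/.  A hypothetical
 * user-space attribute that asks for TSC packets with return compression
 * disabled might therefore look roughly like this, with the dynamic PMU type
 * read from the "type" file of the same sysfs device:
 *
 *	struct perf_event_attr attr = {
 *		.size	= sizeof(attr),
 *		.type	= intel_pt_type,	// from .../intel_pt/type
 *		.config	= (1ULL << 10) |	// tsc       (config:10)
 *			  (1ULL << 11),		// noretcomp (config:11)
 *	};
 *
 * pt_event_valid() is what checks such a config against PT_CONFIG_MASK and
 * the cached capabilities before the event is accepted.
 */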