1 /* 2 * Performance events - AMD IBS 3 * 4 * Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter 5 * 6 * For licencing details see kernel-base/COPYING 7 */ 8 9 #include <linux/perf_event.h> 10 #include <linux/init.h> 11 #include <linux/export.h> 12 #include <linux/pci.h> 13 #include <linux/ptrace.h> 14 #include <linux/syscore_ops.h> 15 #include <linux/sched/clock.h> 16 17 #include <asm/apic.h> 18 #include <asm/msr.h> 19 20 #include "../perf_event.h" 21 22 static u32 ibs_caps; 23 24 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) 25 26 #include <linux/kprobes.h> 27 #include <linux/hardirq.h> 28 29 #include <asm/nmi.h> 30 #include <asm/amd/ibs.h> 31 32 /* attr.config2 */ 33 #define IBS_SW_FILTER_MASK 1 34 35 /* attr.config1 */ 36 #define IBS_OP_CONFIG1_LDLAT_MASK (0xFFFULL << 0) 37 #define IBS_OP_CONFIG1_STRMST_MASK (1ULL << 12) 38 #define IBS_OP_CONFIG1_STRMST_SHIFT (12) 39 40 #define IBS_FETCH_CONFIG1_FETCHLAT_MASK (0x7FFULL << 0) 41 42 /* 43 * IBS states: 44 * 45 * ENABLED; tracks the pmu::add(), pmu::del() state, when set the counter is taken 46 * and any further add()s must fail. 47 * 48 * STARTED/STOPPING/STOPPED; deal with pmu::start(), pmu::stop() state but are 49 * complicated by the fact that the IBS hardware can send late NMIs (ie. after 50 * we've cleared the EN bit). 51 * 52 * In order to consume these late NMIs we have the STOPPED state, any NMI that 53 * happens after we've cleared the EN state will clear this bit and report the 54 * NMI handled (this is fundamentally racy in the face or multiple NMI sources, 55 * someone else can consume our BIT and our NMI will go unhandled). 56 * 57 * And since we cannot set/clear this separate bit together with the EN bit, 58 * there are races; if we cleared STARTED early, an NMI could land in 59 * between clearing STARTED and clearing the EN bit (in fact multiple NMIs 60 * could happen if the period is small enough), and consume our STOPPED bit 61 * and trigger streams of unhandled NMIs. 62 * 63 * If, however, we clear STARTED late, an NMI can hit between clearing the 64 * EN bit and clearing STARTED, still see STARTED set and process the event. 65 * If this event will have the VALID bit clear, we bail properly, but this 66 * is not a given. With VALID set we can end up calling pmu::stop() again 67 * (the throttle logic) and trigger the WARNs in there. 68 * 69 * So what we do is set STOPPING before clearing EN to avoid the pmu::stop() 70 * nesting, and clear STARTED late, so that we have a well defined state over 71 * the clearing of the EN bit. 72 * 73 * XXX: we could probably be using !atomic bitops for all this. 74 */ 75 76 enum ibs_states { 77 IBS_ENABLED = 0, 78 IBS_STARTED = 1, 79 IBS_STOPPING = 2, 80 IBS_STOPPED = 3, 81 82 IBS_MAX_STATES, 83 }; 84 85 struct cpu_perf_ibs { 86 struct perf_event *event; 87 unsigned long state[BITS_TO_LONGS(IBS_MAX_STATES)]; 88 }; 89 90 struct perf_ibs { 91 struct pmu pmu; 92 unsigned int msr; 93 unsigned int msr2; 94 u64 config_mask; 95 u64 cnt_mask; 96 u64 enable_mask; 97 u64 disable_mask; 98 u64 valid_mask; 99 u16 min_period; 100 u64 max_period; 101 unsigned long offset_mask[1]; 102 int offset_max; 103 unsigned int fetch_count_reset_broken : 1; 104 unsigned int fetch_ignore_if_zero_rip : 1; 105 struct cpu_perf_ibs __percpu *pcpu; 106 107 u64 (*get_count)(u64 config); 108 }; 109 110 static int 111 perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period) 112 { 113 s64 left = local64_read(&hwc->period_left); 114 s64 period = hwc->sample_period; 115 int overflow = 0; 116 117 /* 118 * If we are way outside a reasonable range then just skip forward: 119 */ 120 if (unlikely(left <= -period)) { 121 left = period; 122 local64_set(&hwc->period_left, left); 123 hwc->last_period = period; 124 overflow = 1; 125 } 126 127 if (unlikely(left < (s64)min)) { 128 left += period; 129 local64_set(&hwc->period_left, left); 130 hwc->last_period = period; 131 overflow = 1; 132 } 133 134 /* 135 * If the hw period that triggers the sw overflow is too short 136 * we might hit the irq handler. This biases the results. 137 * Thus we shorten the next-to-last period and set the last 138 * period to the max period. 139 */ 140 if (left > max) { 141 left -= max; 142 if (left > max) 143 left = max; 144 else if (left < min) 145 left = min; 146 } 147 148 *hw_period = (u64)left; 149 150 return overflow; 151 } 152 153 static int 154 perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width) 155 { 156 struct hw_perf_event *hwc = &event->hw; 157 int shift = 64 - width; 158 u64 prev_raw_count; 159 u64 delta; 160 161 /* 162 * Careful: an NMI might modify the previous event value. 163 * 164 * Our tactic to handle this is to first atomically read and 165 * exchange a new raw count - then add that new-prev delta 166 * count to the generic event atomically: 167 */ 168 prev_raw_count = local64_read(&hwc->prev_count); 169 if (!local64_try_cmpxchg(&hwc->prev_count, 170 &prev_raw_count, new_raw_count)) 171 return 0; 172 173 /* 174 * Now we have the new raw value and have updated the prev 175 * timestamp already. We can now calculate the elapsed delta 176 * (event-)time and add that to the generic event. 177 * 178 * Careful, not all hw sign-extends above the physical width 179 * of the count. 180 */ 181 delta = (new_raw_count << shift) - (prev_raw_count << shift); 182 delta >>= shift; 183 184 local64_add(delta, &event->count); 185 local64_sub(delta, &hwc->period_left); 186 187 return 1; 188 } 189 190 static struct perf_ibs perf_ibs_fetch; 191 static struct perf_ibs perf_ibs_op; 192 193 static struct perf_ibs *get_ibs_pmu(int type) 194 { 195 if (perf_ibs_fetch.pmu.type == type) 196 return &perf_ibs_fetch; 197 if (perf_ibs_op.pmu.type == type) 198 return &perf_ibs_op; 199 return NULL; 200 } 201 202 /* 203 * core pmu config -> IBS config 204 * 205 * perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count 206 * perf record -a -e r076:p ... # same as -e cpu-cycles:p 207 * perf record -a -e r0C1:p ... # use ibs op counting micro-ops 208 * 209 * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl, 210 * MSRC001_1033) is used to select either cycle or micro-ops counting 211 * mode. 212 */ 213 static int core_pmu_ibs_config(struct perf_event *event, u64 *config) 214 { 215 switch (event->attr.type) { 216 case PERF_TYPE_HARDWARE: 217 switch (event->attr.config) { 218 case PERF_COUNT_HW_CPU_CYCLES: 219 *config = 0; 220 return 0; 221 } 222 break; 223 case PERF_TYPE_RAW: 224 switch (event->attr.config) { 225 case 0x0076: 226 *config = 0; 227 return 0; 228 case 0x00C1: 229 *config = IBS_OP_CNT_CTL; 230 return 0; 231 } 232 break; 233 default: 234 return -ENOENT; 235 } 236 237 return -EOPNOTSUPP; 238 } 239 240 /* 241 * The rip of IBS samples has skid 0. Thus, IBS supports precise 242 * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the 243 * rip is invalid when IBS was not able to record the rip correctly. 244 * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then. 245 */ 246 int forward_event_to_ibs(struct perf_event *event) 247 { 248 u64 config = 0; 249 250 if (!event->attr.precise_ip || event->attr.precise_ip > 2) 251 return -EOPNOTSUPP; 252 253 if (!core_pmu_ibs_config(event, &config)) { 254 event->attr.type = perf_ibs_op.pmu.type; 255 event->attr.config = config; 256 } 257 return -ENOENT; 258 } 259 260 /* 261 * Grouping of IBS events is not possible since IBS can have only 262 * one event active at any point in time. 263 */ 264 static int validate_group(struct perf_event *event) 265 { 266 struct perf_event *sibling; 267 268 if (event->group_leader == event) 269 return 0; 270 271 if (event->group_leader->pmu == event->pmu) 272 return -EINVAL; 273 274 for_each_sibling_event(sibling, event->group_leader) { 275 if (sibling->pmu == event->pmu) 276 return -EINVAL; 277 } 278 return 0; 279 } 280 281 static bool perf_ibs_ldlat_event(struct perf_ibs *perf_ibs, 282 struct perf_event *event) 283 { 284 return perf_ibs == &perf_ibs_op && 285 (ibs_caps & IBS_CAPS_OPLDLAT) && 286 (event->attr.config1 & IBS_OP_CONFIG1_LDLAT_MASK); 287 } 288 289 static bool perf_ibs_fetch_lat_event(struct perf_ibs *perf_ibs, 290 struct perf_event *event) 291 { 292 return perf_ibs == &perf_ibs_fetch && 293 (ibs_caps & IBS_CAPS_FETCHLAT) && 294 (event->attr.config1 & IBS_FETCH_CONFIG1_FETCHLAT_MASK); 295 } 296 297 static bool perf_ibs_strmst_event(struct perf_ibs *perf_ibs, 298 struct perf_event *event) 299 { 300 return perf_ibs == &perf_ibs_op && 301 (ibs_caps & IBS_CAPS_STRMST_RMTSOCKET) && 302 (event->attr.config1 & IBS_OP_CONFIG1_STRMST_MASK); 303 } 304 305 static int perf_ibs_init(struct perf_event *event) 306 { 307 struct hw_perf_event *hwc = &event->hw; 308 struct perf_ibs *perf_ibs; 309 u64 config; 310 int ret; 311 312 perf_ibs = get_ibs_pmu(event->attr.type); 313 if (!perf_ibs) 314 return -ENOENT; 315 316 config = event->attr.config; 317 hwc->extra_reg.config = 0; 318 hwc->extra_reg.reg = 0; 319 320 if (event->pmu != &perf_ibs->pmu) 321 return -ENOENT; 322 323 if (config & ~perf_ibs->config_mask) 324 return -EINVAL; 325 326 if (has_branch_stack(event)) 327 return -EOPNOTSUPP; 328 329 /* handle exclude_{user,kernel} in the IRQ handler */ 330 if (event->attr.exclude_host || event->attr.exclude_guest || 331 event->attr.exclude_idle) 332 return -EINVAL; 333 334 ret = validate_group(event); 335 if (ret) 336 return ret; 337 338 if (perf_allow_kernel()) 339 hwc->flags |= PERF_X86_EVENT_UNPRIVILEGED; 340 341 if (ibs_caps & IBS_CAPS_DIS) { 342 hwc->extra_reg.config &= ~perf_ibs->disable_mask; 343 hwc->extra_reg.reg = perf_ibs->msr2; 344 } 345 346 if (ibs_caps & IBS_CAPS_BIT63_FILTER) { 347 if (perf_ibs == &perf_ibs_fetch) { 348 if (event->attr.exclude_kernel) { 349 hwc->extra_reg.config |= IBS_FETCH_2_EXCL_RIP_63_EQ_1; 350 hwc->extra_reg.reg = perf_ibs->msr2; 351 } 352 if (event->attr.exclude_user) { 353 hwc->extra_reg.config |= IBS_FETCH_2_EXCL_RIP_63_EQ_0; 354 hwc->extra_reg.reg = perf_ibs->msr2; 355 } 356 } else { 357 if (event->attr.exclude_kernel) { 358 hwc->extra_reg.config |= IBS_OP_2_EXCL_RIP_63_EQ_1; 359 hwc->extra_reg.reg = perf_ibs->msr2; 360 } 361 if (event->attr.exclude_user) { 362 hwc->extra_reg.config |= IBS_OP_2_EXCL_RIP_63_EQ_0; 363 hwc->extra_reg.reg = perf_ibs->msr2; 364 } 365 } 366 } else if (!(event->attr.config2 & IBS_SW_FILTER_MASK) && 367 (event->attr.exclude_kernel || event->attr.exclude_user || 368 event->attr.exclude_hv)) { 369 return -EINVAL; 370 } 371 372 if (hwc->sample_period) { 373 if (config & perf_ibs->cnt_mask) 374 /* raw max_cnt may not be set */ 375 return -EINVAL; 376 377 if (event->attr.freq) { 378 hwc->sample_period = perf_ibs->min_period; 379 } else { 380 /* Silently mask off lower nibble. IBS hw mandates it. */ 381 hwc->sample_period &= ~0x0FULL; 382 if (hwc->sample_period < perf_ibs->min_period) 383 return -EINVAL; 384 } 385 } else { 386 u64 period = 0; 387 388 if (event->attr.freq) 389 return -EINVAL; 390 391 if (perf_ibs == &perf_ibs_op) { 392 period = (config & IBS_OP_MAX_CNT) << 4; 393 if (ibs_caps & IBS_CAPS_OPCNTEXT) 394 period |= config & IBS_OP_MAX_CNT_EXT_MASK; 395 } else { 396 period = (config & IBS_FETCH_MAX_CNT) << 4; 397 } 398 399 config &= ~perf_ibs->cnt_mask; 400 event->attr.sample_period = period; 401 hwc->sample_period = period; 402 403 if (hwc->sample_period < perf_ibs->min_period) 404 return -EINVAL; 405 } 406 407 if (perf_ibs_ldlat_event(perf_ibs, event)) { 408 u64 ldlat = event->attr.config1 & IBS_OP_CONFIG1_LDLAT_MASK; 409 410 if (ldlat < 128 || ldlat > 2048) 411 return -EINVAL; 412 ldlat >>= 7; 413 414 config |= (ldlat - 1) << IBS_OP_LDLAT_THRSH_SHIFT; 415 416 config |= IBS_OP_LDLAT_EN; 417 if (cpu_feature_enabled(X86_FEATURE_ZEN5)) 418 config |= IBS_OP_L3MISSONLY; 419 } 420 421 if (perf_ibs_fetch_lat_event(perf_ibs, event)) { 422 u64 fetchlat = event->attr.config1 & IBS_FETCH_CONFIG1_FETCHLAT_MASK; 423 424 if (fetchlat < 128 || fetchlat > 1920) 425 return -EINVAL; 426 fetchlat >>= 7; 427 428 hwc->extra_reg.reg = perf_ibs->msr2; 429 hwc->extra_reg.config |= fetchlat << IBS_FETCH_2_FETCHLAT_FILTER_SHIFT; 430 } 431 432 if (perf_ibs_strmst_event(perf_ibs, event)) { 433 u64 strmst = event->attr.config1 & IBS_OP_CONFIG1_STRMST_MASK; 434 435 strmst >>= IBS_OP_CONFIG1_STRMST_SHIFT; 436 437 hwc->extra_reg.reg = perf_ibs->msr2; 438 hwc->extra_reg.config |= strmst << IBS_OP_2_STRM_ST_FILTER_SHIFT; 439 } 440 441 /* 442 * If we modify hwc->sample_period, we also need to update 443 * hwc->last_period and hwc->period_left. 444 */ 445 hwc->last_period = hwc->sample_period; 446 local64_set(&hwc->period_left, hwc->sample_period); 447 448 hwc->config_base = perf_ibs->msr; 449 hwc->config = config; 450 451 return 0; 452 } 453 454 static int perf_ibs_set_period(struct perf_ibs *perf_ibs, 455 struct hw_perf_event *hwc, u64 *period) 456 { 457 int overflow; 458 459 /* ignore lower 4 bits in min count: */ 460 overflow = perf_event_set_period(hwc, perf_ibs->min_period, 461 perf_ibs->max_period, period); 462 local64_set(&hwc->prev_count, 0); 463 464 return overflow; 465 } 466 467 static u64 get_ibs_fetch_count(u64 config) 468 { 469 union ibs_fetch_ctl fetch_ctl = (union ibs_fetch_ctl)config; 470 471 return fetch_ctl.fetch_cnt << 4; 472 } 473 474 static u64 get_ibs_op_count(u64 config) 475 { 476 union ibs_op_ctl op_ctl = (union ibs_op_ctl)config; 477 u64 count = 0; 478 479 /* 480 * If the internal 27-bit counter rolled over, the count is MaxCnt 481 * and the lower 7 bits of CurCnt are randomized. 482 * Otherwise CurCnt has the full 27-bit current counter value. 483 */ 484 if (op_ctl.op_val) { 485 count = op_ctl.opmaxcnt << 4; 486 if (ibs_caps & IBS_CAPS_OPCNTEXT) 487 count += op_ctl.opmaxcnt_ext << 20; 488 } else if (ibs_caps & IBS_CAPS_RDWROPCNT) { 489 count = op_ctl.opcurcnt; 490 } 491 492 return count; 493 } 494 495 static void 496 perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event, 497 u64 *config) 498 { 499 u64 count = perf_ibs->get_count(*config); 500 501 /* 502 * Set width to 64 since we do not overflow on max width but 503 * instead on max count. In perf_ibs_set_period() we clear 504 * prev count manually on overflow. 505 */ 506 while (!perf_event_try_update(event, count, 64)) { 507 rdmsrq(event->hw.config_base, *config); 508 count = perf_ibs->get_count(*config); 509 } 510 } 511 512 static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs, 513 struct hw_perf_event *hwc, u64 config) 514 { 515 u64 tmp = hwc->config | config; 516 517 if (perf_ibs->fetch_count_reset_broken) 518 wrmsrq(hwc->config_base, tmp & ~perf_ibs->enable_mask); 519 520 wrmsrq(hwc->config_base, tmp | perf_ibs->enable_mask); 521 522 if (hwc->extra_reg.reg) 523 wrmsrq(hwc->extra_reg.reg, hwc->extra_reg.config); 524 } 525 526 /* 527 * Erratum #420 Instruction-Based Sampling Engine May Generate 528 * Interrupt that Cannot Be Cleared: 529 * 530 * Must clear counter mask first, then clear the enable bit. See 531 * Revision Guide for AMD Family 10h Processors, Publication #41322. 532 */ 533 static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs, 534 struct hw_perf_event *hwc, u64 config) 535 { 536 if (ibs_caps & IBS_CAPS_DIS) { 537 wrmsrq(hwc->extra_reg.reg, perf_ibs->disable_mask); 538 return; 539 } 540 541 config &= ~perf_ibs->cnt_mask; 542 if (boot_cpu_data.x86 == 0x10) 543 wrmsrq(hwc->config_base, config); 544 config &= ~perf_ibs->enable_mask; 545 wrmsrq(hwc->config_base, config); 546 } 547 548 /* 549 * We cannot restore the ibs pmu state, so we always needs to update 550 * the event while stopping it and then reset the state when starting 551 * again. Thus, ignoring PERF_EF_RELOAD and PERF_EF_UPDATE flags in 552 * perf_ibs_start()/perf_ibs_stop() and instead always do it. 553 */ 554 static void perf_ibs_start(struct perf_event *event, int flags) 555 { 556 struct hw_perf_event *hwc = &event->hw; 557 struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); 558 struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); 559 u64 period, config = 0; 560 561 if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) 562 return; 563 564 WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); 565 hwc->state = 0; 566 567 if (event->attr.freq && hwc->sample_period < perf_ibs->min_period) 568 hwc->sample_period = perf_ibs->min_period; 569 570 perf_ibs_set_period(perf_ibs, hwc, &period); 571 if (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_OPCNTEXT)) { 572 config |= period & IBS_OP_MAX_CNT_EXT_MASK; 573 period &= ~IBS_OP_MAX_CNT_EXT_MASK; 574 } 575 config |= period >> 4; 576 577 /* 578 * Reset the IBS_{FETCH|OP}_CTL MSR before updating pcpu->state. 579 * Doing so prevents a race condition in which an NMI due to other 580 * source might accidentally activate the event before we enable 581 * it ourselves. 582 */ 583 perf_ibs_disable_event(perf_ibs, hwc, 0); 584 585 /* 586 * Set STARTED before enabling the hardware, such that a subsequent NMI 587 * must observe it. 588 */ 589 set_bit(IBS_STARTED, pcpu->state); 590 clear_bit(IBS_STOPPING, pcpu->state); 591 perf_ibs_enable_event(perf_ibs, hwc, config); 592 593 perf_event_update_userpage(event); 594 } 595 596 static void perf_ibs_stop(struct perf_event *event, int flags) 597 { 598 struct hw_perf_event *hwc = &event->hw; 599 struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); 600 struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); 601 u64 config; 602 int stopping; 603 604 if (test_and_set_bit(IBS_STOPPING, pcpu->state)) 605 return; 606 607 stopping = test_bit(IBS_STARTED, pcpu->state); 608 609 if (!stopping && (hwc->state & PERF_HES_UPTODATE)) 610 return; 611 612 rdmsrq(hwc->config_base, config); 613 614 if (stopping) { 615 /* 616 * Set STOPPED before disabling the hardware, such that it 617 * must be visible to NMIs the moment we clear the EN bit, 618 * at which point we can generate an !VALID sample which 619 * we need to consume. 620 */ 621 set_bit(IBS_STOPPED, pcpu->state); 622 perf_ibs_disable_event(perf_ibs, hwc, config); 623 /* 624 * Clear STARTED after disabling the hardware; if it were 625 * cleared before an NMI hitting after the clear but before 626 * clearing the EN bit might think it a spurious NMI and not 627 * handle it. 628 * 629 * Clearing it after, however, creates the problem of the NMI 630 * handler seeing STARTED but not having a valid sample. 631 */ 632 clear_bit(IBS_STARTED, pcpu->state); 633 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); 634 hwc->state |= PERF_HES_STOPPED; 635 } 636 637 if (hwc->state & PERF_HES_UPTODATE) 638 return; 639 640 /* 641 * Clear valid bit to not count rollovers on update, rollovers 642 * are only updated in the irq handler. 643 */ 644 config &= ~perf_ibs->valid_mask; 645 646 perf_ibs_event_update(perf_ibs, event, &config); 647 hwc->state |= PERF_HES_UPTODATE; 648 } 649 650 static int perf_ibs_add(struct perf_event *event, int flags) 651 { 652 struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); 653 struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); 654 655 if (test_and_set_bit(IBS_ENABLED, pcpu->state)) 656 return -ENOSPC; 657 658 event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; 659 660 pcpu->event = event; 661 662 if (flags & PERF_EF_START) 663 perf_ibs_start(event, PERF_EF_RELOAD); 664 665 return 0; 666 } 667 668 static void perf_ibs_del(struct perf_event *event, int flags) 669 { 670 struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); 671 struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); 672 673 if (!test_and_clear_bit(IBS_ENABLED, pcpu->state)) 674 return; 675 676 perf_ibs_stop(event, PERF_EF_UPDATE); 677 678 pcpu->event = NULL; 679 680 perf_event_update_userpage(event); 681 } 682 683 static void perf_ibs_read(struct perf_event *event) { } 684 685 static int perf_ibs_check_period(struct perf_event *event, u64 value) 686 { 687 struct perf_ibs *perf_ibs; 688 u64 low_nibble; 689 690 if (event->attr.freq) 691 return 0; 692 693 perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); 694 low_nibble = value & 0xFULL; 695 696 /* 697 * This contradicts with perf_ibs_init() which allows sample period 698 * with lower nibble bits set but silently masks them off. Whereas 699 * this returns error. 700 */ 701 if (low_nibble || value < perf_ibs->min_period) 702 return -EINVAL; 703 704 return 0; 705 } 706 707 /* 708 * We need to initialize with empty group if all attributes in the 709 * group are dynamic. 710 */ 711 static struct attribute *attrs_empty[] = { 712 NULL, 713 }; 714 715 static struct attribute_group empty_caps_group = { 716 .name = "caps", 717 .attrs = attrs_empty, 718 }; 719 720 PMU_FORMAT_ATTR(rand_en, "config:57"); 721 PMU_FORMAT_ATTR(cnt_ctl, "config:19"); 722 PMU_FORMAT_ATTR(swfilt, "config2:0"); 723 PMU_EVENT_ATTR_STRING(l3missonly, fetch_l3missonly, "config:59"); 724 PMU_EVENT_ATTR_STRING(l3missonly, op_l3missonly, "config:16"); 725 PMU_EVENT_ATTR_STRING(ldlat, ibs_op_ldlat_format, "config1:0-11"); 726 PMU_EVENT_ATTR_STRING(zen4_ibs_extensions, zen4_ibs_extensions, "1"); 727 PMU_EVENT_ATTR_STRING(ldlat, ibs_op_ldlat_cap, "1"); 728 PMU_EVENT_ATTR_STRING(dtlb_pgsize, ibs_op_dtlb_pgsize_cap, "1"); 729 PMU_EVENT_ATTR_STRING(fetchlat, ibs_fetch_lat_format, "config1:0-10"); 730 PMU_EVENT_ATTR_STRING(fetchlat, ibs_fetch_lat_cap, "1"); 731 PMU_EVENT_ATTR_STRING(strmst, ibs_op_strmst_format, "config1:12"); 732 PMU_EVENT_ATTR_STRING(strmst, ibs_op_strmst_cap, "1"); 733 PMU_EVENT_ATTR_STRING(rmtsocket, ibs_op_rmtsocket_cap, "1"); 734 735 static umode_t 736 zen4_ibs_extensions_is_visible(struct kobject *kobj, struct attribute *attr, int i) 737 { 738 return ibs_caps & IBS_CAPS_ZEN4 ? attr->mode : 0; 739 } 740 741 static umode_t 742 ibs_fetch_lat_is_visible(struct kobject *kobj, struct attribute *attr, int i) 743 { 744 return ibs_caps & IBS_CAPS_FETCHLAT ? attr->mode : 0; 745 } 746 747 static umode_t 748 ibs_op_strmst_is_visible(struct kobject *kobj, struct attribute *attr, int i) 749 { 750 return ibs_caps & IBS_CAPS_STRMST_RMTSOCKET ? attr->mode : 0; 751 } 752 753 static umode_t 754 ibs_op_rmtsocket_is_visible(struct kobject *kobj, struct attribute *attr, int i) 755 { 756 return ibs_caps & IBS_CAPS_STRMST_RMTSOCKET ? attr->mode : 0; 757 } 758 759 static umode_t 760 ibs_op_ldlat_is_visible(struct kobject *kobj, struct attribute *attr, int i) 761 { 762 return ibs_caps & IBS_CAPS_OPLDLAT ? attr->mode : 0; 763 } 764 765 static umode_t 766 ibs_op_dtlb_pgsize_is_visible(struct kobject *kobj, struct attribute *attr, int i) 767 { 768 return ibs_caps & IBS_CAPS_OPDTLBPGSIZE ? attr->mode : 0; 769 } 770 771 static struct attribute *fetch_attrs[] = { 772 &format_attr_rand_en.attr, 773 &format_attr_swfilt.attr, 774 NULL, 775 }; 776 777 static struct attribute *fetch_l3missonly_attrs[] = { 778 &fetch_l3missonly.attr.attr, 779 NULL, 780 }; 781 782 static struct attribute *zen4_ibs_extensions_attrs[] = { 783 &zen4_ibs_extensions.attr.attr, 784 NULL, 785 }; 786 787 static struct attribute *ibs_fetch_lat_format_attrs[] = { 788 &ibs_fetch_lat_format.attr.attr, 789 NULL, 790 }; 791 792 static struct attribute *ibs_fetch_lat_cap_attrs[] = { 793 &ibs_fetch_lat_cap.attr.attr, 794 NULL, 795 }; 796 797 static struct attribute *ibs_op_ldlat_cap_attrs[] = { 798 &ibs_op_ldlat_cap.attr.attr, 799 NULL, 800 }; 801 802 static struct attribute *ibs_op_dtlb_pgsize_cap_attrs[] = { 803 &ibs_op_dtlb_pgsize_cap.attr.attr, 804 NULL, 805 }; 806 807 static struct attribute *ibs_op_strmst_cap_attrs[] = { 808 &ibs_op_strmst_cap.attr.attr, 809 NULL, 810 }; 811 812 static struct attribute *ibs_op_rmtsocket_cap_attrs[] = { 813 &ibs_op_rmtsocket_cap.attr.attr, 814 NULL, 815 }; 816 817 static struct attribute_group group_fetch_formats = { 818 .name = "format", 819 .attrs = fetch_attrs, 820 }; 821 822 static struct attribute_group group_fetch_l3missonly = { 823 .name = "format", 824 .attrs = fetch_l3missonly_attrs, 825 .is_visible = zen4_ibs_extensions_is_visible, 826 }; 827 828 static struct attribute_group group_zen4_ibs_extensions = { 829 .name = "caps", 830 .attrs = zen4_ibs_extensions_attrs, 831 .is_visible = zen4_ibs_extensions_is_visible, 832 }; 833 834 static struct attribute_group group_ibs_fetch_lat_cap = { 835 .name = "caps", 836 .attrs = ibs_fetch_lat_cap_attrs, 837 .is_visible = ibs_fetch_lat_is_visible, 838 }; 839 840 static struct attribute_group group_ibs_fetch_lat_format = { 841 .name = "format", 842 .attrs = ibs_fetch_lat_format_attrs, 843 .is_visible = ibs_fetch_lat_is_visible, 844 }; 845 846 static struct attribute_group group_ibs_op_ldlat_cap = { 847 .name = "caps", 848 .attrs = ibs_op_ldlat_cap_attrs, 849 .is_visible = ibs_op_ldlat_is_visible, 850 }; 851 852 static struct attribute_group group_ibs_op_dtlb_pgsize_cap = { 853 .name = "caps", 854 .attrs = ibs_op_dtlb_pgsize_cap_attrs, 855 .is_visible = ibs_op_dtlb_pgsize_is_visible, 856 }; 857 858 static struct attribute_group group_ibs_op_strmst_cap = { 859 .name = "caps", 860 .attrs = ibs_op_strmst_cap_attrs, 861 .is_visible = ibs_op_strmst_is_visible, 862 }; 863 864 static struct attribute_group group_ibs_op_rmtsocket_cap = { 865 .name = "caps", 866 .attrs = ibs_op_rmtsocket_cap_attrs, 867 .is_visible = ibs_op_rmtsocket_is_visible, 868 }; 869 870 static const struct attribute_group *fetch_attr_groups[] = { 871 &group_fetch_formats, 872 &empty_caps_group, 873 NULL, 874 }; 875 876 static const struct attribute_group *fetch_attr_update[] = { 877 &group_fetch_l3missonly, 878 &group_zen4_ibs_extensions, 879 &group_ibs_fetch_lat_cap, 880 &group_ibs_fetch_lat_format, 881 NULL, 882 }; 883 884 static umode_t 885 cnt_ctl_is_visible(struct kobject *kobj, struct attribute *attr, int i) 886 { 887 return ibs_caps & IBS_CAPS_OPCNT ? attr->mode : 0; 888 } 889 890 static struct attribute *op_attrs[] = { 891 &format_attr_swfilt.attr, 892 NULL, 893 }; 894 895 static struct attribute *cnt_ctl_attrs[] = { 896 &format_attr_cnt_ctl.attr, 897 NULL, 898 }; 899 900 static struct attribute *op_l3missonly_attrs[] = { 901 &op_l3missonly.attr.attr, 902 NULL, 903 }; 904 905 static struct attribute_group group_op_formats = { 906 .name = "format", 907 .attrs = op_attrs, 908 }; 909 910 static struct attribute *ibs_op_ldlat_format_attrs[] = { 911 &ibs_op_ldlat_format.attr.attr, 912 NULL, 913 }; 914 915 static struct attribute *ibs_op_strmst_format_attrs[] = { 916 &ibs_op_strmst_format.attr.attr, 917 NULL, 918 }; 919 920 static struct attribute_group group_cnt_ctl = { 921 .name = "format", 922 .attrs = cnt_ctl_attrs, 923 .is_visible = cnt_ctl_is_visible, 924 }; 925 926 static struct attribute_group group_op_l3missonly = { 927 .name = "format", 928 .attrs = op_l3missonly_attrs, 929 .is_visible = zen4_ibs_extensions_is_visible, 930 }; 931 932 static const struct attribute_group *op_attr_groups[] = { 933 &group_op_formats, 934 &empty_caps_group, 935 NULL, 936 }; 937 938 static struct attribute_group group_ibs_op_ldlat_format = { 939 .name = "format", 940 .attrs = ibs_op_ldlat_format_attrs, 941 .is_visible = ibs_op_ldlat_is_visible, 942 }; 943 944 static struct attribute_group group_ibs_op_strmst_format = { 945 .name = "format", 946 .attrs = ibs_op_strmst_format_attrs, 947 .is_visible = ibs_op_strmst_is_visible, 948 }; 949 950 static const struct attribute_group *op_attr_update[] = { 951 &group_cnt_ctl, 952 &group_op_l3missonly, 953 &group_zen4_ibs_extensions, 954 &group_ibs_op_ldlat_cap, 955 &group_ibs_op_ldlat_format, 956 &group_ibs_op_dtlb_pgsize_cap, 957 &group_ibs_op_strmst_cap, 958 &group_ibs_op_strmst_format, 959 &group_ibs_op_rmtsocket_cap, 960 NULL, 961 }; 962 963 static struct perf_ibs perf_ibs_fetch = { 964 .pmu = { 965 .task_ctx_nr = perf_hw_context, 966 967 .event_init = perf_ibs_init, 968 .add = perf_ibs_add, 969 .del = perf_ibs_del, 970 .start = perf_ibs_start, 971 .stop = perf_ibs_stop, 972 .read = perf_ibs_read, 973 .check_period = perf_ibs_check_period, 974 }, 975 .msr = MSR_AMD64_IBSFETCHCTL, 976 .msr2 = MSR_AMD64_IBSFETCHCTL2, 977 .config_mask = IBS_FETCH_MAX_CNT | IBS_FETCH_RAND_EN, 978 .cnt_mask = IBS_FETCH_MAX_CNT, 979 .enable_mask = IBS_FETCH_ENABLE, 980 .valid_mask = IBS_FETCH_VAL, 981 .min_period = 0x10, 982 .max_period = IBS_FETCH_MAX_CNT << 4, 983 .offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK }, 984 .offset_max = MSR_AMD64_IBSFETCH_REG_COUNT, 985 986 .get_count = get_ibs_fetch_count, 987 }; 988 989 static struct perf_ibs perf_ibs_op = { 990 .pmu = { 991 .task_ctx_nr = perf_hw_context, 992 993 .event_init = perf_ibs_init, 994 .add = perf_ibs_add, 995 .del = perf_ibs_del, 996 .start = perf_ibs_start, 997 .stop = perf_ibs_stop, 998 .read = perf_ibs_read, 999 .check_period = perf_ibs_check_period, 1000 }, 1001 .msr = MSR_AMD64_IBSOPCTL, 1002 .msr2 = MSR_AMD64_IBSOPCTL2, 1003 .config_mask = IBS_OP_MAX_CNT, 1004 .cnt_mask = IBS_OP_MAX_CNT | IBS_OP_CUR_CNT | 1005 IBS_OP_CUR_CNT_RAND, 1006 .enable_mask = IBS_OP_ENABLE, 1007 .valid_mask = IBS_OP_VAL, 1008 .min_period = 0x90, 1009 .max_period = IBS_OP_MAX_CNT << 4, 1010 .offset_mask = { MSR_AMD64_IBSOP_REG_MASK }, 1011 .offset_max = MSR_AMD64_IBSOP_REG_COUNT, 1012 1013 .get_count = get_ibs_op_count, 1014 }; 1015 1016 static void perf_ibs_get_mem_op(union ibs_op_data3 *op_data3, 1017 struct perf_sample_data *data) 1018 { 1019 union perf_mem_data_src *data_src = &data->data_src; 1020 1021 data_src->mem_op = PERF_MEM_OP_NA; 1022 1023 if (op_data3->ld_op) 1024 data_src->mem_op = PERF_MEM_OP_LOAD; 1025 else if (op_data3->st_op) 1026 data_src->mem_op = PERF_MEM_OP_STORE; 1027 } 1028 1029 /* 1030 * Processors having CPUID_Fn8000001B_EAX[11] aka IBS_CAPS_ZEN4 has 1031 * more fine granular DataSrc encodings. Others have coarse. 1032 */ 1033 static u8 perf_ibs_data_src(union ibs_op_data2 *op_data2) 1034 { 1035 if (ibs_caps & IBS_CAPS_ZEN4) 1036 return (op_data2->data_src_hi << 3) | op_data2->data_src_lo; 1037 1038 return op_data2->data_src_lo; 1039 } 1040 1041 #define L(x) (PERF_MEM_S(LVL, x) | PERF_MEM_S(LVL, HIT)) 1042 #define LN(x) PERF_MEM_S(LVLNUM, x) 1043 #define REM PERF_MEM_S(REMOTE, REMOTE) 1044 #define HOPS(x) PERF_MEM_S(HOPS, x) 1045 1046 static u64 g_data_src[8] = { 1047 [IBS_DATA_SRC_LOC_CACHE] = L(L3) | L(REM_CCE1) | LN(ANY_CACHE) | HOPS(0), 1048 [IBS_DATA_SRC_DRAM] = L(LOC_RAM) | LN(RAM), 1049 [IBS_DATA_SRC_REM_CACHE] = L(REM_CCE2) | LN(ANY_CACHE) | REM | HOPS(1), 1050 [IBS_DATA_SRC_IO] = L(IO) | LN(IO), 1051 }; 1052 1053 #define RMT_NODE_BITS (1 << IBS_DATA_SRC_DRAM) 1054 #define RMT_NODE_APPLICABLE(x) (RMT_NODE_BITS & (1 << x)) 1055 1056 static u64 g_zen4_data_src[32] = { 1057 [IBS_DATA_SRC_EXT_LOC_CACHE] = L(L3) | LN(L3), 1058 [IBS_DATA_SRC_EXT_NEAR_CCX_CACHE] = L(REM_CCE1) | LN(ANY_CACHE) | REM | HOPS(0), 1059 [IBS_DATA_SRC_EXT_DRAM] = L(LOC_RAM) | LN(RAM), 1060 [IBS_DATA_SRC_EXT_FAR_CCX_CACHE] = L(REM_CCE2) | LN(ANY_CACHE) | REM | HOPS(1), 1061 [IBS_DATA_SRC_EXT_PMEM] = LN(PMEM), 1062 [IBS_DATA_SRC_EXT_IO] = L(IO) | LN(IO), 1063 [IBS_DATA_SRC_EXT_EXT_MEM] = LN(CXL), 1064 }; 1065 1066 #define ZEN4_RMT_NODE_BITS ((1 << IBS_DATA_SRC_EXT_DRAM) | \ 1067 (1 << IBS_DATA_SRC_EXT_PMEM) | \ 1068 (1 << IBS_DATA_SRC_EXT_EXT_MEM)) 1069 #define ZEN4_RMT_NODE_APPLICABLE(x) (ZEN4_RMT_NODE_BITS & (1 << x)) 1070 1071 static __u64 perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2, 1072 union ibs_op_data3 *op_data3, 1073 struct perf_sample_data *data) 1074 { 1075 union perf_mem_data_src *data_src = &data->data_src; 1076 u8 ibs_data_src = perf_ibs_data_src(op_data2); 1077 1078 data_src->mem_lvl = 0; 1079 data_src->mem_lvl_num = 0; 1080 1081 /* 1082 * DcMiss, L2Miss, DataSrc, DcMissLat etc. are all invalid for Uncached 1083 * memory accesses. So, check DcUcMemAcc bit early. 1084 */ 1085 if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO) 1086 return L(UNC) | LN(UNC); 1087 1088 /* L1 Hit */ 1089 if (op_data3->dc_miss == 0) 1090 return L(L1) | LN(L1); 1091 1092 /* L2 Hit */ 1093 if (op_data3->l2_miss == 0) { 1094 /* Erratum #1293 */ 1095 if (boot_cpu_data.x86 != 0x19 || boot_cpu_data.x86_model > 0xF || 1096 !(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) 1097 return L(L2) | LN(L2); 1098 } 1099 1100 /* 1101 * OP_DATA2 is valid only for load ops. Skip all checks which 1102 * uses OP_DATA2[DataSrc]. 1103 */ 1104 if (data_src->mem_op != PERF_MEM_OP_LOAD) 1105 goto check_mab; 1106 1107 if (ibs_caps & IBS_CAPS_ZEN4) { 1108 u64 val = g_zen4_data_src[ibs_data_src]; 1109 1110 if (!val) 1111 goto check_mab; 1112 1113 /* HOPS_1 because IBS doesn't provide remote socket detail */ 1114 if (op_data2->rmt_node && ZEN4_RMT_NODE_APPLICABLE(ibs_data_src)) { 1115 if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM) 1116 val = L(REM_RAM1) | LN(RAM) | REM | HOPS(1); 1117 else 1118 val |= REM | HOPS(1); 1119 } 1120 1121 return val; 1122 } else { 1123 u64 val = g_data_src[ibs_data_src]; 1124 1125 if (!val) 1126 goto check_mab; 1127 1128 /* HOPS_1 because IBS doesn't provide remote socket detail */ 1129 if (op_data2->rmt_node && RMT_NODE_APPLICABLE(ibs_data_src)) { 1130 if (ibs_data_src == IBS_DATA_SRC_DRAM) 1131 val = L(REM_RAM1) | LN(RAM) | REM | HOPS(1); 1132 else 1133 val |= REM | HOPS(1); 1134 } 1135 1136 return val; 1137 } 1138 1139 check_mab: 1140 /* 1141 * MAB (Miss Address Buffer) Hit. MAB keeps track of outstanding 1142 * DC misses. However, such data may come from any level in mem 1143 * hierarchy. IBS provides detail about both MAB as well as actual 1144 * DataSrc simultaneously. Prioritize DataSrc over MAB, i.e. set 1145 * MAB only when IBS fails to provide DataSrc. 1146 */ 1147 if (op_data3->dc_miss_no_mab_alloc) 1148 return L(LFB) | LN(LFB); 1149 1150 /* Don't set HIT with NA */ 1151 return PERF_MEM_S(LVL, NA) | LN(NA); 1152 } 1153 1154 static bool perf_ibs_cache_hit_st_valid(void) 1155 { 1156 /* 0: Uninitialized, 1: Valid, -1: Invalid */ 1157 static int cache_hit_st_valid; 1158 1159 if (unlikely(!cache_hit_st_valid)) { 1160 if (boot_cpu_data.x86 == 0x19 && 1161 (boot_cpu_data.x86_model <= 0xF || 1162 (boot_cpu_data.x86_model >= 0x20 && 1163 boot_cpu_data.x86_model <= 0x5F))) { 1164 cache_hit_st_valid = -1; 1165 } else { 1166 cache_hit_st_valid = 1; 1167 } 1168 } 1169 1170 return cache_hit_st_valid == 1; 1171 } 1172 1173 static void perf_ibs_get_mem_snoop(union ibs_op_data2 *op_data2, 1174 struct perf_sample_data *data) 1175 { 1176 union perf_mem_data_src *data_src = &data->data_src; 1177 u8 ibs_data_src; 1178 1179 data_src->mem_snoop = PERF_MEM_SNOOP_NA; 1180 1181 if (!perf_ibs_cache_hit_st_valid() || 1182 data_src->mem_op != PERF_MEM_OP_LOAD || 1183 data_src->mem_lvl & PERF_MEM_LVL_L1 || 1184 data_src->mem_lvl & PERF_MEM_LVL_L2 || 1185 op_data2->cache_hit_st) 1186 return; 1187 1188 ibs_data_src = perf_ibs_data_src(op_data2); 1189 1190 if (ibs_caps & IBS_CAPS_ZEN4) { 1191 if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE || 1192 ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE || 1193 ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE) 1194 data_src->mem_snoop = PERF_MEM_SNOOP_HITM; 1195 } else if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE) { 1196 data_src->mem_snoop = PERF_MEM_SNOOP_HITM; 1197 } 1198 } 1199 1200 static void perf_ibs_get_tlb_lvl(union ibs_op_data3 *op_data3, 1201 struct perf_sample_data *data) 1202 { 1203 union perf_mem_data_src *data_src = &data->data_src; 1204 1205 data_src->mem_dtlb = PERF_MEM_TLB_NA; 1206 1207 if (!op_data3->dc_lin_addr_valid) 1208 return; 1209 1210 if ((ibs_caps & IBS_CAPS_OPDTLBPGSIZE) && 1211 !op_data3->dc_phy_addr_valid) 1212 return; 1213 1214 if (!op_data3->dc_l1tlb_miss) { 1215 data_src->mem_dtlb = PERF_MEM_TLB_L1 | PERF_MEM_TLB_HIT; 1216 return; 1217 } 1218 1219 if (!op_data3->dc_l2tlb_miss) { 1220 data_src->mem_dtlb = PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT; 1221 return; 1222 } 1223 1224 data_src->mem_dtlb = PERF_MEM_TLB_L2 | PERF_MEM_TLB_MISS; 1225 } 1226 1227 static void perf_ibs_get_mem_lock(union ibs_op_data3 *op_data3, 1228 struct perf_sample_data *data) 1229 { 1230 union perf_mem_data_src *data_src = &data->data_src; 1231 1232 data_src->mem_lock = PERF_MEM_LOCK_NA; 1233 1234 if (op_data3->dc_locked_op) 1235 data_src->mem_lock = PERF_MEM_LOCK_LOCKED; 1236 } 1237 1238 /* Be careful. Works only for contiguous MSRs. */ 1239 #define ibs_fetch_msr_idx(msr) (msr - MSR_AMD64_IBSFETCHCTL) 1240 #define ibs_op_msr_idx(msr) (msr - MSR_AMD64_IBSOPCTL) 1241 1242 static void perf_ibs_get_data_src(struct perf_ibs_data *ibs_data, 1243 struct perf_sample_data *data, 1244 union ibs_op_data2 *op_data2, 1245 union ibs_op_data3 *op_data3) 1246 { 1247 union perf_mem_data_src *data_src = &data->data_src; 1248 1249 data_src->val |= perf_ibs_get_mem_lvl(op_data2, op_data3, data); 1250 perf_ibs_get_mem_snoop(op_data2, data); 1251 perf_ibs_get_tlb_lvl(op_data3, data); 1252 perf_ibs_get_mem_lock(op_data3, data); 1253 } 1254 1255 static __u64 perf_ibs_get_op_data2(struct perf_ibs_data *ibs_data, 1256 union ibs_op_data3 *op_data3) 1257 { 1258 __u64 val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA2)]; 1259 1260 /* Erratum #1293 */ 1261 if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model <= 0xF && 1262 (op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) { 1263 /* 1264 * OP_DATA2 has only two fields on Zen3: DataSrc and RmtNode. 1265 * DataSrc=0 is 'No valid status' and RmtNode is invalid when 1266 * DataSrc=0. 1267 */ 1268 val = 0; 1269 } 1270 return val; 1271 } 1272 1273 static void perf_ibs_parse_ld_st_data(__u64 sample_type, 1274 struct perf_ibs_data *ibs_data, 1275 struct perf_sample_data *data) 1276 { 1277 union ibs_op_data3 op_data3; 1278 union ibs_op_data2 op_data2; 1279 union ibs_op_data op_data; 1280 1281 data->data_src.val = PERF_MEM_NA; 1282 op_data3.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)]; 1283 1284 perf_ibs_get_mem_op(&op_data3, data); 1285 if (data->data_src.mem_op != PERF_MEM_OP_LOAD && 1286 data->data_src.mem_op != PERF_MEM_OP_STORE) 1287 return; 1288 1289 op_data2.val = perf_ibs_get_op_data2(ibs_data, &op_data3); 1290 1291 if (sample_type & PERF_SAMPLE_DATA_SRC) { 1292 perf_ibs_get_data_src(ibs_data, data, &op_data2, &op_data3); 1293 data->sample_flags |= PERF_SAMPLE_DATA_SRC; 1294 } 1295 1296 if (sample_type & PERF_SAMPLE_WEIGHT_TYPE && op_data3.dc_miss && 1297 data->data_src.mem_op == PERF_MEM_OP_LOAD) { 1298 op_data.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA)]; 1299 1300 if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) { 1301 data->weight.var1_dw = op_data3.dc_miss_lat; 1302 data->weight.var2_w = op_data.tag_to_ret_ctr; 1303 } else if (sample_type & PERF_SAMPLE_WEIGHT) { 1304 data->weight.full = op_data3.dc_miss_lat; 1305 } 1306 data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE; 1307 } 1308 1309 if (sample_type & PERF_SAMPLE_ADDR && op_data3.dc_lin_addr_valid) { 1310 data->addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCLINAD)]; 1311 data->sample_flags |= PERF_SAMPLE_ADDR; 1312 } 1313 1314 if (sample_type & PERF_SAMPLE_PHYS_ADDR && op_data3.dc_phy_addr_valid) { 1315 data->phys_addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCPHYSAD)]; 1316 data->sample_flags |= PERF_SAMPLE_PHYS_ADDR; 1317 } 1318 } 1319 1320 static bool perf_ibs_is_mem_sample_type(struct perf_ibs *perf_ibs, 1321 struct perf_event *event) 1322 { 1323 u64 sample_type = event->attr.sample_type; 1324 1325 return perf_ibs == &perf_ibs_op && 1326 sample_type & (PERF_SAMPLE_DATA_SRC | 1327 PERF_SAMPLE_WEIGHT_TYPE | 1328 PERF_SAMPLE_ADDR | 1329 PERF_SAMPLE_PHYS_ADDR); 1330 } 1331 1332 static int perf_ibs_get_offset_max(struct perf_ibs *perf_ibs, 1333 struct perf_event *event, 1334 int check_rip) 1335 { 1336 if (event->attr.sample_type & PERF_SAMPLE_RAW || 1337 perf_ibs_is_mem_sample_type(perf_ibs, event) || 1338 perf_ibs_ldlat_event(perf_ibs, event) || 1339 perf_ibs_fetch_lat_event(perf_ibs, event)) 1340 return perf_ibs->offset_max; 1341 else if (check_rip) 1342 return 3; 1343 return 1; 1344 } 1345 1346 static bool perf_ibs_is_kernel_data_addr(struct perf_event *event, 1347 struct perf_ibs_data *ibs_data) 1348 { 1349 u64 sample_type_mask = PERF_SAMPLE_ADDR | PERF_SAMPLE_RAW; 1350 union ibs_op_data3 op_data3; 1351 u64 dc_lin_addr; 1352 1353 op_data3.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)]; 1354 dc_lin_addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCLINAD)]; 1355 1356 return unlikely((event->attr.sample_type & sample_type_mask) && 1357 op_data3.dc_lin_addr_valid && kernel_ip(dc_lin_addr)); 1358 } 1359 1360 static bool perf_ibs_is_kernel_br_target(struct perf_event *event, 1361 struct perf_ibs_data *ibs_data, 1362 int br_target_idx) 1363 { 1364 union ibs_op_data op_data; 1365 u64 br_target; 1366 1367 op_data.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA)]; 1368 br_target = ibs_data->regs[br_target_idx]; 1369 1370 return unlikely((event->attr.sample_type & PERF_SAMPLE_RAW) && 1371 op_data.op_brn_ret && kernel_ip(br_target)); 1372 } 1373 1374 static bool perf_ibs_discard_sample(struct perf_ibs *perf_ibs, struct perf_event *event, 1375 struct pt_regs *regs, struct perf_ibs_data *ibs_data, 1376 int br_target_idx) 1377 { 1378 if (perf_exclude_event(event, regs)) 1379 return true; 1380 1381 if (perf_ibs != &perf_ibs_op || !event->attr.exclude_kernel) 1382 return false; 1383 1384 if (perf_ibs_is_kernel_data_addr(event, ibs_data)) 1385 return true; 1386 1387 if (br_target_idx != -1 && 1388 perf_ibs_is_kernel_br_target(event, ibs_data, br_target_idx)) 1389 return true; 1390 1391 return false; 1392 } 1393 1394 static void perf_ibs_phyaddr_clear(struct perf_ibs *perf_ibs, 1395 struct perf_ibs_data *ibs_data) 1396 { 1397 if (perf_ibs == &perf_ibs_op) { 1398 ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCPHYSAD)] = 0; 1399 return; 1400 } 1401 1402 ibs_data->regs[ibs_fetch_msr_idx(MSR_AMD64_IBSFETCHPHYSAD)] = 0; 1403 } 1404 1405 static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) 1406 { 1407 struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); 1408 struct perf_event *event = pcpu->event; 1409 struct hw_perf_event *hwc; 1410 struct perf_sample_data data; 1411 struct perf_raw_record raw; 1412 struct pt_regs regs; 1413 struct perf_ibs_data ibs_data; 1414 int offset, size, check_rip, offset_max, throttle = 0; 1415 unsigned int msr; 1416 u64 *buf, *config, period, new_config = 0; 1417 int br_target_idx = -1; 1418 1419 if (!test_bit(IBS_STARTED, pcpu->state)) { 1420 fail: 1421 /* 1422 * Catch spurious interrupts after stopping IBS: After 1423 * disabling IBS there could be still incoming NMIs 1424 * with samples that even have the valid bit cleared. 1425 * Mark all this NMIs as handled. 1426 */ 1427 if (test_and_clear_bit(IBS_STOPPED, pcpu->state)) 1428 return 1; 1429 1430 return 0; 1431 } 1432 1433 if (WARN_ON_ONCE(!event)) 1434 goto fail; 1435 1436 hwc = &event->hw; 1437 msr = hwc->config_base; 1438 buf = ibs_data.regs; 1439 rdmsrq(msr, *buf); 1440 if (!(*buf++ & perf_ibs->valid_mask)) 1441 goto fail; 1442 1443 config = &ibs_data.regs[0]; 1444 perf_ibs_event_update(perf_ibs, event, config); 1445 perf_sample_data_init(&data, 0, hwc->last_period); 1446 if (!perf_ibs_set_period(perf_ibs, hwc, &period)) 1447 goto out; /* no sw counter overflow */ 1448 1449 ibs_data.caps = ibs_caps; 1450 size = 1; 1451 offset = 1; 1452 check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK)); 1453 1454 offset_max = perf_ibs_get_offset_max(perf_ibs, event, check_rip); 1455 1456 do { 1457 rdmsrq(msr + offset, *buf++); 1458 size++; 1459 offset = find_next_bit(perf_ibs->offset_mask, 1460 perf_ibs->offset_max, 1461 offset + 1); 1462 } while (offset < offset_max); 1463 1464 if (perf_ibs_ldlat_event(perf_ibs, event)) { 1465 union ibs_op_data3 op_data3; 1466 1467 op_data3.val = ibs_data.regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)]; 1468 /* 1469 * Opening event is errored out if load latency threshold is 1470 * outside of [128, 2048] range. Since the event has reached 1471 * interrupt handler, we can safely assume the threshold is 1472 * within [128, 2048] range. 1473 */ 1474 if (!op_data3.ld_op || !op_data3.dc_miss || 1475 op_data3.dc_miss_lat <= (event->attr.config1 & IBS_OP_CONFIG1_LDLAT_MASK)) { 1476 throttle = perf_event_account_interrupt(event); 1477 goto out; 1478 } 1479 } 1480 1481 if (perf_ibs_fetch_lat_event(perf_ibs, event)) { 1482 union ibs_fetch_ctl fetch_ctl; 1483 1484 fetch_ctl.val = ibs_data.regs[ibs_fetch_msr_idx(MSR_AMD64_IBSFETCHCTL)]; 1485 if (fetch_ctl.fetch_lat < (event->attr.config1 & IBS_FETCH_CONFIG1_FETCHLAT_MASK)) { 1486 throttle = perf_event_account_interrupt(event); 1487 goto out; 1488 } 1489 } 1490 1491 /* 1492 * Read IbsBrTarget, IbsOpData4, and IbsExtdCtl separately 1493 * depending on their availability. 1494 * Can't add to offset_max as they are staggered 1495 */ 1496 if (event->attr.sample_type & PERF_SAMPLE_RAW) { 1497 if (perf_ibs == &perf_ibs_op) { 1498 if (ibs_caps & IBS_CAPS_BRNTRGT) { 1499 rdmsrq(MSR_AMD64_IBSBRTARGET, *buf++); 1500 br_target_idx = size; 1501 size++; 1502 } 1503 if (ibs_caps & IBS_CAPS_OPDATA4) { 1504 rdmsrq(MSR_AMD64_IBSOPDATA4, *buf++); 1505 size++; 1506 } 1507 } 1508 if (perf_ibs == &perf_ibs_fetch && (ibs_caps & IBS_CAPS_FETCHCTLEXTD)) { 1509 rdmsrq(MSR_AMD64_ICIBSEXTDCTL, *buf++); 1510 size++; 1511 } 1512 } 1513 ibs_data.size = sizeof(u64) * size; 1514 1515 regs = *iregs; 1516 if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) { 1517 regs.flags &= ~PERF_EFLAGS_EXACT; 1518 } else { 1519 /* Workaround for erratum #1197 */ 1520 if (perf_ibs->fetch_ignore_if_zero_rip && !(ibs_data.regs[1])) { 1521 throttle = perf_event_account_interrupt(event); 1522 goto out; 1523 } 1524 1525 set_linear_ip(®s, ibs_data.regs[1]); 1526 regs.flags |= PERF_EFLAGS_EXACT; 1527 } 1528 1529 if (((ibs_caps & IBS_CAPS_BIT63_FILTER) || 1530 (event->attr.config2 & IBS_SW_FILTER_MASK)) && 1531 perf_ibs_discard_sample(perf_ibs, event, ®s, &ibs_data, br_target_idx)) { 1532 throttle = perf_event_account_interrupt(event); 1533 goto out; 1534 } 1535 /* 1536 * Prevent leaking physical addresses to unprivileged users. Skip 1537 * PERF_SAMPLE_PHYS_ADDR check since generic code prevents it for 1538 * unprivileged users. 1539 */ 1540 if ((event->attr.sample_type & PERF_SAMPLE_RAW) && 1541 (hwc->flags & PERF_X86_EVENT_UNPRIVILEGED)) { 1542 perf_ibs_phyaddr_clear(perf_ibs, &ibs_data); 1543 } 1544 1545 if (event->attr.sample_type & PERF_SAMPLE_RAW) { 1546 raw = (struct perf_raw_record){ 1547 .frag = { 1548 .size = sizeof(u32) + ibs_data.size, 1549 .data = ibs_data.data, 1550 }, 1551 }; 1552 perf_sample_save_raw_data(&data, event, &raw); 1553 } 1554 1555 if (perf_ibs == &perf_ibs_op) 1556 perf_ibs_parse_ld_st_data(event->attr.sample_type, &ibs_data, &data); 1557 1558 /* 1559 * rip recorded by IbsOpRip will not be consistent with rsp and rbp 1560 * recorded as part of interrupt regs. Thus we need to use rip from 1561 * interrupt regs while unwinding call stack. 1562 */ 1563 perf_sample_save_callchain(&data, event, iregs); 1564 1565 throttle = perf_event_overflow(event, &data, ®s); 1566 1567 if (event->attr.freq && hwc->sample_period < perf_ibs->min_period) 1568 hwc->sample_period = perf_ibs->min_period; 1569 1570 out: 1571 if (!throttle) { 1572 if (ibs_caps & IBS_CAPS_DIS) 1573 wrmsrq(hwc->extra_reg.reg, perf_ibs->disable_mask); 1574 1575 if (perf_ibs == &perf_ibs_op) { 1576 if (ibs_caps & IBS_CAPS_OPCNTEXT) { 1577 new_config = period & IBS_OP_MAX_CNT_EXT_MASK; 1578 period &= ~IBS_OP_MAX_CNT_EXT_MASK; 1579 } 1580 if ((ibs_caps & IBS_CAPS_RDWROPCNT) && (*config & IBS_OP_CNT_CTL)) 1581 new_config |= *config & IBS_OP_CUR_CNT_RAND; 1582 } 1583 new_config |= period >> 4; 1584 1585 perf_ibs_enable_event(perf_ibs, hwc, new_config); 1586 } 1587 1588 perf_event_update_userpage(event); 1589 1590 return 1; 1591 } 1592 1593 static int 1594 perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs) 1595 { 1596 u64 stamp = sched_clock(); 1597 int handled = 0; 1598 1599 handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs); 1600 handled += perf_ibs_handle_irq(&perf_ibs_op, regs); 1601 1602 if (handled) 1603 inc_irq_stat(apic_perf_irqs); 1604 1605 perf_sample_event_took(sched_clock() - stamp); 1606 1607 return handled; 1608 } 1609 NOKPROBE_SYMBOL(perf_ibs_nmi_handler); 1610 1611 static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name) 1612 { 1613 struct cpu_perf_ibs __percpu *pcpu; 1614 int ret; 1615 1616 pcpu = alloc_percpu(struct cpu_perf_ibs); 1617 if (!pcpu) 1618 return -ENOMEM; 1619 1620 perf_ibs->pcpu = pcpu; 1621 1622 ret = perf_pmu_register(&perf_ibs->pmu, name, -1); 1623 if (ret) { 1624 perf_ibs->pcpu = NULL; 1625 free_percpu(pcpu); 1626 } 1627 1628 return ret; 1629 } 1630 1631 static __init int perf_ibs_fetch_init(void) 1632 { 1633 /* 1634 * Some chips fail to reset the fetch count when it is written; instead 1635 * they need a 0-1 transition of IbsFetchEn. 1636 */ 1637 if (boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18) 1638 perf_ibs_fetch.fetch_count_reset_broken = 1; 1639 1640 if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model < 0x10) 1641 perf_ibs_fetch.fetch_ignore_if_zero_rip = 1; 1642 1643 if (ibs_caps & IBS_CAPS_ZEN4) 1644 perf_ibs_fetch.config_mask |= IBS_FETCH_L3MISSONLY; 1645 1646 if (ibs_caps & IBS_CAPS_DIS) 1647 perf_ibs_fetch.disable_mask = IBS_FETCH_2_DIS; 1648 1649 perf_ibs_fetch.pmu.attr_groups = fetch_attr_groups; 1650 perf_ibs_fetch.pmu.attr_update = fetch_attr_update; 1651 1652 return perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch"); 1653 } 1654 1655 static __init int perf_ibs_op_init(void) 1656 { 1657 if (ibs_caps & IBS_CAPS_OPCNT) 1658 perf_ibs_op.config_mask |= IBS_OP_CNT_CTL; 1659 1660 if (ibs_caps & IBS_CAPS_OPCNTEXT) { 1661 perf_ibs_op.max_period |= IBS_OP_MAX_CNT_EXT_MASK; 1662 perf_ibs_op.config_mask |= IBS_OP_MAX_CNT_EXT_MASK; 1663 perf_ibs_op.cnt_mask |= (IBS_OP_MAX_CNT_EXT_MASK | 1664 IBS_OP_CUR_CNT_EXT_MASK); 1665 } 1666 1667 if (ibs_caps & IBS_CAPS_ZEN4) 1668 perf_ibs_op.config_mask |= IBS_OP_L3MISSONLY; 1669 1670 if (ibs_caps & IBS_CAPS_DIS) 1671 perf_ibs_op.disable_mask = IBS_OP_2_DIS; 1672 1673 perf_ibs_op.pmu.attr_groups = op_attr_groups; 1674 perf_ibs_op.pmu.attr_update = op_attr_update; 1675 1676 return perf_ibs_pmu_init(&perf_ibs_op, "ibs_op"); 1677 } 1678 1679 static __init int perf_event_ibs_init(void) 1680 { 1681 int ret; 1682 1683 ret = perf_ibs_fetch_init(); 1684 if (ret) 1685 return ret; 1686 1687 ret = perf_ibs_op_init(); 1688 if (ret) 1689 goto err_op; 1690 1691 ret = register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs"); 1692 if (ret) 1693 goto err_nmi; 1694 1695 pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps); 1696 return 0; 1697 1698 err_nmi: 1699 perf_pmu_unregister(&perf_ibs_op.pmu); 1700 free_percpu(perf_ibs_op.pcpu); 1701 perf_ibs_op.pcpu = NULL; 1702 err_op: 1703 perf_pmu_unregister(&perf_ibs_fetch.pmu); 1704 free_percpu(perf_ibs_fetch.pcpu); 1705 perf_ibs_fetch.pcpu = NULL; 1706 1707 return ret; 1708 } 1709 1710 #else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */ 1711 1712 static __init int perf_event_ibs_init(void) 1713 { 1714 return 0; 1715 } 1716 1717 #endif 1718 1719 /* IBS - apic initialization, for perf and oprofile */ 1720 1721 static __init u32 __get_ibs_caps(void) 1722 { 1723 u32 caps; 1724 unsigned int max_level; 1725 1726 if (!boot_cpu_has(X86_FEATURE_IBS)) 1727 return 0; 1728 1729 /* check IBS cpuid feature flags */ 1730 max_level = cpuid_eax(0x80000000); 1731 if (max_level < IBS_CPUID_FEATURES) 1732 return IBS_CAPS_DEFAULT; 1733 1734 caps = cpuid_eax(IBS_CPUID_FEATURES); 1735 if (!(caps & IBS_CAPS_AVAIL)) 1736 /* cpuid flags not valid */ 1737 return IBS_CAPS_DEFAULT; 1738 1739 return caps; 1740 } 1741 1742 u32 get_ibs_caps(void) 1743 { 1744 return ibs_caps; 1745 } 1746 1747 EXPORT_SYMBOL(get_ibs_caps); 1748 1749 static inline int get_eilvt(int offset) 1750 { 1751 return !setup_APIC_eilvt(offset, 0, APIC_DELIVERY_MODE_NMI, 1); 1752 } 1753 1754 static inline int put_eilvt(int offset) 1755 { 1756 return !setup_APIC_eilvt(offset, 0, 0, 1); 1757 } 1758 1759 /* 1760 * Check and reserve APIC extended interrupt LVT offset for IBS if available. 1761 */ 1762 static inline int ibs_eilvt_valid(void) 1763 { 1764 int offset; 1765 u64 val; 1766 int valid = 0; 1767 1768 preempt_disable(); 1769 1770 rdmsrq(MSR_AMD64_IBSCTL, val); 1771 offset = val & IBSCTL_LVT_OFFSET_MASK; 1772 1773 if (!(val & IBSCTL_LVT_OFFSET_VALID)) { 1774 pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n", 1775 smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); 1776 goto out; 1777 } 1778 1779 if (!get_eilvt(offset)) { 1780 pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n", 1781 smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); 1782 goto out; 1783 } 1784 1785 valid = 1; 1786 out: 1787 preempt_enable(); 1788 1789 return valid; 1790 } 1791 1792 static int setup_ibs_ctl(int ibs_eilvt_off) 1793 { 1794 struct pci_dev *cpu_cfg; 1795 int nodes; 1796 u32 value = 0; 1797 1798 nodes = 0; 1799 cpu_cfg = NULL; 1800 do { 1801 cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD, 1802 PCI_DEVICE_ID_AMD_10H_NB_MISC, 1803 cpu_cfg); 1804 if (!cpu_cfg) 1805 break; 1806 ++nodes; 1807 pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off 1808 | IBSCTL_LVT_OFFSET_VALID); 1809 pci_read_config_dword(cpu_cfg, IBSCTL, &value); 1810 if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) { 1811 pci_dev_put(cpu_cfg); 1812 pr_debug("Failed to setup IBS LVT offset, IBSCTL = 0x%08x\n", 1813 value); 1814 return -EINVAL; 1815 } 1816 } while (1); 1817 1818 if (!nodes) { 1819 pr_debug("No CPU node configured for IBS\n"); 1820 return -ENODEV; 1821 } 1822 1823 return 0; 1824 } 1825 1826 /* 1827 * This runs only on the current cpu. We try to find an LVT offset and 1828 * setup the local APIC. For this we must disable preemption. On 1829 * success we initialize all nodes with this offset. This updates then 1830 * the offset in the IBS_CTL per-node msr. The per-core APIC setup of 1831 * the IBS interrupt vector is handled by perf_ibs_cpu_notifier that 1832 * is using the new offset. 1833 */ 1834 static void force_ibs_eilvt_setup(void) 1835 { 1836 int offset; 1837 int ret; 1838 1839 preempt_disable(); 1840 /* find the next free available EILVT entry, skip offset 0 */ 1841 for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) { 1842 if (get_eilvt(offset)) 1843 break; 1844 } 1845 preempt_enable(); 1846 1847 if (offset == APIC_EILVT_NR_MAX) { 1848 pr_debug("No EILVT entry available\n"); 1849 return; 1850 } 1851 1852 ret = setup_ibs_ctl(offset); 1853 if (ret) 1854 goto out; 1855 1856 if (!ibs_eilvt_valid()) 1857 goto out; 1858 1859 pr_info("LVT offset %d assigned\n", offset); 1860 1861 return; 1862 out: 1863 preempt_disable(); 1864 put_eilvt(offset); 1865 preempt_enable(); 1866 return; 1867 } 1868 1869 static void ibs_eilvt_setup(void) 1870 { 1871 /* 1872 * Force LVT offset assignment for family 10h: The offsets are 1873 * not assigned by the BIOS for this family, so the OS is 1874 * responsible for doing it. If the OS assignment fails, fall 1875 * back to BIOS settings and try to setup this. 1876 */ 1877 if (boot_cpu_data.x86 == 0x10) 1878 force_ibs_eilvt_setup(); 1879 } 1880 1881 static inline int get_ibs_lvt_offset(void) 1882 { 1883 u64 val; 1884 1885 rdmsrq(MSR_AMD64_IBSCTL, val); 1886 if (!(val & IBSCTL_LVT_OFFSET_VALID)) 1887 return -EINVAL; 1888 1889 return val & IBSCTL_LVT_OFFSET_MASK; 1890 } 1891 1892 static void setup_APIC_ibs(void) 1893 { 1894 int offset; 1895 1896 offset = get_ibs_lvt_offset(); 1897 if (offset < 0) 1898 goto failed; 1899 1900 if (!setup_APIC_eilvt(offset, 0, APIC_DELIVERY_MODE_NMI, 0)) 1901 return; 1902 failed: 1903 pr_warn("perf: IBS APIC setup failed on cpu #%d\n", 1904 smp_processor_id()); 1905 } 1906 1907 static void clear_APIC_ibs(void) 1908 { 1909 int offset; 1910 1911 offset = get_ibs_lvt_offset(); 1912 if (offset >= 0) 1913 setup_APIC_eilvt(offset, 0, APIC_DELIVERY_MODE_FIXED, 1); 1914 } 1915 1916 static int x86_pmu_amd_ibs_starting_cpu(unsigned int cpu) 1917 { 1918 setup_APIC_ibs(); 1919 1920 if (ibs_caps & IBS_CAPS_DIS) { 1921 /* 1922 * IBS enable sequence: 1923 * CTL[En] = 1; 1924 * CTL2[Dis] = 0; 1925 * 1926 * IBS disable sequence: 1927 * CTL2[Dis] = 1; 1928 * 1929 * Set CTL2[Dis] when CPU comes up. This is needed to make 1930 * enable sequence effective. 1931 */ 1932 wrmsrq(MSR_AMD64_IBSFETCHCTL2, IBS_FETCH_2_DIS); 1933 wrmsrq(MSR_AMD64_IBSOPCTL2, IBS_OP_2_DIS); 1934 } 1935 1936 return 0; 1937 } 1938 1939 #ifdef CONFIG_PM 1940 1941 static int perf_ibs_suspend(void *data) 1942 { 1943 clear_APIC_ibs(); 1944 return 0; 1945 } 1946 1947 static void perf_ibs_resume(void *data) 1948 { 1949 ibs_eilvt_setup(); 1950 setup_APIC_ibs(); 1951 } 1952 1953 static const struct syscore_ops perf_ibs_syscore_ops = { 1954 .resume = perf_ibs_resume, 1955 .suspend = perf_ibs_suspend, 1956 }; 1957 1958 static struct syscore perf_ibs_syscore = { 1959 .ops = &perf_ibs_syscore_ops, 1960 }; 1961 1962 static void perf_ibs_pm_init(void) 1963 { 1964 register_syscore(&perf_ibs_syscore); 1965 } 1966 1967 #else 1968 1969 static inline void perf_ibs_pm_init(void) { } 1970 1971 #endif 1972 1973 static int x86_pmu_amd_ibs_dying_cpu(unsigned int cpu) 1974 { 1975 clear_APIC_ibs(); 1976 return 0; 1977 } 1978 1979 static __init int amd_ibs_init(void) 1980 { 1981 u32 caps; 1982 1983 caps = __get_ibs_caps(); 1984 if (!caps) 1985 return -ENODEV; /* ibs not supported by the cpu */ 1986 1987 ibs_eilvt_setup(); 1988 1989 if (!ibs_eilvt_valid()) 1990 return -EINVAL; 1991 1992 perf_ibs_pm_init(); 1993 1994 #ifdef CONFIG_X86_32 1995 /* 1996 * IBS_CAPS_BIT63_FILTER is used for exclude_kernel/user filtering, 1997 * which obviously won't work for 32 bit kernel. 1998 */ 1999 caps &= ~IBS_CAPS_BIT63_FILTER; 2000 #endif 2001 2002 ibs_caps = caps; 2003 /* make ibs_caps visible to other cpus: */ 2004 smp_mb(); 2005 /* 2006 * x86_pmu_amd_ibs_starting_cpu will be called from core on 2007 * all online cpus. 2008 */ 2009 cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_IBS_STARTING, 2010 "perf/x86/amd/ibs:starting", 2011 x86_pmu_amd_ibs_starting_cpu, 2012 x86_pmu_amd_ibs_dying_cpu); 2013 2014 return perf_event_ibs_init(); 2015 } 2016 2017 /* Since we need the pci subsystem to init ibs we can't do this earlier: */ 2018 device_initcall(amd_ibs_init); 2019