1 // SPDX-License-Identifier: GPL-2.0-only 2 #include <linux/perf_event.h> 3 #include <linux/jump_label.h> 4 #include <linux/export.h> 5 #include <linux/kvm_types.h> 6 #include <linux/types.h> 7 #include <linux/init.h> 8 #include <linux/slab.h> 9 #include <linux/delay.h> 10 #include <linux/jiffies.h> 11 12 #include <asm/apicdef.h> 13 #include <asm/apic.h> 14 #include <asm/cpuid/api.h> 15 #include <asm/msr.h> 16 #include <asm/nmi.h> 17 18 #include "../perf_event.h" 19 20 static DEFINE_PER_CPU(unsigned long, perf_nmi_tstamp); 21 static unsigned long perf_nmi_window; 22 23 /* AMD Event 0xFFF: Merge. Used with Large Increment per Cycle events */ 24 #define AMD_MERGE_EVENT ((0xFULL << 32) | 0xFFULL) 25 #define AMD_MERGE_EVENT_ENABLE (AMD_MERGE_EVENT | ARCH_PERFMON_EVENTSEL_ENABLE) 26 27 /* PMC Enable and Overflow bits for PerfCntrGlobal* registers */ 28 static u64 amd_pmu_global_cntr_mask __read_mostly; 29 30 static __initconst const u64 amd_hw_cache_event_ids 31 [PERF_COUNT_HW_CACHE_MAX] 32 [PERF_COUNT_HW_CACHE_OP_MAX] 33 [PERF_COUNT_HW_CACHE_RESULT_MAX] = 34 { 35 [ C(L1D) ] = { 36 [ C(OP_READ) ] = { 37 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ 38 [ C(RESULT_MISS) ] = 0x0141, /* Data Cache Misses */ 39 }, 40 [ C(OP_WRITE) ] = { 41 [ C(RESULT_ACCESS) ] = 0, 42 [ C(RESULT_MISS) ] = 0, 43 }, 44 [ C(OP_PREFETCH) ] = { 45 [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */ 46 [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */ 47 }, 48 }, 49 [ C(L1I ) ] = { 50 [ C(OP_READ) ] = { 51 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */ 52 [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */ 53 }, 54 [ C(OP_WRITE) ] = { 55 [ C(RESULT_ACCESS) ] = -1, 56 [ C(RESULT_MISS) ] = -1, 57 }, 58 [ C(OP_PREFETCH) ] = { 59 [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */ 60 [ C(RESULT_MISS) ] = 0, 61 }, 62 }, 63 [ C(LL ) ] = { 64 [ C(OP_READ) ] = { 65 [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */ 66 [ 
C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */ 67 }, 68 [ C(OP_WRITE) ] = { 69 [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */ 70 [ C(RESULT_MISS) ] = 0, 71 }, 72 [ C(OP_PREFETCH) ] = { 73 [ C(RESULT_ACCESS) ] = 0, 74 [ C(RESULT_MISS) ] = 0, 75 }, 76 }, 77 [ C(DTLB) ] = { 78 [ C(OP_READ) ] = { 79 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ 80 [ C(RESULT_MISS) ] = 0x0746, /* L1_DTLB_AND_L2_DLTB_MISS.ALL */ 81 }, 82 [ C(OP_WRITE) ] = { 83 [ C(RESULT_ACCESS) ] = 0, 84 [ C(RESULT_MISS) ] = 0, 85 }, 86 [ C(OP_PREFETCH) ] = { 87 [ C(RESULT_ACCESS) ] = 0, 88 [ C(RESULT_MISS) ] = 0, 89 }, 90 }, 91 [ C(ITLB) ] = { 92 [ C(OP_READ) ] = { 93 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */ 94 [ C(RESULT_MISS) ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */ 95 }, 96 [ C(OP_WRITE) ] = { 97 [ C(RESULT_ACCESS) ] = -1, 98 [ C(RESULT_MISS) ] = -1, 99 }, 100 [ C(OP_PREFETCH) ] = { 101 [ C(RESULT_ACCESS) ] = -1, 102 [ C(RESULT_MISS) ] = -1, 103 }, 104 }, 105 [ C(BPU ) ] = { 106 [ C(OP_READ) ] = { 107 [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. 
*/ 108 [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */ 109 }, 110 [ C(OP_WRITE) ] = { 111 [ C(RESULT_ACCESS) ] = -1, 112 [ C(RESULT_MISS) ] = -1, 113 }, 114 [ C(OP_PREFETCH) ] = { 115 [ C(RESULT_ACCESS) ] = -1, 116 [ C(RESULT_MISS) ] = -1, 117 }, 118 }, 119 [ C(NODE) ] = { 120 [ C(OP_READ) ] = { 121 [ C(RESULT_ACCESS) ] = 0xb8e9, /* CPU Request to Memory, l+r */ 122 [ C(RESULT_MISS) ] = 0x98e9, /* CPU Request to Memory, r */ 123 }, 124 [ C(OP_WRITE) ] = { 125 [ C(RESULT_ACCESS) ] = -1, 126 [ C(RESULT_MISS) ] = -1, 127 }, 128 [ C(OP_PREFETCH) ] = { 129 [ C(RESULT_ACCESS) ] = -1, 130 [ C(RESULT_MISS) ] = -1, 131 }, 132 }, 133 }; 134 135 static __initconst const u64 amd_hw_cache_event_ids_f17h 136 [PERF_COUNT_HW_CACHE_MAX] 137 [PERF_COUNT_HW_CACHE_OP_MAX] 138 [PERF_COUNT_HW_CACHE_RESULT_MAX] = { 139 [C(L1D)] = { 140 [C(OP_READ)] = { 141 [C(RESULT_ACCESS)] = 0x0040, /* Data Cache Accesses */ 142 [C(RESULT_MISS)] = 0xc860, /* L2$ access from DC Miss */ 143 }, 144 [C(OP_WRITE)] = { 145 [C(RESULT_ACCESS)] = 0, 146 [C(RESULT_MISS)] = 0, 147 }, 148 [C(OP_PREFETCH)] = { 149 [C(RESULT_ACCESS)] = 0xff5a, /* h/w prefetch DC Fills */ 150 [C(RESULT_MISS)] = 0, 151 }, 152 }, 153 [C(L1I)] = { 154 [C(OP_READ)] = { 155 [C(RESULT_ACCESS)] = 0x0080, /* Instruction cache fetches */ 156 [C(RESULT_MISS)] = 0x0081, /* Instruction cache misses */ 157 }, 158 [C(OP_WRITE)] = { 159 [C(RESULT_ACCESS)] = -1, 160 [C(RESULT_MISS)] = -1, 161 }, 162 [C(OP_PREFETCH)] = { 163 [C(RESULT_ACCESS)] = 0, 164 [C(RESULT_MISS)] = 0, 165 }, 166 }, 167 [C(LL)] = { 168 [C(OP_READ)] = { 169 [C(RESULT_ACCESS)] = 0, 170 [C(RESULT_MISS)] = 0, 171 }, 172 [C(OP_WRITE)] = { 173 [C(RESULT_ACCESS)] = 0, 174 [C(RESULT_MISS)] = 0, 175 }, 176 [C(OP_PREFETCH)] = { 177 [C(RESULT_ACCESS)] = 0, 178 [C(RESULT_MISS)] = 0, 179 }, 180 }, 181 [C(DTLB)] = { 182 [C(OP_READ)] = { 183 [C(RESULT_ACCESS)] = 0xff45, /* All L2 DTLB accesses */ 184 [C(RESULT_MISS)] = 0xf045, /* L2 DTLB misses (PT walks) */ 185 }, 186 [C(OP_WRITE)] 
= { 187 [C(RESULT_ACCESS)] = 0, 188 [C(RESULT_MISS)] = 0, 189 }, 190 [C(OP_PREFETCH)] = { 191 [C(RESULT_ACCESS)] = 0, 192 [C(RESULT_MISS)] = 0, 193 }, 194 }, 195 [C(ITLB)] = { 196 [C(OP_READ)] = { 197 [C(RESULT_ACCESS)] = 0x0084, /* L1 ITLB misses, L2 ITLB hits */ 198 [C(RESULT_MISS)] = 0xff85, /* L1 ITLB misses, L2 misses */ 199 }, 200 [C(OP_WRITE)] = { 201 [C(RESULT_ACCESS)] = -1, 202 [C(RESULT_MISS)] = -1, 203 }, 204 [C(OP_PREFETCH)] = { 205 [C(RESULT_ACCESS)] = -1, 206 [C(RESULT_MISS)] = -1, 207 }, 208 }, 209 [C(BPU)] = { 210 [C(OP_READ)] = { 211 [C(RESULT_ACCESS)] = 0x00c2, /* Retired Branch Instr. */ 212 [C(RESULT_MISS)] = 0x00c3, /* Retired Mispredicted BI */ 213 }, 214 [C(OP_WRITE)] = { 215 [C(RESULT_ACCESS)] = -1, 216 [C(RESULT_MISS)] = -1, 217 }, 218 [C(OP_PREFETCH)] = { 219 [C(RESULT_ACCESS)] = -1, 220 [C(RESULT_MISS)] = -1, 221 }, 222 }, 223 [C(NODE)] = { 224 [C(OP_READ)] = { 225 [C(RESULT_ACCESS)] = 0, 226 [C(RESULT_MISS)] = 0, 227 }, 228 [C(OP_WRITE)] = { 229 [C(RESULT_ACCESS)] = -1, 230 [C(RESULT_MISS)] = -1, 231 }, 232 [C(OP_PREFETCH)] = { 233 [C(RESULT_ACCESS)] = -1, 234 [C(RESULT_MISS)] = -1, 235 }, 236 }, 237 }; 238 239 /* 240 * AMD Performance Monitor K7 and later, up to and including Family 16h: 241 */ 242 static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] = 243 { 244 [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, 245 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, 246 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x077d, 247 [PERF_COUNT_HW_CACHE_MISSES] = 0x077e, 248 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, 249 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, 250 [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */ 251 [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */ 252 }; 253 254 /* 255 * AMD Performance Monitor Family 17h and later: 256 */ 257 static const u64 amd_zen1_perfmon_event_map[PERF_COUNT_HW_MAX] = 258 { 259 [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, 260 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, 261 
[PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60, 262 [PERF_COUNT_HW_CACHE_MISSES] = 0x0964, 263 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, 264 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, 265 [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x0287, 266 [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187, 267 }; 268 269 static const u64 amd_zen2_perfmon_event_map[PERF_COUNT_HW_MAX] = 270 { 271 [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, 272 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, 273 [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60, 274 [PERF_COUNT_HW_CACHE_MISSES] = 0x0964, 275 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, 276 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, 277 [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9, 278 }; 279 280 static const u64 amd_zen4_perfmon_event_map[PERF_COUNT_HW_MAX] = 281 { 282 [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, 283 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, 284 [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60, 285 [PERF_COUNT_HW_CACHE_MISSES] = 0x0964, 286 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, 287 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, 288 [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9, 289 [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x100000120, 290 }; 291 292 static u64 amd_pmu_event_map(int hw_event) 293 { 294 if (cpu_feature_enabled(X86_FEATURE_ZEN4) || boot_cpu_data.x86 >= 0x1a) 295 return amd_zen4_perfmon_event_map[hw_event]; 296 297 if (cpu_feature_enabled(X86_FEATURE_ZEN2) || boot_cpu_data.x86 >= 0x19) 298 return amd_zen2_perfmon_event_map[hw_event]; 299 300 if (cpu_feature_enabled(X86_FEATURE_ZEN1)) 301 return amd_zen1_perfmon_event_map[hw_event]; 302 303 return amd_perfmon_event_map[hw_event]; 304 } 305 306 /* 307 * Previously calculated offsets 308 */ 309 static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly; 310 static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly; 311 312 /* 313 * Legacy CPUs: 314 * 4 counters starting at 0xc0010000 each offset by 1 315 * 316 * CPUs with core performance counter extensions: 317 * 6 counters starting at 
0xc0010200 each offset by 2 318 */ 319 static inline int amd_pmu_addr_offset(int index, bool eventsel) 320 { 321 int offset; 322 323 if (!index) 324 return index; 325 326 if (eventsel) 327 offset = event_offsets[index]; 328 else 329 offset = count_offsets[index]; 330 331 if (offset) 332 return offset; 333 334 if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE)) 335 offset = index; 336 else 337 offset = index << 1; 338 339 if (eventsel) 340 event_offsets[index] = offset; 341 else 342 count_offsets[index] = offset; 343 344 return offset; 345 } 346 347 /* 348 * AMD64 events are detected based on their event codes. 349 */ 350 static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc) 351 { 352 return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff); 353 } 354 355 static inline bool amd_is_pair_event_code(struct hw_perf_event *hwc) 356 { 357 if (!(x86_pmu.flags & PMU_FL_PAIR)) 358 return false; 359 360 switch (amd_get_event_code(hwc)) { 361 case 0x003: return true; /* Retired SSE/AVX FLOPs */ 362 default: return false; 363 } 364 } 365 366 DEFINE_STATIC_CALL_RET0(amd_pmu_branch_hw_config, *x86_pmu.hw_config); 367 368 static int amd_core_hw_config(struct perf_event *event) 369 { 370 if (event->attr.exclude_host && event->attr.exclude_guest) 371 /* 372 * When HO == GO == 1 the hardware treats that as GO == HO == 0 373 * and will count in both modes. We don't want to count in that 374 * case so we emulate no-counting by setting US = OS = 0. 
375 */ 376 event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR | 377 ARCH_PERFMON_EVENTSEL_OS); 378 else if (event->attr.exclude_host) 379 event->hw.config |= AMD64_EVENTSEL_GUESTONLY; 380 else if (event->attr.exclude_guest) 381 event->hw.config |= AMD64_EVENTSEL_HOSTONLY; 382 383 if ((x86_pmu.flags & PMU_FL_PAIR) && amd_is_pair_event_code(&event->hw)) 384 event->hw.flags |= PERF_X86_EVENT_PAIR; 385 386 if (has_branch_stack(event)) 387 return static_call(amd_pmu_branch_hw_config)(event); 388 389 return 0; 390 } 391 392 static inline int amd_is_nb_event(struct hw_perf_event *hwc) 393 { 394 return (hwc->config & 0xe0) == 0xe0; 395 } 396 397 static inline int amd_has_nb(struct cpu_hw_events *cpuc) 398 { 399 struct amd_nb *nb = cpuc->amd_nb; 400 401 return nb && nb->nb_id != -1; 402 } 403 404 static int amd_pmu_hw_config(struct perf_event *event) 405 { 406 int ret; 407 408 /* pass precise event sampling to ibs: */ 409 if (event->attr.precise_ip && get_ibs_caps()) 410 return forward_event_to_ibs(event); 411 412 if (has_branch_stack(event) && !x86_pmu.lbr_nr) 413 return -EOPNOTSUPP; 414 415 ret = x86_pmu_hw_config(event); 416 if (ret) 417 return ret; 418 419 if (event->attr.type == PERF_TYPE_RAW) 420 event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK; 421 422 return amd_core_hw_config(event); 423 } 424 425 static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc, 426 struct perf_event *event) 427 { 428 struct amd_nb *nb = cpuc->amd_nb; 429 int i; 430 431 /* 432 * need to scan whole list because event may not have 433 * been assigned during scheduling 434 * 435 * no race condition possible because event can only 436 * be removed on one CPU at a time AND PMU is disabled 437 * when we come here 438 */ 439 for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) { 440 struct perf_event *tmp = event; 441 442 if (try_cmpxchg(nb->owners + i, &tmp, NULL)) 443 break; 444 } 445 } 446 447 /* 448 * AMD64 NorthBridge events need special treatment because 449 * 
counter access needs to be synchronized across all cores
 * of a package. Refer to BKDG section 3.12
 *
 * NB events are events measuring L3 cache, Hypertransport
 * traffic. They are identified by an event code >= 0xe00.
 * They measure events on the NorthBridge which is shared
 * by all cores on a package. NB events are counted on a
 * shared set of counters. When a NB event is programmed
 * in a counter, the data actually comes from a shared
 * counter. Thus, access to those counters needs to be
 * synchronized.
 *
 * We implement the synchronization such that no two cores
 * can be measuring NB events using the same counters. Thus,
 * we maintain a per-NB allocation table. The available slot
 * is propagated using the event_constraint structure.
 *
 * We provide only one choice for each NB event based on
 * the fact that only NB events have restrictions. Consequently,
 * if a counter is available, there is a guarantee the NB event
 * will be assigned to it. If no slot is available, an empty
 * constraint is returned and scheduling will eventually fail
 * for this event.
 *
 * Note that all cores attached to the same NB compete for the same
 * counters to host NB events, this is why we use atomic ops. Some
 * multi-chip CPUs may have more than one NB.
 *
 * Given that resources are allocated (cmpxchg), they must be
 * eventually freed for others to use. This is accomplished by
 * calling __amd_put_nb_event_constraints()
 *
 * Non NB events are not impacted by this restriction.
482 */ 483 static struct event_constraint * 484 __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, 485 struct event_constraint *c) 486 { 487 struct hw_perf_event *hwc = &event->hw; 488 struct amd_nb *nb = cpuc->amd_nb; 489 struct perf_event *old; 490 int idx, new = -1; 491 492 if (!c) 493 c = &unconstrained; 494 495 if (cpuc->is_fake) 496 return c; 497 498 /* 499 * detect if already present, if so reuse 500 * 501 * cannot merge with actual allocation 502 * because of possible holes 503 * 504 * event can already be present yet not assigned (in hwc->idx) 505 * because of successive calls to x86_schedule_events() from 506 * hw_perf_group_sched_in() without hw_perf_enable() 507 */ 508 for_each_set_bit(idx, c->idxmsk, x86_pmu_max_num_counters(NULL)) { 509 if (new == -1 || hwc->idx == idx) 510 /* assign free slot, prefer hwc->idx */ 511 old = cmpxchg(nb->owners + idx, NULL, event); 512 else if (nb->owners[idx] == event) 513 /* event already present */ 514 old = event; 515 else 516 continue; 517 518 if (old && old != event) 519 continue; 520 521 /* reassign to this slot */ 522 if (new != -1) 523 cmpxchg(nb->owners + new, event, NULL); 524 new = idx; 525 526 /* already present, reuse */ 527 if (old == event) 528 break; 529 } 530 531 if (new == -1) 532 return &emptyconstraint; 533 534 return &nb->event_constraints[new]; 535 } 536 537 static struct amd_nb *amd_alloc_nb(int cpu) 538 { 539 struct amd_nb *nb; 540 int i; 541 542 nb = kzalloc_node(sizeof(struct amd_nb), GFP_KERNEL, cpu_to_node(cpu)); 543 if (!nb) 544 return NULL; 545 546 nb->nb_id = -1; 547 548 /* 549 * initialize all possible NB constraints 550 */ 551 for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) { 552 __set_bit(i, nb->event_constraints[i].idxmsk); 553 nb->event_constraints[i].weight = 1; 554 } 555 return nb; 556 } 557 558 typedef void (amd_pmu_branch_reset_t)(void); 559 DEFINE_STATIC_CALL_NULL(amd_pmu_branch_reset, amd_pmu_branch_reset_t); 560 561 static void 
amd_pmu_cpu_reset(int cpu) 562 { 563 if (x86_pmu.lbr_nr) 564 static_call(amd_pmu_branch_reset)(); 565 566 if (x86_pmu.version < 2) 567 return; 568 569 /* Clear enable bits i.e. PerfCntrGlobalCtl.PerfCntrEn */ 570 wrmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0); 571 572 /* 573 * Clear freeze and overflow bits i.e. PerfCntrGLobalStatus.LbrFreeze 574 * and PerfCntrGLobalStatus.PerfCntrOvfl 575 */ 576 wrmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, 577 GLOBAL_STATUS_LBRS_FROZEN | amd_pmu_global_cntr_mask); 578 } 579 580 static int amd_pmu_cpu_prepare(int cpu) 581 { 582 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); 583 584 cpuc->lbr_sel = kzalloc_node(sizeof(struct er_account), GFP_KERNEL, 585 cpu_to_node(cpu)); 586 if (!cpuc->lbr_sel) 587 return -ENOMEM; 588 589 WARN_ON_ONCE(cpuc->amd_nb); 590 591 if (!x86_pmu.amd_nb_constraints) 592 return 0; 593 594 cpuc->amd_nb = amd_alloc_nb(cpu); 595 if (cpuc->amd_nb) 596 return 0; 597 598 kfree(cpuc->lbr_sel); 599 cpuc->lbr_sel = NULL; 600 601 return -ENOMEM; 602 } 603 604 static void amd_pmu_cpu_starting(int cpu) 605 { 606 struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); 607 void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED]; 608 struct amd_nb *nb; 609 int i, nb_id; 610 611 cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY; 612 amd_pmu_cpu_reset(cpu); 613 614 if (!x86_pmu.amd_nb_constraints) 615 return; 616 617 nb_id = topology_amd_node_id(cpu); 618 WARN_ON_ONCE(nb_id == BAD_APICID); 619 620 for_each_online_cpu(i) { 621 nb = per_cpu(cpu_hw_events, i).amd_nb; 622 if (WARN_ON_ONCE(!nb)) 623 continue; 624 625 if (nb->nb_id == nb_id) { 626 *onln = cpuc->amd_nb; 627 cpuc->amd_nb = nb; 628 break; 629 } 630 } 631 632 cpuc->amd_nb->nb_id = nb_id; 633 cpuc->amd_nb->refcnt++; 634 } 635 636 static void amd_pmu_cpu_dead(int cpu) 637 { 638 struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); 639 640 kfree(cpuhw->lbr_sel); 641 cpuhw->lbr_sel = NULL; 642 643 if (!x86_pmu.amd_nb_constraints) 644 return; 645 646 
if (cpuhw->amd_nb) { 647 struct amd_nb *nb = cpuhw->amd_nb; 648 649 if (nb->nb_id == -1 || --nb->refcnt == 0) 650 kfree(nb); 651 652 cpuhw->amd_nb = NULL; 653 } 654 } 655 656 static __always_inline void amd_pmu_set_global_ctl(u64 ctl) 657 { 658 wrmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, ctl); 659 } 660 661 static inline u64 amd_pmu_get_global_status(void) 662 { 663 u64 status; 664 665 /* PerfCntrGlobalStatus is read-only */ 666 rdmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS, status); 667 668 return status; 669 } 670 671 static inline void amd_pmu_ack_global_status(u64 status) 672 { 673 /* 674 * PerfCntrGlobalStatus is read-only but an overflow acknowledgment 675 * mechanism exists; writing 1 to a bit in PerfCntrGlobalStatusClr 676 * clears the same bit in PerfCntrGlobalStatus 677 */ 678 679 wrmsrq(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, status); 680 } 681 682 static bool amd_pmu_test_overflow_topbit(int idx) 683 { 684 u64 counter; 685 686 rdmsrq(x86_pmu_event_addr(idx), counter); 687 688 return !(counter & BIT_ULL(x86_pmu.cntval_bits - 1)); 689 } 690 691 static bool amd_pmu_test_overflow_status(int idx) 692 { 693 return amd_pmu_get_global_status() & BIT_ULL(idx); 694 } 695 696 DEFINE_STATIC_CALL(amd_pmu_test_overflow, amd_pmu_test_overflow_topbit); 697 698 /* 699 * When a PMC counter overflows, an NMI is used to process the event and 700 * reset the counter. NMI latency can result in the counter being updated 701 * before the NMI can run, which can result in what appear to be spurious 702 * NMIs. This function is intended to wait for the NMI to run and reset 703 * the counter to avoid possible unhandled NMI messages. 704 */ 705 #define OVERFLOW_WAIT_COUNT 50 706 707 static void amd_pmu_wait_on_overflow(int idx) 708 { 709 unsigned int i; 710 711 /* 712 * Wait for the counter to be reset if it has overflowed. This loop 713 * should exit very, very quickly, but just in case, don't wait 714 * forever... 
715 */ 716 for (i = 0; i < OVERFLOW_WAIT_COUNT; i++) { 717 if (!static_call(amd_pmu_test_overflow)(idx)) 718 break; 719 720 /* Might be in IRQ context, so can't sleep */ 721 udelay(1); 722 } 723 } 724 725 static void amd_pmu_check_overflow(void) 726 { 727 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 728 int idx; 729 730 /* 731 * This shouldn't be called from NMI context, but add a safeguard here 732 * to return, since if we're in NMI context we can't wait for an NMI 733 * to reset an overflowed counter value. 734 */ 735 if (in_nmi()) 736 return; 737 738 /* 739 * Check each counter for overflow and wait for it to be reset by the 740 * NMI if it has overflowed. This relies on the fact that all active 741 * counters are always enabled when this function is called and 742 * ARCH_PERFMON_EVENTSEL_INT is always set. 743 */ 744 for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) { 745 if (!test_bit(idx, cpuc->active_mask)) 746 continue; 747 748 amd_pmu_wait_on_overflow(idx); 749 } 750 } 751 752 static void amd_pmu_enable_event(struct perf_event *event) 753 { 754 x86_pmu_enable_event(event); 755 } 756 757 static void amd_pmu_enable_all(int added) 758 { 759 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 760 int idx; 761 762 amd_brs_enable_all(); 763 764 for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) { 765 /* only activate events which are marked as active */ 766 if (!test_bit(idx, cpuc->active_mask)) 767 continue; 768 769 /* 770 * FIXME: cpuc->events[idx] can become NULL in a subtle race 771 * condition with NMI->throttle->x86_pmu_stop(). 772 */ 773 if (cpuc->events[idx]) 774 amd_pmu_enable_event(cpuc->events[idx]); 775 } 776 } 777 778 static void amd_pmu_v2_enable_event(struct perf_event *event) 779 { 780 struct hw_perf_event *hwc = &event->hw; 781 782 /* 783 * Testing cpu_hw_events.enabled should be skipped in this case unlike 784 * in x86_pmu_enable_event(). 
785 * 786 * Since cpu_hw_events.enabled is set only after returning from 787 * x86_pmu_start(), the PMCs must be programmed and kept ready. 788 * Counting starts only after x86_pmu_enable_all() is called. 789 */ 790 __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); 791 } 792 793 static __always_inline void amd_pmu_core_enable_all(void) 794 { 795 amd_pmu_set_global_ctl(amd_pmu_global_cntr_mask); 796 } 797 798 static void amd_pmu_v2_enable_all(int added) 799 { 800 amd_pmu_lbr_enable_all(); 801 amd_pmu_core_enable_all(); 802 } 803 804 static void amd_pmu_disable_event(struct perf_event *event) 805 { 806 x86_pmu_disable_event(event); 807 808 /* 809 * This can be called from NMI context (via x86_pmu_stop). The counter 810 * may have overflowed, but either way, we'll never see it get reset 811 * by the NMI if we're already in the NMI. And the NMI latency support 812 * below will take care of any pending NMI that might have been 813 * generated by the overflow. 814 */ 815 if (in_nmi()) 816 return; 817 818 amd_pmu_wait_on_overflow(event->hw.idx); 819 } 820 821 static void amd_pmu_disable_all(void) 822 { 823 amd_brs_disable_all(); 824 x86_pmu_disable_all(); 825 amd_pmu_check_overflow(); 826 } 827 828 static __always_inline void amd_pmu_core_disable_all(void) 829 { 830 amd_pmu_set_global_ctl(0); 831 } 832 833 static void amd_pmu_v2_disable_all(void) 834 { 835 amd_pmu_core_disable_all(); 836 amd_pmu_lbr_disable_all(); 837 amd_pmu_check_overflow(); 838 } 839 840 DEFINE_STATIC_CALL_NULL(amd_pmu_branch_add, *x86_pmu.add); 841 842 static void amd_pmu_add_event(struct perf_event *event) 843 { 844 if (needs_branch_stack(event)) 845 static_call(amd_pmu_branch_add)(event); 846 } 847 848 DEFINE_STATIC_CALL_NULL(amd_pmu_branch_del, *x86_pmu.del); 849 850 static void amd_pmu_del_event(struct perf_event *event) 851 { 852 if (needs_branch_stack(event)) 853 static_call(amd_pmu_branch_del)(event); 854 } 855 856 /* 857 * Because of NMI latency, if multiple PMC counters are active or 
other sources 858 * of NMIs are received, the perf NMI handler can handle one or more overflowed 859 * PMC counters outside of the NMI associated with the PMC overflow. If the NMI 860 * doesn't arrive at the LAPIC in time to become a pending NMI, then the kernel 861 * back-to-back NMI support won't be active. This PMC handler needs to take into 862 * account that this can occur, otherwise this could result in unknown NMI 863 * messages being issued. Examples of this is PMC overflow while in the NMI 864 * handler when multiple PMCs are active or PMC overflow while handling some 865 * other source of an NMI. 866 * 867 * Attempt to mitigate this by creating an NMI window in which un-handled NMIs 868 * received during this window will be claimed. This prevents extending the 869 * window past when it is possible that latent NMIs should be received. The 870 * per-CPU perf_nmi_tstamp will be set to the window end time whenever perf has 871 * handled a counter. When an un-handled NMI is received, it will be claimed 872 * only if arriving within that window. 873 */ 874 static inline int amd_pmu_adjust_nmi_window(int handled) 875 { 876 /* 877 * If a counter was handled, record a timestamp such that un-handled 878 * NMIs will be claimed if arriving within that window. 879 */ 880 if (handled) { 881 this_cpu_write(perf_nmi_tstamp, jiffies + perf_nmi_window); 882 883 return handled; 884 } 885 886 if (time_after(jiffies, this_cpu_read(perf_nmi_tstamp))) 887 return NMI_DONE; 888 889 return NMI_HANDLED; 890 } 891 892 static int amd_pmu_handle_irq(struct pt_regs *regs) 893 { 894 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 895 int handled; 896 int pmu_enabled; 897 898 /* 899 * Save the PMU state. 900 * It needs to be restored when leaving the handler. 
901 */ 902 pmu_enabled = cpuc->enabled; 903 cpuc->enabled = 0; 904 905 amd_brs_disable_all(); 906 907 /* Drain BRS is in use (could be inactive) */ 908 if (cpuc->lbr_users) 909 amd_brs_drain(); 910 911 /* Process any counter overflows */ 912 handled = x86_pmu_handle_irq(regs); 913 914 cpuc->enabled = pmu_enabled; 915 if (pmu_enabled) 916 amd_brs_enable_all(); 917 918 return amd_pmu_adjust_nmi_window(handled); 919 } 920 921 /* 922 * AMD-specific callback invoked through perf_snapshot_branch_stack static 923 * call, defined in include/linux/perf_event.h. See its definition for API 924 * details. It's up to caller to provide enough space in *entries* to fit all 925 * LBR records, otherwise returned result will be truncated to *cnt* entries. 926 */ 927 static int amd_pmu_v2_snapshot_branch_stack(struct perf_branch_entry *entries, unsigned int cnt) 928 { 929 struct cpu_hw_events *cpuc; 930 unsigned long flags; 931 932 /* 933 * The sequence of steps to freeze LBR should be completely inlined 934 * and contain no branches to minimize contamination of LBR snapshot 935 */ 936 local_irq_save(flags); 937 amd_pmu_core_disable_all(); 938 __amd_pmu_lbr_disable(); 939 940 cpuc = this_cpu_ptr(&cpu_hw_events); 941 942 amd_pmu_lbr_read(); 943 cnt = min(cnt, x86_pmu.lbr_nr); 944 memcpy(entries, cpuc->lbr_entries, sizeof(struct perf_branch_entry) * cnt); 945 946 amd_pmu_v2_enable_all(0); 947 local_irq_restore(flags); 948 949 return cnt; 950 } 951 952 static int amd_pmu_v2_handle_irq(struct pt_regs *regs) 953 { 954 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 955 static atomic64_t status_warned = ATOMIC64_INIT(0); 956 u64 reserved, status, mask, new_bits, prev_bits; 957 struct perf_sample_data data; 958 struct hw_perf_event *hwc; 959 struct perf_event *event; 960 int handled = 0, idx; 961 bool pmu_enabled; 962 963 /* 964 * Save the PMU state as it needs to be restored when leaving the 965 * handler 966 */ 967 pmu_enabled = cpuc->enabled; 968 cpuc->enabled = 0; 969 970 /* 
Stop counting but do not disable LBR */ 971 amd_pmu_core_disable_all(); 972 973 status = amd_pmu_get_global_status(); 974 975 /* Check if any overflows are pending */ 976 if (!status) 977 goto done; 978 979 /* Read branch records */ 980 if (x86_pmu.lbr_nr) { 981 amd_pmu_lbr_read(); 982 status &= ~GLOBAL_STATUS_LBRS_FROZEN; 983 } 984 985 reserved = status & ~amd_pmu_global_cntr_mask; 986 if (reserved) 987 pr_warn_once("Reserved PerfCntrGlobalStatus bits are set (0x%llx), please consider updating microcode\n", 988 reserved); 989 990 /* Clear any reserved bits set by buggy microcode */ 991 status &= amd_pmu_global_cntr_mask; 992 993 for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) { 994 if (!test_bit(idx, cpuc->active_mask)) 995 continue; 996 997 event = cpuc->events[idx]; 998 hwc = &event->hw; 999 x86_perf_event_update(event); 1000 mask = BIT_ULL(idx); 1001 1002 if (!(status & mask)) 1003 continue; 1004 1005 /* Event overflow */ 1006 handled++; 1007 status &= ~mask; 1008 perf_sample_data_init(&data, 0, hwc->last_period); 1009 1010 if (!x86_perf_event_set_period(event)) 1011 continue; 1012 1013 perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL); 1014 1015 perf_event_overflow(event, &data, regs); 1016 } 1017 1018 /* 1019 * It should never be the case that some overflows are not handled as 1020 * the corresponding PMCs are expected to be inactive according to the 1021 * active_mask 1022 */ 1023 if (status > 0) { 1024 prev_bits = atomic64_fetch_or(status, &status_warned); 1025 // A new bit was set for the very first time. 
1026 new_bits = status & ~prev_bits; 1027 WARN(new_bits, "New overflows for inactive PMCs: %llx\n", new_bits); 1028 } 1029 1030 /* Clear overflow and freeze bits */ 1031 amd_pmu_ack_global_status(~status); 1032 1033 /* 1034 * Unmasking the LVTPC is not required as the Mask (M) bit of the LVT 1035 * PMI entry is not set by the local APIC when a PMC overflow occurs 1036 */ 1037 inc_perf_irq_stat(); 1038 1039 done: 1040 cpuc->enabled = pmu_enabled; 1041 1042 /* Resume counting only if PMU is active */ 1043 if (pmu_enabled) 1044 amd_pmu_core_enable_all(); 1045 1046 return amd_pmu_adjust_nmi_window(handled); 1047 } 1048 1049 static struct event_constraint * 1050 amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx, 1051 struct perf_event *event) 1052 { 1053 /* 1054 * if not NB event or no NB, then no constraints 1055 */ 1056 if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))) 1057 return &unconstrained; 1058 1059 return __amd_get_nb_event_constraints(cpuc, event, NULL); 1060 } 1061 1062 static void amd_put_event_constraints(struct cpu_hw_events *cpuc, 1063 struct perf_event *event) 1064 { 1065 if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)) 1066 __amd_put_nb_event_constraints(cpuc, event); 1067 } 1068 1069 PMU_FORMAT_ATTR(event, "config:0-7,32-35"); 1070 PMU_FORMAT_ATTR(umask, "config:8-15" ); 1071 PMU_FORMAT_ATTR(edge, "config:18" ); 1072 PMU_FORMAT_ATTR(inv, "config:23" ); 1073 PMU_FORMAT_ATTR(cmask, "config:24-31" ); 1074 1075 static struct attribute *amd_format_attr[] = { 1076 &format_attr_event.attr, 1077 &format_attr_umask.attr, 1078 &format_attr_edge.attr, 1079 &format_attr_inv.attr, 1080 &format_attr_cmask.attr, 1081 NULL, 1082 }; 1083 1084 /* AMD Family 15h */ 1085 1086 #define AMD_EVENT_TYPE_MASK 0x000000F0ULL 1087 1088 #define AMD_EVENT_FP 0x00000000ULL ... 0x00000010ULL 1089 #define AMD_EVENT_LS 0x00000020ULL ... 0x00000030ULL 1090 #define AMD_EVENT_DC 0x00000040ULL ... 0x00000050ULL 1091 #define AMD_EVENT_CU 0x00000060ULL ... 
0x00000070ULL 1092 #define AMD_EVENT_IC_DE 0x00000080ULL ... 0x00000090ULL 1093 #define AMD_EVENT_EX_LS 0x000000C0ULL 1094 #define AMD_EVENT_DE 0x000000D0ULL 1095 #define AMD_EVENT_NB 0x000000E0ULL ... 0x000000F0ULL 1096 1097 /* 1098 * AMD family 15h event code/PMC mappings: 1099 * 1100 * type = event_code & 0x0F0: 1101 * 1102 * 0x000 FP PERF_CTL[5:3] 1103 * 0x010 FP PERF_CTL[5:3] 1104 * 0x020 LS PERF_CTL[5:0] 1105 * 0x030 LS PERF_CTL[5:0] 1106 * 0x040 DC PERF_CTL[5:0] 1107 * 0x050 DC PERF_CTL[5:0] 1108 * 0x060 CU PERF_CTL[2:0] 1109 * 0x070 CU PERF_CTL[2:0] 1110 * 0x080 IC/DE PERF_CTL[2:0] 1111 * 0x090 IC/DE PERF_CTL[2:0] 1112 * 0x0A0 --- 1113 * 0x0B0 --- 1114 * 0x0C0 EX/LS PERF_CTL[5:0] 1115 * 0x0D0 DE PERF_CTL[2:0] 1116 * 0x0E0 NB NB_PERF_CTL[3:0] 1117 * 0x0F0 NB NB_PERF_CTL[3:0] 1118 * 1119 * Exceptions: 1120 * 1121 * 0x000 FP PERF_CTL[3], PERF_CTL[5:3] (*) 1122 * 0x003 FP PERF_CTL[3] 1123 * 0x004 FP PERF_CTL[3], PERF_CTL[5:3] (*) 1124 * 0x00B FP PERF_CTL[3] 1125 * 0x00D FP PERF_CTL[3] 1126 * 0x023 DE PERF_CTL[2:0] 1127 * 0x02D LS PERF_CTL[3] 1128 * 0x02E LS PERF_CTL[3,0] 1129 * 0x031 LS PERF_CTL[2:0] (**) 1130 * 0x043 CU PERF_CTL[2:0] 1131 * 0x045 CU PERF_CTL[2:0] 1132 * 0x046 CU PERF_CTL[2:0] 1133 * 0x054 CU PERF_CTL[2:0] 1134 * 0x055 CU PERF_CTL[2:0] 1135 * 0x08F IC PERF_CTL[0] 1136 * 0x187 DE PERF_CTL[0] 1137 * 0x188 DE PERF_CTL[0] 1138 * 0x0DB EX PERF_CTL[5:0] 1139 * 0x0DC LS PERF_CTL[5:0] 1140 * 0x0DD LS PERF_CTL[5:0] 1141 * 0x0DE LS PERF_CTL[5:0] 1142 * 0x0DF LS PERF_CTL[5:0] 1143 * 0x1C0 EX PERF_CTL[5:3] 1144 * 0x1D6 EX PERF_CTL[5:0] 1145 * 0x1D8 EX PERF_CTL[5:0] 1146 * 1147 * (*) depending on the umask all FPU counters may be used 1148 * (**) only one unitmask enabled at a time 1149 */ 1150 1151 static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0); 1152 static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0); 1153 static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0); 1154 static struct 
event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0); 1155 static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); 1156 static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); 1157 1158 static struct event_constraint * 1159 amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx, 1160 struct perf_event *event) 1161 { 1162 struct hw_perf_event *hwc = &event->hw; 1163 unsigned int event_code = amd_get_event_code(hwc); 1164 1165 switch (event_code & AMD_EVENT_TYPE_MASK) { 1166 case AMD_EVENT_FP: 1167 switch (event_code) { 1168 case 0x000: 1169 if (!(hwc->config & 0x0000F000ULL)) 1170 break; 1171 if (!(hwc->config & 0x00000F00ULL)) 1172 break; 1173 return &amd_f15_PMC3; 1174 case 0x004: 1175 if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1) 1176 break; 1177 return &amd_f15_PMC3; 1178 case 0x003: 1179 case 0x00B: 1180 case 0x00D: 1181 return &amd_f15_PMC3; 1182 } 1183 return &amd_f15_PMC53; 1184 case AMD_EVENT_LS: 1185 case AMD_EVENT_DC: 1186 case AMD_EVENT_EX_LS: 1187 switch (event_code) { 1188 case 0x023: 1189 case 0x043: 1190 case 0x045: 1191 case 0x046: 1192 case 0x054: 1193 case 0x055: 1194 return &amd_f15_PMC20; 1195 case 0x02D: 1196 return &amd_f15_PMC3; 1197 case 0x02E: 1198 return &amd_f15_PMC30; 1199 case 0x031: 1200 if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1) 1201 return &amd_f15_PMC20; 1202 return &emptyconstraint; 1203 case 0x1C0: 1204 return &amd_f15_PMC53; 1205 default: 1206 return &amd_f15_PMC50; 1207 } 1208 case AMD_EVENT_CU: 1209 case AMD_EVENT_IC_DE: 1210 case AMD_EVENT_DE: 1211 switch (event_code) { 1212 case 0x08F: 1213 case 0x187: 1214 case 0x188: 1215 return &amd_f15_PMC0; 1216 case 0x0DB ... 
0x0DF: 1217 case 0x1D6: 1218 case 0x1D8: 1219 return &amd_f15_PMC50; 1220 default: 1221 return &amd_f15_PMC20; 1222 } 1223 case AMD_EVENT_NB: 1224 /* moved to uncore.c */ 1225 return &emptyconstraint; 1226 default: 1227 return &emptyconstraint; 1228 } 1229 } 1230 1231 static struct event_constraint pair_constraint; 1232 1233 static struct event_constraint * 1234 amd_get_event_constraints_f17h(struct cpu_hw_events *cpuc, int idx, 1235 struct perf_event *event) 1236 { 1237 struct hw_perf_event *hwc = &event->hw; 1238 1239 if (amd_is_pair_event_code(hwc)) 1240 return &pair_constraint; 1241 1242 return &unconstrained; 1243 } 1244 1245 static void amd_put_event_constraints_f17h(struct cpu_hw_events *cpuc, 1246 struct perf_event *event) 1247 { 1248 struct hw_perf_event *hwc = &event->hw; 1249 1250 if (is_counter_pair(hwc)) 1251 --cpuc->n_pair; 1252 } 1253 1254 /* 1255 * Because of the way BRS operates with an inactive and active phases, and 1256 * the link to one counter, it is not possible to have two events using BRS 1257 * scheduled at the same time. There would be an issue with enforcing the 1258 * period of each one and given that the BRS saturates, it would not be possible 1259 * to guarantee correlated content for all events. Therefore, in situations 1260 * where multiple events want to use BRS, the kernel enforces mutual exclusion. 1261 * Exclusion is enforced by choosing only one counter for events using BRS. 1262 * The event scheduling logic will then automatically multiplex the 1263 * events and ensure that at most one event is actively using BRS. 
1264 * 1265 * The BRS counter could be any counter, but there is no constraint on Fam19h, 1266 * therefore all counters are equal and thus we pick the first one: PMC0 1267 */ 1268 static struct event_constraint amd_fam19h_brs_cntr0_constraint = 1269 EVENT_CONSTRAINT(0, 0x1, AMD64_RAW_EVENT_MASK); 1270 1271 static struct event_constraint amd_fam19h_brs_pair_cntr0_constraint = 1272 __EVENT_CONSTRAINT(0, 0x1, AMD64_RAW_EVENT_MASK, 1, 0, PERF_X86_EVENT_PAIR); 1273 1274 static struct event_constraint * 1275 amd_get_event_constraints_f19h(struct cpu_hw_events *cpuc, int idx, 1276 struct perf_event *event) 1277 { 1278 struct hw_perf_event *hwc = &event->hw; 1279 bool has_brs = has_amd_brs(hwc); 1280 1281 /* 1282 * In case BRS is used with an event requiring a counter pair, 1283 * the kernel allows it but only on counter 0 & 1 to enforce 1284 * multiplexing requiring to protect BRS in case of multiple 1285 * BRS users 1286 */ 1287 if (amd_is_pair_event_code(hwc)) { 1288 return has_brs ? &amd_fam19h_brs_pair_cntr0_constraint 1289 : &pair_constraint; 1290 } 1291 1292 if (has_brs) 1293 return &amd_fam19h_brs_cntr0_constraint; 1294 1295 return &unconstrained; 1296 } 1297 1298 1299 static ssize_t amd_event_sysfs_show(char *page, u64 config) 1300 { 1301 u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) | 1302 (config & AMD64_EVENTSEL_EVENT) >> 24; 1303 1304 return x86_event_sysfs_show(page, config, event); 1305 } 1306 1307 static void amd_pmu_limit_period(struct perf_event *event, s64 *left) 1308 { 1309 /* 1310 * Decrease period by the depth of the BRS feature to get the last N 1311 * taken branches and approximate the desired period 1312 */ 1313 if (has_branch_stack(event) && *left > x86_pmu.lbr_nr) 1314 *left -= x86_pmu.lbr_nr; 1315 } 1316 1317 static __initconst const struct x86_pmu amd_pmu = { 1318 .name = "AMD", 1319 .handle_irq = amd_pmu_handle_irq, 1320 .disable_all = amd_pmu_disable_all, 1321 .enable_all = amd_pmu_enable_all, 1322 .enable = amd_pmu_enable_event, 1323 
.disable = amd_pmu_disable_event, 1324 .hw_config = amd_pmu_hw_config, 1325 .schedule_events = x86_schedule_events, 1326 .eventsel = MSR_K7_EVNTSEL0, 1327 .perfctr = MSR_K7_PERFCTR0, 1328 .addr_offset = amd_pmu_addr_offset, 1329 .event_map = amd_pmu_event_map, 1330 .max_events = ARRAY_SIZE(amd_perfmon_event_map), 1331 .cntr_mask64 = GENMASK_ULL(AMD64_NUM_COUNTERS - 1, 0), 1332 .add = amd_pmu_add_event, 1333 .del = amd_pmu_del_event, 1334 .cntval_bits = 48, 1335 .cntval_mask = (1ULL << 48) - 1, 1336 .apic = 1, 1337 /* use highest bit to detect overflow */ 1338 .max_period = (1ULL << 47) - 1, 1339 .get_event_constraints = amd_get_event_constraints, 1340 .put_event_constraints = amd_put_event_constraints, 1341 1342 .format_attrs = amd_format_attr, 1343 .events_sysfs_show = amd_event_sysfs_show, 1344 1345 .cpu_prepare = amd_pmu_cpu_prepare, 1346 .cpu_starting = amd_pmu_cpu_starting, 1347 .cpu_dead = amd_pmu_cpu_dead, 1348 1349 .amd_nb_constraints = 1, 1350 }; 1351 1352 static ssize_t branches_show(struct device *cdev, 1353 struct device_attribute *attr, 1354 char *buf) 1355 { 1356 return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu.lbr_nr); 1357 } 1358 1359 static DEVICE_ATTR_RO(branches); 1360 1361 static struct attribute *amd_pmu_branches_attrs[] = { 1362 &dev_attr_branches.attr, 1363 NULL, 1364 }; 1365 1366 static umode_t 1367 amd_branches_is_visible(struct kobject *kobj, struct attribute *attr, int i) 1368 { 1369 return x86_pmu.lbr_nr ? 
attr->mode : 0; 1370 } 1371 1372 static struct attribute_group group_caps_amd_branches = { 1373 .name = "caps", 1374 .attrs = amd_pmu_branches_attrs, 1375 .is_visible = amd_branches_is_visible, 1376 }; 1377 1378 #ifdef CONFIG_PERF_EVENTS_AMD_BRS 1379 1380 EVENT_ATTR_STR(branch-brs, amd_branch_brs, 1381 "event=" __stringify(AMD_FAM19H_BRS_EVENT)"\n"); 1382 1383 static struct attribute *amd_brs_events_attrs[] = { 1384 EVENT_PTR(amd_branch_brs), 1385 NULL, 1386 }; 1387 1388 static umode_t 1389 amd_brs_is_visible(struct kobject *kobj, struct attribute *attr, int i) 1390 { 1391 return static_cpu_has(X86_FEATURE_BRS) && x86_pmu.lbr_nr ? 1392 attr->mode : 0; 1393 } 1394 1395 static struct attribute_group group_events_amd_brs = { 1396 .name = "events", 1397 .attrs = amd_brs_events_attrs, 1398 .is_visible = amd_brs_is_visible, 1399 }; 1400 1401 #endif /* CONFIG_PERF_EVENTS_AMD_BRS */ 1402 1403 static const struct attribute_group *amd_attr_update[] = { 1404 &group_caps_amd_branches, 1405 #ifdef CONFIG_PERF_EVENTS_AMD_BRS 1406 &group_events_amd_brs, 1407 #endif 1408 NULL, 1409 }; 1410 1411 static int __init amd_core_pmu_init(void) 1412 { 1413 union cpuid_0x80000022_ebx ebx; 1414 u64 even_ctr_mask = 0ULL; 1415 int i; 1416 1417 /* Avoid calculating the value each time in the NMI handler */ 1418 perf_nmi_window = msecs_to_jiffies(100); 1419 1420 if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE)) 1421 return 0; 1422 1423 /* 1424 * If core performance counter extensions exists, we must use 1425 * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also 1426 * amd_pmu_addr_offset(). 
1427 */ 1428 x86_pmu.eventsel = MSR_F15H_PERF_CTL; 1429 x86_pmu.perfctr = MSR_F15H_PERF_CTR; 1430 x86_pmu.cntr_mask64 = GENMASK_ULL(AMD64_NUM_COUNTERS_CORE - 1, 0); 1431 1432 /* Check for Performance Monitoring v2 support */ 1433 if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) { 1434 ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES); 1435 1436 /* Update PMU version for later usage */ 1437 x86_pmu.version = 2; 1438 1439 /* Find the number of available Core PMCs */ 1440 x86_pmu.cntr_mask64 = GENMASK_ULL(ebx.split.num_core_pmc - 1, 0); 1441 1442 amd_pmu_global_cntr_mask = x86_pmu.cntr_mask64; 1443 1444 x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_MEDIATED_VPMU; 1445 1446 /* Update PMC handling functions */ 1447 x86_pmu.enable_all = amd_pmu_v2_enable_all; 1448 x86_pmu.disable_all = amd_pmu_v2_disable_all; 1449 x86_pmu.enable = amd_pmu_v2_enable_event; 1450 x86_pmu.handle_irq = amd_pmu_v2_handle_irq; 1451 static_call_update(amd_pmu_test_overflow, amd_pmu_test_overflow_status); 1452 } 1453 1454 /* 1455 * AMD Core perfctr has separate MSRs for the NB events, see 1456 * the amd/uncore.c driver. 1457 */ 1458 x86_pmu.amd_nb_constraints = 0; 1459 1460 if (boot_cpu_data.x86 == 0x15) { 1461 pr_cont("Fam15h "); 1462 x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; 1463 } 1464 if (boot_cpu_data.x86 >= 0x17) { 1465 pr_cont("Fam17h+ "); 1466 /* 1467 * Family 17h and compatibles have constraints for Large 1468 * Increment per Cycle events: they may only be assigned an 1469 * even numbered counter that has a consecutive adjacent odd 1470 * numbered counter following it. 
1471 */ 1472 for (i = 0; i < x86_pmu_max_num_counters(NULL) - 1; i += 2) 1473 even_ctr_mask |= BIT_ULL(i); 1474 1475 pair_constraint = (struct event_constraint) 1476 __EVENT_CONSTRAINT(0, even_ctr_mask, 0, 1477 x86_pmu_max_num_counters(NULL) / 2, 0, 1478 PERF_X86_EVENT_PAIR); 1479 1480 x86_pmu.get_event_constraints = amd_get_event_constraints_f17h; 1481 x86_pmu.put_event_constraints = amd_put_event_constraints_f17h; 1482 x86_pmu.perf_ctr_pair_en = AMD_MERGE_EVENT_ENABLE; 1483 x86_pmu.flags |= PMU_FL_PAIR; 1484 } 1485 1486 /* LBR and BRS are mutually exclusive features */ 1487 if (!amd_pmu_lbr_init()) { 1488 /* LBR requires flushing on context switch */ 1489 x86_pmu.sched_task = amd_pmu_lbr_sched_task; 1490 static_call_update(amd_pmu_branch_hw_config, amd_pmu_lbr_hw_config); 1491 static_call_update(amd_pmu_branch_reset, amd_pmu_lbr_reset); 1492 static_call_update(amd_pmu_branch_add, amd_pmu_lbr_add); 1493 static_call_update(amd_pmu_branch_del, amd_pmu_lbr_del); 1494 1495 /* Only support branch_stack snapshot on perfmon v2 */ 1496 if (x86_pmu.handle_irq == amd_pmu_v2_handle_irq) 1497 static_call_update(perf_snapshot_branch_stack, amd_pmu_v2_snapshot_branch_stack); 1498 } else if (!amd_brs_init()) { 1499 /* 1500 * BRS requires special event constraints and flushing on ctxsw. 
1501 */ 1502 x86_pmu.get_event_constraints = amd_get_event_constraints_f19h; 1503 x86_pmu.sched_task = amd_pmu_brs_sched_task; 1504 x86_pmu.limit_period = amd_pmu_limit_period; 1505 1506 static_call_update(amd_pmu_branch_hw_config, amd_brs_hw_config); 1507 static_call_update(amd_pmu_branch_reset, amd_brs_reset); 1508 static_call_update(amd_pmu_branch_add, amd_pmu_brs_add); 1509 static_call_update(amd_pmu_branch_del, amd_pmu_brs_del); 1510 1511 /* 1512 * put_event_constraints callback same as Fam17h, set above 1513 */ 1514 1515 /* branch sampling must be stopped when entering low power */ 1516 amd_brs_lopwr_init(); 1517 } 1518 1519 x86_pmu.attr_update = amd_attr_update; 1520 1521 pr_cont("core perfctr, "); 1522 return 0; 1523 } 1524 1525 __init int amd_pmu_init(void) 1526 { 1527 int ret; 1528 1529 /* Performance-monitoring supported from K7 and later: */ 1530 if (boot_cpu_data.x86 < 6) 1531 return -ENODEV; 1532 1533 x86_pmu = amd_pmu; 1534 1535 ret = amd_core_pmu_init(); 1536 if (ret) 1537 return ret; 1538 1539 if (num_possible_cpus() == 1) { 1540 /* 1541 * No point in allocating data structures to serialize 1542 * against other CPUs, when there is only the one CPU. 
1543 */ 1544 x86_pmu.amd_nb_constraints = 0; 1545 } 1546 1547 if (boot_cpu_data.x86 >= 0x17) 1548 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids_f17h, sizeof(hw_cache_event_ids)); 1549 else 1550 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, sizeof(hw_cache_event_ids)); 1551 1552 return 0; 1553 } 1554 1555 static inline void amd_pmu_reload_virt(void) 1556 { 1557 if (x86_pmu.version >= 2) { 1558 /* 1559 * Clear global enable bits, reprogram the PERF_CTL 1560 * registers with updated perf_ctr_virt_mask and then 1561 * set global enable bits once again 1562 */ 1563 amd_pmu_v2_disable_all(); 1564 amd_pmu_enable_all(0); 1565 amd_pmu_v2_enable_all(0); 1566 return; 1567 } 1568 1569 amd_pmu_disable_all(); 1570 amd_pmu_enable_all(0); 1571 } 1572 1573 void amd_pmu_enable_virt(void) 1574 { 1575 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 1576 1577 cpuc->perf_ctr_virt_mask = 0; 1578 1579 /* Reload all events */ 1580 amd_pmu_reload_virt(); 1581 } 1582 EXPORT_SYMBOL_FOR_KVM(amd_pmu_enable_virt); 1583 1584 void amd_pmu_disable_virt(void) 1585 { 1586 struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); 1587 1588 /* 1589 * We only mask out the Host-only bit so that host-only counting works 1590 * when SVM is disabled. If someone sets up a guest-only counter when 1591 * SVM is disabled the Guest-only bits still gets set and the counter 1592 * will not count anything. 1593 */ 1594 cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY; 1595 1596 /* Reload all events */ 1597 amd_pmu_reload_virt(); 1598 } 1599 EXPORT_SYMBOL_FOR_KVM(amd_pmu_disable_virt); 1600