#include <linux/perf_event.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <asm/apicdef.h>
#include <asm/nmi.h>

#include "../perf_event.h"

static DEFINE_PER_CPU(unsigned int, perf_nmi_counter);

static __initconst const u64 amd_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
		[ C(RESULT_MISS)   ] = 0x0141, /* Data Cache Misses */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
		[ C(RESULT_MISS)   ] = 0x0167, /* Data Prefetcher :cancelled */
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */
		[ C(RESULT_MISS)   ] = 0x0081, /* Instruction cache misses */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
		[ C(RESULT_MISS)   ] = 0x037E, /* L2 Cache Misses : IC+DC */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
		[ C(RESULT_MISS)   ] = 0x0746, /* L1_DTLB_AND_L2_DTLB_MISS.ALL */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */
		[ C(RESULT_MISS)   ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */
		[ C(RESULT_MISS)   ] = 0x00c3, /* Retired Mispredicted BI */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(NODE) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0xb8e9, /* CPU Request to Memory, l+r */
		[ C(RESULT_MISS)   ] = 0x98e9, /* CPU Request to Memory, r */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};
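
/*
 * Note on the table above: each nonzero value is a raw event encoding in
 * which bits 0-7 hold the low byte of the event select and bits 8-15 hold
 * the unit mask, matching the "event" and "umask" format attributes
 * defined later in this file. For example, 0x0141 is event 0x41 with
 * umask 0x01 (Data Cache Misses), and 0x037D is event 0x7D with umask
 * 0x03 (the "Requests to L2 Cache :IC+DC" entry).
 */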

/*
 * AMD Performance Monitor K7 and later.
 */
static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
	[PERF_COUNT_HW_CPU_CYCLES]		= 0x0076,
	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x077d,
	[PERF_COUNT_HW_CACHE_MISSES]		= 0x077e,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c2,
	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c3,
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= 0x00d0, /* "Decoder empty" event */
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= 0x00d1, /* "Dispatch stalls" event */
};

static u64 amd_pmu_event_map(int hw_event)
{
	return amd_perfmon_event_map[hw_event];
}

/*
 * Previously calculated offsets
 */
static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly;
static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly;

/*
 * Legacy CPUs:
 *   4 counters starting at 0xc0010000 each offset by 1
 *
 * CPUs with core performance counter extensions:
 *   6 counters starting at 0xc0010200 each offset by 2
 */
static inline int amd_pmu_addr_offset(int index, bool eventsel)
{
	int offset;

	if (!index)
		return index;

	if (eventsel)
		offset = event_offsets[index];
	else
		offset = count_offsets[index];

	if (offset)
		return offset;

	if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
		offset = index;
	else
		offset = index << 1;

	if (eventsel)
		event_offsets[index] = offset;
	else
		count_offsets[index] = offset;

	return offset;
}
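
/*
 * Worked example for amd_pmu_addr_offset(), using the bases noted in the
 * comment above: on a legacy CPU, counter index 3 keeps offset 3, so the
 * event select MSR is 0xc0010000 + 3 = 0xc0010003; with the core
 * performance counter extensions the offset doubles to 6, giving
 * 0xc0010200 + 6 = 0xc0010206. The counter MSR uses the same offset from
 * its own base (see the eventsel/perfctr setup in amd_pmu and
 * amd_core_pmu_init() below).
 */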

static int amd_core_hw_config(struct perf_event *event)
{
	if (event->attr.exclude_host && event->attr.exclude_guest)
		/*
		 * When HO == GO == 1 the hardware treats that as GO == HO == 0
		 * and will count in both modes. We don't want to count in that
		 * case so we emulate no-counting by setting US = OS = 0.
		 */
		event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
				      ARCH_PERFMON_EVENTSEL_OS);
	else if (event->attr.exclude_host)
		event->hw.config |= AMD64_EVENTSEL_GUESTONLY;
	else if (event->attr.exclude_guest)
		event->hw.config |= AMD64_EVENTSEL_HOSTONLY;

	return 0;
}

/*
 * AMD64 events are detected based on their event codes.
 */
static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
{
	return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff);
}

static inline int amd_is_nb_event(struct hw_perf_event *hwc)
{
	return (hwc->config & 0xe0) == 0xe0;
}

static inline int amd_has_nb(struct cpu_hw_events *cpuc)
{
	struct amd_nb *nb = cpuc->amd_nb;

	return nb && nb->nb_id != -1;
}

static int amd_pmu_hw_config(struct perf_event *event)
{
	int ret;

	/* pass precise event sampling to ibs: */
	if (event->attr.precise_ip && get_ibs_caps())
		return -ENOENT;

	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	ret = x86_pmu_hw_config(event);
	if (ret)
		return ret;

	if (event->attr.type == PERF_TYPE_RAW)
		event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;

	return amd_core_hw_config(event);
}

static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
					   struct perf_event *event)
{
	struct amd_nb *nb = cpuc->amd_nb;
	int i;

	/*
	 * need to scan whole list because event may not have
	 * been assigned during scheduling
	 *
	 * no race condition possible because event can only
	 * be removed on one CPU at a time AND PMU is disabled
	 * when we come here
	 */
	for (i = 0; i < x86_pmu.num_counters; i++) {
		if (cmpxchg(nb->owners + i, event, NULL) == event)
			break;
	}
}

/*
 * AMD64 NorthBridge events need special treatment because
 * counter access needs to be synchronized across all cores
 * of a package. Refer to BKDG section 3.12
 *
 * NB events are events measuring L3 cache, HyperTransport
 * traffic. They are identified by an event code >= 0xe00.
 * They measure events on the NorthBridge which is shared
 * by all cores on a package. NB events are counted on a
 * shared set of counters. When a NB event is programmed
 * in a counter, the data actually comes from a shared
 * counter. Thus, access to those counters needs to be
 * synchronized.
 *
 * We implement the synchronization such that no two cores
 * can be measuring NB events using the same counters. Thus,
 * we maintain a per-NB allocation table. The available slot
 * is propagated using the event_constraint structure.
 *
 * We provide only one choice for each NB event based on
 * the fact that only NB events have restrictions. Consequently,
 * if a counter is available, there is a guarantee the NB event
 * will be assigned to it. If no slot is available, an empty
 * constraint is returned and scheduling will eventually fail
 * for this event.
 *
 * Note that all cores attached to the same NB compete for the
 * same counters to host NB events, which is why we use atomic
 * ops. Some multi-chip CPUs may have more than one NB.
 *
 * Given that resources are allocated (cmpxchg), they must be
 * eventually freed for others to use. This is accomplished by
 * calling __amd_put_nb_event_constraints().
 *
 * Non NB events are not impacted by this restriction.
 */
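
/*
 * Concrete example of the scheme described above: two cores on the same
 * node both program an NB event. The first core's cmpxchg() claims a free
 * slot in nb->owners[] (preferring its current hwc->idx) and the event is
 * constrained to that single counter. The second core's cmpxchg() on that
 * slot fails because it is already owned, so it moves on to the next free
 * slot. If every slot is owned by other events, the empty constraint is
 * returned and scheduling of the event eventually fails.
 */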

static struct event_constraint *
__amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
			       struct event_constraint *c)
{
	struct hw_perf_event *hwc = &event->hw;
	struct amd_nb *nb = cpuc->amd_nb;
	struct perf_event *old;
	int idx, new = -1;

	if (!c)
		c = &unconstrained;

	if (cpuc->is_fake)
		return c;

	/*
	 * detect if already present, if so reuse
	 *
	 * cannot merge with actual allocation
	 * because of possible holes
	 *
	 * event can already be present yet not assigned (in hwc->idx)
	 * because of successive calls to x86_schedule_events() from
	 * hw_perf_group_sched_in() without hw_perf_enable()
	 */
	for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) {
		if (new == -1 || hwc->idx == idx)
			/* assign free slot, prefer hwc->idx */
			old = cmpxchg(nb->owners + idx, NULL, event);
		else if (nb->owners[idx] == event)
			/* event already present */
			old = event;
		else
			continue;

		if (old && old != event)
			continue;

		/* reassign to this slot */
		if (new != -1)
			cmpxchg(nb->owners + new, event, NULL);
		new = idx;

		/* already present, reuse */
		if (old == event)
			break;
	}

	if (new == -1)
		return &emptyconstraint;

	return &nb->event_constraints[new];
}

static struct amd_nb *amd_alloc_nb(int cpu)
{
	struct amd_nb *nb;
	int i;

	nb = kzalloc_node(sizeof(struct amd_nb), GFP_KERNEL, cpu_to_node(cpu));
	if (!nb)
		return NULL;

	nb->nb_id = -1;

	/*
	 * initialize all possible NB constraints
	 */
	for (i = 0; i < x86_pmu.num_counters; i++) {
		__set_bit(i, nb->event_constraints[i].idxmsk);
		nb->event_constraints[i].weight = 1;
	}
	return nb;
}

static int amd_pmu_cpu_prepare(int cpu)
{
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);

	WARN_ON_ONCE(cpuc->amd_nb);

	if (!x86_pmu.amd_nb_constraints)
		return 0;

	cpuc->amd_nb = amd_alloc_nb(cpu);
	if (!cpuc->amd_nb)
		return -ENOMEM;

	return 0;
}

static void amd_pmu_cpu_starting(int cpu)
{
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
	void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED];
	struct amd_nb *nb;
	int i, nb_id;

	cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;

	if (!x86_pmu.amd_nb_constraints)
		return;

	nb_id = amd_get_nb_id(cpu);
	WARN_ON_ONCE(nb_id == BAD_APICID);

	for_each_online_cpu(i) {
		nb = per_cpu(cpu_hw_events, i).amd_nb;
		if (WARN_ON_ONCE(!nb))
			continue;

		if (nb->nb_id == nb_id) {
			*onln = cpuc->amd_nb;
			cpuc->amd_nb = nb;
			break;
		}
	}

	cpuc->amd_nb->nb_id = nb_id;
	cpuc->amd_nb->refcnt++;
}

static void amd_pmu_cpu_dead(int cpu)
{
	struct cpu_hw_events *cpuhw;

	if (!x86_pmu.amd_nb_constraints)
		return;

	cpuhw = &per_cpu(cpu_hw_events, cpu);

	if (cpuhw->amd_nb) {
		struct amd_nb *nb = cpuhw->amd_nb;

		if (nb->nb_id == -1 || --nb->refcnt == 0)
			kfree(nb);

		cpuhw->amd_nb = NULL;
	}
}

/*
 * When a PMC counter overflows, an NMI is used to process the event and
 * reset the counter. NMI latency can result in the counter being updated
 * before the NMI can run, which can result in what appear to be spurious
 * NMIs. This function is intended to wait for the NMI to run and reset
 * the counter to avoid possible unhandled NMI messages.
 */
#define OVERFLOW_WAIT_COUNT	50
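
/*
 * Background for the check below: an active counter is normally loaded
 * with a negative value, so the top bit of the 48-bit counter is set
 * while it is counting (see the "use highest bit to detect overflow"
 * note on max_period later in this file). The bit reads clear only in
 * the window between an overflow and the NMI handler reloading the
 * counter, which is the window this wait targets.
 */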

static void amd_pmu_wait_on_overflow(int idx)
{
	unsigned int i;
	u64 counter;

	/*
	 * Wait for the counter to be reset if it has overflowed. This loop
	 * should exit very, very quickly, but just in case, don't wait
	 * forever...
	 */
	for (i = 0; i < OVERFLOW_WAIT_COUNT; i++) {
		rdmsrl(x86_pmu_event_addr(idx), counter);
		if (counter & (1ULL << (x86_pmu.cntval_bits - 1)))
			break;

		/* Might be in IRQ context, so can't sleep */
		udelay(1);
	}
}

static void amd_pmu_disable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	int idx;

	x86_pmu_disable_all();

	/*
	 * This shouldn't be called from NMI context, but add a safeguard here
	 * to return, since if we're in NMI context we can't wait for an NMI
	 * to reset an overflowed counter value.
	 */
	if (in_nmi())
		return;

	/*
	 * Check each counter for overflow and wait for it to be reset by the
	 * NMI if it has overflowed. This relies on the fact that all active
	 * counters are always enabled when this function is called and
	 * ARCH_PERFMON_EVENTSEL_INT is always set.
	 */
	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
		if (!test_bit(idx, cpuc->active_mask))
			continue;

		amd_pmu_wait_on_overflow(idx);
	}
}

static void amd_pmu_disable_event(struct perf_event *event)
{
	x86_pmu_disable_event(event);

	/*
	 * This can be called from NMI context (via x86_pmu_stop). The counter
	 * may have overflowed, but either way, we'll never see it get reset
	 * by the NMI if we're already in the NMI. And the NMI latency support
	 * below will take care of any pending NMI that might have been
	 * generated by the overflow.
	 */
	if (in_nmi())
		return;

	amd_pmu_wait_on_overflow(event->hw.idx);
}

/*
 * Because of NMI latency, if multiple PMC counters are active or other sources
 * of NMIs are received, the perf NMI handler can handle one or more overflowed
 * PMC counters outside of the NMI associated with the PMC overflow. If the NMI
 * doesn't arrive at the LAPIC in time to become a pending NMI, then the kernel
 * back-to-back NMI support won't be active. This PMC handler needs to take into
 * account that this can occur, otherwise this could result in unknown NMI
 * messages being issued. Examples of this are PMC overflow while in the NMI
 * handler when multiple PMCs are active or PMC overflow while handling some
 * other source of an NMI.
 *
 * Attempt to mitigate this by using the number of active PMCs to determine
 * whether to return NMI_HANDLED if the perf NMI handler did not handle/reset
 * any PMCs. The per-CPU perf_nmi_counter variable is set to a minimum of the
 * number of active PMCs or 2. The value of 2 is used in case an NMI does not
 * arrive at the LAPIC in time to be collapsed into an already pending NMI.
 */
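
/*
 * Illustration of the case handled below: with two active counters, both
 * can overflow while the first NMI is being handled. That NMI then
 * processes both overflows, and the second NMI arrives too late to be
 * collapsed into a pending one and finds nothing left to do.
 * perf_nmi_counter lets the handler claim such a late NMI instead of
 * letting it be reported as an unknown NMI.
 */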

static int amd_pmu_handle_irq(struct pt_regs *regs)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	int active, handled;

	/*
	 * Obtain the active count before calling x86_pmu_handle_irq() since
	 * it is possible that x86_pmu_handle_irq() may make a counter
	 * inactive (through x86_pmu_stop).
	 */
	active = __bitmap_weight(cpuc->active_mask, X86_PMC_IDX_MAX);

	/* Process any counter overflows */
	handled = x86_pmu_handle_irq(regs);

	/*
	 * If a counter was handled, record the number of possible remaining
	 * NMIs that can occur.
	 */
	if (handled) {
		this_cpu_write(perf_nmi_counter,
			       min_t(unsigned int, 2, active));

		return handled;
	}

	if (!this_cpu_read(perf_nmi_counter))
		return NMI_DONE;

	this_cpu_dec(perf_nmi_counter);

	return NMI_HANDLED;
}

static struct event_constraint *
amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
			  struct perf_event *event)
{
	/*
	 * if not NB event or no NB, then no constraints
	 */
	if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)))
		return &unconstrained;

	return __amd_get_nb_event_constraints(cpuc, event, NULL);
}

static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
				      struct perf_event *event)
{
	if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))
		__amd_put_nb_event_constraints(cpuc, event);
}

PMU_FORMAT_ATTR(event,	"config:0-7,32-35");
PMU_FORMAT_ATTR(umask,	"config:8-15");
PMU_FORMAT_ATTR(edge,	"config:18");
PMU_FORMAT_ATTR(inv,	"config:23");
PMU_FORMAT_ATTR(cmask,	"config:24-31");

static struct attribute *amd_format_attr[] = {
	&format_attr_event.attr,
	&format_attr_umask.attr,
	&format_attr_edge.attr,
	&format_attr_inv.attr,
	&format_attr_cmask.attr,
	NULL,
};

/* AMD Family 15h */

#define AMD_EVENT_TYPE_MASK	0x000000F0ULL

#define AMD_EVENT_FP		0x00000000ULL ... 0x00000010ULL
#define AMD_EVENT_LS		0x00000020ULL ... 0x00000030ULL
#define AMD_EVENT_DC		0x00000040ULL ... 0x00000050ULL
#define AMD_EVENT_CU		0x00000060ULL ... 0x00000070ULL
#define AMD_EVENT_IC_DE		0x00000080ULL ... 0x00000090ULL
#define AMD_EVENT_EX_LS		0x000000C0ULL
#define AMD_EVENT_DE		0x000000D0ULL
#define AMD_EVENT_NB		0x000000E0ULL ... 0x000000F0ULL
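
/*
 * The "low ... high" definitions above are used as GCC case-range labels
 * in the switch statement of amd_get_event_constraints_f15h() below;
 * e.g. AMD_EVENT_FP covers both the 0x000 and 0x010 event types listed
 * in the mapping table that follows.
 */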

/*
 * AMD family 15h event code/PMC mappings:
 *
 * type = event_code & 0x0F0:
 *
 * 0x000	FP	PERF_CTL[5:3]
 * 0x010	FP	PERF_CTL[5:3]
 * 0x020	LS	PERF_CTL[5:0]
 * 0x030	LS	PERF_CTL[5:0]
 * 0x040	DC	PERF_CTL[5:0]
 * 0x050	DC	PERF_CTL[5:0]
 * 0x060	CU	PERF_CTL[2:0]
 * 0x070	CU	PERF_CTL[2:0]
 * 0x080	IC/DE	PERF_CTL[2:0]
 * 0x090	IC/DE	PERF_CTL[2:0]
 * 0x0A0	---
 * 0x0B0	---
 * 0x0C0	EX/LS	PERF_CTL[5:0]
 * 0x0D0	DE	PERF_CTL[2:0]
 * 0x0E0	NB	NB_PERF_CTL[3:0]
 * 0x0F0	NB	NB_PERF_CTL[3:0]
 *
 * Exceptions:
 *
 * 0x000	FP	PERF_CTL[3], PERF_CTL[5:3] (*)
 * 0x003	FP	PERF_CTL[3]
 * 0x004	FP	PERF_CTL[3], PERF_CTL[5:3] (*)
 * 0x00B	FP	PERF_CTL[3]
 * 0x00D	FP	PERF_CTL[3]
 * 0x023	DE	PERF_CTL[2:0]
 * 0x02D	LS	PERF_CTL[3]
 * 0x02E	LS	PERF_CTL[3,0]
 * 0x031	LS	PERF_CTL[2:0] (**)
 * 0x043	CU	PERF_CTL[2:0]
 * 0x045	CU	PERF_CTL[2:0]
 * 0x046	CU	PERF_CTL[2:0]
 * 0x054	CU	PERF_CTL[2:0]
 * 0x055	CU	PERF_CTL[2:0]
 * 0x08F	IC	PERF_CTL[0]
 * 0x187	DE	PERF_CTL[0]
 * 0x188	DE	PERF_CTL[0]
 * 0x0DB	EX	PERF_CTL[5:0]
 * 0x0DC	LS	PERF_CTL[5:0]
 * 0x0DD	LS	PERF_CTL[5:0]
 * 0x0DE	LS	PERF_CTL[5:0]
 * 0x0DF	LS	PERF_CTL[5:0]
 * 0x1C0	EX	PERF_CTL[5:3]
 * 0x1D6	EX	PERF_CTL[5:0]
 * 0x1D8	EX	PERF_CTL[5:0]
 *
 * (*)  depending on the umask all FPU counters may be used
 * (**) only one unitmask enabled at a time
 */

static struct event_constraint amd_f15_PMC0  = EVENT_CONSTRAINT(0, 0x01, 0);
static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
static struct event_constraint amd_f15_PMC3  = EVENT_CONSTRAINT(0, 0x08, 0);
static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);

static struct event_constraint *
amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx,
			       struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	unsigned int event_code = amd_get_event_code(hwc);

	switch (event_code & AMD_EVENT_TYPE_MASK) {
	case AMD_EVENT_FP:
		switch (event_code) {
		case 0x000:
			if (!(hwc->config & 0x0000F000ULL))
				break;
			if (!(hwc->config & 0x00000F00ULL))
				break;
			return &amd_f15_PMC3;
		case 0x004:
			if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
				break;
			return &amd_f15_PMC3;
		case 0x003:
		case 0x00B:
		case 0x00D:
			return &amd_f15_PMC3;
		}
		return &amd_f15_PMC53;
	case AMD_EVENT_LS:
	case AMD_EVENT_DC:
	case AMD_EVENT_EX_LS:
		switch (event_code) {
		case 0x023:
		case 0x043:
		case 0x045:
		case 0x046:
		case 0x054:
		case 0x055:
			return &amd_f15_PMC20;
		case 0x02D:
			return &amd_f15_PMC3;
		case 0x02E:
			return &amd_f15_PMC30;
		case 0x031:
			if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
				return &amd_f15_PMC20;
			return &emptyconstraint;
		case 0x1C0:
			return &amd_f15_PMC53;
		default:
			return &amd_f15_PMC50;
		}
	case AMD_EVENT_CU:
	case AMD_EVENT_IC_DE:
	case AMD_EVENT_DE:
		switch (event_code) {
		case 0x08F:
		case 0x187:
		case 0x188:
			return &amd_f15_PMC0;
		case 0x0DB ... 0x0DF:
		case 0x1D6:
		case 0x1D8:
			return &amd_f15_PMC50;
		default:
			return &amd_f15_PMC20;
		}
	case AMD_EVENT_NB:
		/* moved to uncore.c */
		return &emptyconstraint;
	default:
		return &emptyconstraint;
	}
}
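
/*
 * For reference, the constraint masks used above are bitmasks of allowed
 * counters: amd_f15_PMC20 (0x07) allows PERF_CTL[2:0], amd_f15_PMC53
 * (0x38) allows PERF_CTL[5:3], amd_f15_PMC30 (0x09) allows PERF_CTL[3,0]
 * (e.g. event 0x02E in the table above), and amd_f15_PMC50 (0x3F) allows
 * any of PERF_CTL[5:0].
 */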

static ssize_t amd_event_sysfs_show(char *page, u64 config)
{
	u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) |
		    (config & AMD64_EVENTSEL_EVENT) >> 24;

	return x86_event_sysfs_show(page, config, event);
}

static __initconst const struct x86_pmu amd_pmu = {
	.name			= "AMD",
	.handle_irq		= amd_pmu_handle_irq,
	.disable_all		= amd_pmu_disable_all,
	.enable_all		= x86_pmu_enable_all,
	.enable			= x86_pmu_enable_event,
	.disable		= amd_pmu_disable_event,
	.hw_config		= amd_pmu_hw_config,
	.schedule_events	= x86_schedule_events,
	.eventsel		= MSR_K7_EVNTSEL0,
	.perfctr		= MSR_K7_PERFCTR0,
	.addr_offset		= amd_pmu_addr_offset,
	.event_map		= amd_pmu_event_map,
	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
	.num_counters		= AMD64_NUM_COUNTERS,
	.cntval_bits		= 48,
	.cntval_mask		= (1ULL << 48) - 1,
	.apic			= 1,
	/* use highest bit to detect overflow */
	.max_period		= (1ULL << 47) - 1,
	.get_event_constraints	= amd_get_event_constraints,
	.put_event_constraints	= amd_put_event_constraints,

	.format_attrs		= amd_format_attr,
	.events_sysfs_show	= amd_event_sysfs_show,

	.cpu_prepare		= amd_pmu_cpu_prepare,
	.cpu_starting		= amd_pmu_cpu_starting,
	.cpu_dead		= amd_pmu_cpu_dead,

	.amd_nb_constraints	= 1,
};

static int __init amd_core_pmu_init(void)
{
	if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
		return 0;

	switch (boot_cpu_data.x86) {
	case 0x15:
		pr_cont("Fam15h ");
		x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
		break;
	case 0x17:
		pr_cont("Fam17h ");
		/*
		 * In family 17h, there are no event constraints in the PMC hardware.
		 * We fall back to using the default amd_get_event_constraints.
		 */
		break;
	case 0x18:
		pr_cont("Fam18h ");
		/* Using default amd_get_event_constraints. */
		break;
	default:
		pr_err("core perfctr but no constraints; unknown hardware!\n");
		return -ENODEV;
	}

	/*
	 * If core performance counter extensions exist, we must use
	 * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also
	 * amd_pmu_addr_offset().
	 */
	x86_pmu.eventsel	= MSR_F15H_PERF_CTL;
	x86_pmu.perfctr		= MSR_F15H_PERF_CTR;
	x86_pmu.num_counters	= AMD64_NUM_COUNTERS_CORE;
	/*
	 * AMD Core perfctr has separate MSRs for the NB events, see
	 * the amd/uncore.c driver.
	 */
	x86_pmu.amd_nb_constraints = 0;

	pr_cont("core perfctr, ");
	return 0;
}

__init int amd_pmu_init(void)
{
	int ret;

	/* Performance-monitoring supported from K7 and later: */
	if (boot_cpu_data.x86 < 6)
		return -ENODEV;

	x86_pmu = amd_pmu;

	ret = amd_core_pmu_init();
	if (ret)
		return ret;

	if (num_possible_cpus() == 1) {
		/*
		 * No point in allocating data structures to serialize
		 * against other CPUs, when there is only the one CPU.
		 */
		x86_pmu.amd_nb_constraints = 0;
	}

	/* Events are common for all AMDs */
	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
	       sizeof(hw_cache_event_ids));

	return 0;
}

void amd_pmu_enable_virt(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	cpuc->perf_ctr_virt_mask = 0;

	/* Reload all events */
	amd_pmu_disable_all();
	x86_pmu_enable_all(0);
}
EXPORT_SYMBOL_GPL(amd_pmu_enable_virt);

void amd_pmu_disable_virt(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	/*
	 * We only mask out the Host-only bit so that host-only counting works
	 * when SVM is disabled. If someone sets up a guest-only counter when
	 * SVM is disabled the Guest-only bit still gets set and the counter
	 * will not count anything.
	 */
	cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;

	/* Reload all events */
	amd_pmu_disable_all();
	x86_pmu_enable_all(0);
}
EXPORT_SYMBOL_GPL(amd_pmu_disable_virt);