#include <linux/perf_event.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <asm/apicdef.h>

#include "../perf_event.h"

static __initconst const u64 amd_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
		[ C(RESULT_MISS)   ] = 0x0141, /* Data Cache Misses          */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts  */
		[ C(RESULT_MISS)   ] = 0x0167, /* Data Prefetcher :cancelled */
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches  */
		[ C(RESULT_MISS)   ] = 0x0081, /* Instruction cache misses   */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
		[ C(RESULT_MISS)   ] = 0x037E, /* L2 Cache Misses : IC+DC    */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback          */
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses        */
		[ C(RESULT_MISS)   ] = 0x0746, /* L1_DTLB_AND_L2_DTLB_MISS.ALL */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches        */
		[ C(RESULT_MISS)   ] = 0x0385, /* L1_ITLB_AND_L2_ITLB_MISS.ALL */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr.      */
		[ C(RESULT_MISS)   ] = 0x00c3, /* Retired Mispredicted BI    */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(NODE) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0xb8e9, /* CPU Request to Memory, l+r */
		[ C(RESULT_MISS)   ] = 0x98e9, /* CPU Request to Memory, r   */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};

/*
 * AMD Performance Monitor K7 and later.
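 *
 * The event values, here and in the cache table above, are raw PERF_CTL
 * encodings: the unit mask sits in bits [15:8] and the event select code
 * in bits [7:0], e.g. 0x077d is event 0x7d with unit mask 0x07.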
 */
static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] =
{
	[PERF_COUNT_HW_CPU_CYCLES]		= 0x0076,
	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x077d,
	[PERF_COUNT_HW_CACHE_MISSES]		= 0x077e,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c2,
	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c3,
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= 0x00d0, /* "Decoder empty" event */
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= 0x00d1, /* "Dispatch stalls" event */
};

static u64 amd_pmu_event_map(int hw_event)
{
	return amd_perfmon_event_map[hw_event];
}

/*
 * Previously calculated offsets
 */
static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly;
static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly;

/*
 * Legacy CPUs:
 *   4 counters starting at 0xc0010000 each offset by 1
 *
 * CPUs with core performance counter extensions:
 *   6 counters starting at 0xc0010200 each offset by 2
 */
static inline int amd_pmu_addr_offset(int index, bool eventsel)
{
	int offset;

	if (!index)
		return index;

	if (eventsel)
		offset = event_offsets[index];
	else
		offset = count_offsets[index];

	if (offset)
		return offset;

	if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
		offset = index;
	else
		offset = index << 1;

	if (eventsel)
		event_offsets[index] = offset;
	else
		count_offsets[index] = offset;

	return offset;
}

static int amd_core_hw_config(struct perf_event *event)
{
	if (event->attr.exclude_host && event->attr.exclude_guest)
		/*
		 * When HO == GO == 1 the hardware treats that as GO == HO == 0
		 * and will count in both modes. We don't want to count in that
		 * case so we emulate no-counting by setting US = OS = 0.
		 */
		event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
				      ARCH_PERFMON_EVENTSEL_OS);
	else if (event->attr.exclude_host)
		event->hw.config |= AMD64_EVENTSEL_GUESTONLY;
	else if (event->attr.exclude_guest)
		event->hw.config |= AMD64_EVENTSEL_HOSTONLY;

	return 0;
}

/*
 * AMD64 events are detected based on their event codes.
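 *
 * The 12-bit event code is split in the event select register: bits
 * [7:0] of the code live in config bits [7:0] and bits [11:8] live in
 * config bits [35:32], hence the shift by 24 in amd_get_event_code().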
 */
static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc)
{
	return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff);
}

static inline int amd_is_nb_event(struct hw_perf_event *hwc)
{
	return (hwc->config & 0xe0) == 0xe0;
}

static inline int amd_has_nb(struct cpu_hw_events *cpuc)
{
	struct amd_nb *nb = cpuc->amd_nb;

	return nb && nb->nb_id != -1;
}

static int amd_pmu_hw_config(struct perf_event *event)
{
	int ret;

	/* pass precise event sampling to ibs: */
	if (event->attr.precise_ip && get_ibs_caps())
		return -ENOENT;

	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	ret = x86_pmu_hw_config(event);
	if (ret)
		return ret;

	if (event->attr.type == PERF_TYPE_RAW)
		event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;

	return amd_core_hw_config(event);
}

static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
					   struct perf_event *event)
{
	struct amd_nb *nb = cpuc->amd_nb;
	int i;

	/*
	 * need to scan whole list because event may not have
	 * been assigned during scheduling
	 *
	 * no race condition possible because event can only
	 * be removed on one CPU at a time AND PMU is disabled
	 * when we come here
	 */
	for (i = 0; i < x86_pmu.num_counters; i++) {
		if (cmpxchg(nb->owners + i, event, NULL) == event)
			break;
	}
}

/*
 * AMD64 NorthBridge events need special treatment because
 * counter access needs to be synchronized across all cores
 * of a package. Refer to BKDG section 3.12.
 *
 * NB events are events measuring L3 cache and HyperTransport
 * traffic. They are identified by an event code >= 0xe00.
 * They measure events on the NorthBridge which is shared
 * by all cores on a package. NB events are counted on a
 * shared set of counters. When a NB event is programmed
 * in a counter, the data actually comes from a shared
 * counter. Thus, access to those counters needs to be
 * synchronized.
 *
 * We implement the synchronization such that no two cores
 * can be measuring NB events using the same counters. Thus,
 * we maintain a per-NB allocation table. The available slot
 * is propagated using the event_constraint structure.
 *
 * We provide only one choice for each NB event based on
 * the fact that only NB events have restrictions. Consequently,
 * if a counter is available, there is a guarantee the NB event
 * will be assigned to it. If no slot is available, an empty
 * constraint is returned and scheduling will eventually fail
 * for this event.
 *
 * Note that all cores attached to the same NB compete for the same
 * counters to host NB events; this is why we use atomic ops. Some
 * multi-chip CPUs may have more than one NB.
 *
 * Given that resources are allocated (cmpxchg), they must be
 * eventually freed for others to use. This is accomplished by
 * calling __amd_put_nb_event_constraints().
 *
 * Non-NB events are not impacted by this restriction.
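 *
 * The owners[] array in struct amd_nb is the allocation table: a core
 * claims slot i with cmpxchg(&nb->owners[i], NULL, event) and releases
 * it by swapping the event pointer back to NULL.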
 */
static struct event_constraint *
__amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
			       struct event_constraint *c)
{
	struct hw_perf_event *hwc = &event->hw;
	struct amd_nb *nb = cpuc->amd_nb;
	struct perf_event *old;
	int idx, new = -1;

	if (!c)
		c = &unconstrained;

	if (cpuc->is_fake)
		return c;

	/*
	 * detect if already present, if so reuse
	 *
	 * cannot merge with actual allocation
	 * because of possible holes
	 *
	 * event can already be present yet not assigned (in hwc->idx)
	 * because of successive calls to x86_schedule_events() from
	 * hw_perf_group_sched_in() without hw_perf_enable()
	 */
	for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) {
		if (new == -1 || hwc->idx == idx)
			/* assign free slot, prefer hwc->idx */
			old = cmpxchg(nb->owners + idx, NULL, event);
		else if (nb->owners[idx] == event)
			/* event already present */
			old = event;
		else
			continue;

		if (old && old != event)
			continue;

		/* reassign to this slot */
		if (new != -1)
			cmpxchg(nb->owners + new, event, NULL);
		new = idx;

		/* already present, reuse */
		if (old == event)
			break;
	}

	if (new == -1)
		return &emptyconstraint;

	return &nb->event_constraints[new];
}

static struct amd_nb *amd_alloc_nb(int cpu)
{
	struct amd_nb *nb;
	int i;

	nb = kzalloc_node(sizeof(struct amd_nb), GFP_KERNEL, cpu_to_node(cpu));
	if (!nb)
		return NULL;

	nb->nb_id = -1;

	/*
	 * initialize all possible NB constraints
	 */
	for (i = 0; i < x86_pmu.num_counters; i++) {
		__set_bit(i, nb->event_constraints[i].idxmsk);
		nb->event_constraints[i].weight = 1;
	}
	return nb;
}

static int amd_pmu_cpu_prepare(int cpu)
{
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);

	WARN_ON_ONCE(cpuc->amd_nb);

	if (!x86_pmu.amd_nb_constraints)
		return 0;

	cpuc->amd_nb = amd_alloc_nb(cpu);
	if (!cpuc->amd_nb)
		return -ENOMEM;

	return 0;
}
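
/*
 * The first CPU that comes online on a node keeps the amd_nb it allocated
 * in amd_pmu_cpu_prepare(); every later CPU on the same node queues its
 * own copy for kfree and shares the existing one, with nb->refcnt counting
 * the sharers.
 */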
static void amd_pmu_cpu_starting(int cpu)
{
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
	void **onln = &cpuc->kfree_on_online[X86_PERF_KFREE_SHARED];
	struct amd_nb *nb;
	int i, nb_id;

	cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;

	if (!x86_pmu.amd_nb_constraints)
		return;

	nb_id = amd_get_nb_id(cpu);
	WARN_ON_ONCE(nb_id == BAD_APICID);

	for_each_online_cpu(i) {
		nb = per_cpu(cpu_hw_events, i).amd_nb;
		if (WARN_ON_ONCE(!nb))
			continue;

		if (nb->nb_id == nb_id) {
			*onln = cpuc->amd_nb;
			cpuc->amd_nb = nb;
			break;
		}
	}

	cpuc->amd_nb->nb_id = nb_id;
	cpuc->amd_nb->refcnt++;
}

static void amd_pmu_cpu_dead(int cpu)
{
	struct cpu_hw_events *cpuhw;

	if (!x86_pmu.amd_nb_constraints)
		return;

	cpuhw = &per_cpu(cpu_hw_events, cpu);

	if (cpuhw->amd_nb) {
		struct amd_nb *nb = cpuhw->amd_nb;

		if (nb->nb_id == -1 || --nb->refcnt == 0)
			kfree(nb);

		cpuhw->amd_nb = NULL;
	}
}

static struct event_constraint *
amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
			  struct perf_event *event)
{
	/*
	 * if not NB event or no NB, then no constraints
	 */
	if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)))
		return &unconstrained;

	return __amd_get_nb_event_constraints(cpuc, event, NULL);
}

static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
				      struct perf_event *event)
{
	if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))
		__amd_put_nb_event_constraints(cpuc, event);
}

PMU_FORMAT_ATTR(event,	"config:0-7,32-35");
PMU_FORMAT_ATTR(umask,	"config:8-15");
PMU_FORMAT_ATTR(edge,	"config:18");
PMU_FORMAT_ATTR(inv,	"config:23");
PMU_FORMAT_ATTR(cmask,	"config:24-31");

static struct attribute *amd_format_attr[] = {
	&format_attr_event.attr,
	&format_attr_umask.attr,
	&format_attr_edge.attr,
	&format_attr_inv.attr,
	&format_attr_cmask.attr,
	NULL,
};

/* AMD Family 15h */

#define AMD_EVENT_TYPE_MASK	0x000000F0ULL

#define AMD_EVENT_FP		0x00000000ULL ... 0x00000010ULL
#define AMD_EVENT_LS		0x00000020ULL ... 0x00000030ULL
#define AMD_EVENT_DC		0x00000040ULL ... 0x00000050ULL
#define AMD_EVENT_CU		0x00000060ULL ... 0x00000070ULL
#define AMD_EVENT_IC_DE		0x00000080ULL ... 0x00000090ULL
#define AMD_EVENT_EX_LS		0x000000C0ULL
#define AMD_EVENT_DE		0x000000D0ULL
#define AMD_EVENT_NB		0x000000E0ULL ... 0x000000F0ULL

/*
 * AMD family 15h event code/PMC mappings:
 *
 * type = event_code & 0x0F0:
 *
 * 0x000	FP	PERF_CTL[5:3]
 * 0x010	FP	PERF_CTL[5:3]
 * 0x020	LS	PERF_CTL[5:0]
 * 0x030	LS	PERF_CTL[5:0]
 * 0x040	DC	PERF_CTL[5:0]
 * 0x050	DC	PERF_CTL[5:0]
 * 0x060	CU	PERF_CTL[2:0]
 * 0x070	CU	PERF_CTL[2:0]
 * 0x080	IC/DE	PERF_CTL[2:0]
 * 0x090	IC/DE	PERF_CTL[2:0]
 * 0x0A0	---
 * 0x0B0	---
 * 0x0C0	EX/LS	PERF_CTL[5:0]
 * 0x0D0	DE	PERF_CTL[2:0]
 * 0x0E0	NB	NB_PERF_CTL[3:0]
 * 0x0F0	NB	NB_PERF_CTL[3:0]
 *
 * Exceptions:
 *
 * 0x000	FP	PERF_CTL[3], PERF_CTL[5:3] (*)
 * 0x003	FP	PERF_CTL[3]
 * 0x004	FP	PERF_CTL[3], PERF_CTL[5:3] (*)
 * 0x00B	FP	PERF_CTL[3]
 * 0x00D	FP	PERF_CTL[3]
 * 0x023	DE	PERF_CTL[2:0]
 * 0x02D	LS	PERF_CTL[3]
 * 0x02E	LS	PERF_CTL[3,0]
 * 0x031	LS	PERF_CTL[2:0] (**)
 * 0x043	CU	PERF_CTL[2:0]
 * 0x045	CU	PERF_CTL[2:0]
 * 0x046	CU	PERF_CTL[2:0]
 * 0x054	CU	PERF_CTL[2:0]
 * 0x055	CU	PERF_CTL[2:0]
 * 0x08F	IC	PERF_CTL[0]
 * 0x187	DE	PERF_CTL[0]
 * 0x188	DE	PERF_CTL[0]
 * 0x0DB	EX	PERF_CTL[5:0]
 * 0x0DC	LS	PERF_CTL[5:0]
 * 0x0DD	LS	PERF_CTL[5:0]
 * 0x0DE	LS	PERF_CTL[5:0]
 * 0x0DF	LS	PERF_CTL[5:0]
 * 0x1C0	EX	PERF_CTL[5:3]
 * 0x1D6	EX	PERF_CTL[5:0]
 * 0x1D8	EX	PERF_CTL[5:0]
 *
 * (*)  depending on the umask all FPU counters may be used
 * (**) only one unitmask enabled at a time
 */
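
/*
 * The masks below select which PERF_CTL registers an event may use:
 * 0x01 = PMC0, 0x07 = PMC[2:0], 0x08 = PMC3, 0x09 = PMC3 or PMC0
 * (an overlapping mask), 0x3F = PMC[5:0], 0x38 = PMC[5:3].
 */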
static struct event_constraint amd_f15_PMC0  = EVENT_CONSTRAINT(0, 0x01, 0);
static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
static struct event_constraint amd_f15_PMC3  = EVENT_CONSTRAINT(0, 0x08, 0);
static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);

static struct event_constraint *
amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx,
			       struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	unsigned int event_code = amd_get_event_code(hwc);

	switch (event_code & AMD_EVENT_TYPE_MASK) {
	case AMD_EVENT_FP:
		switch (event_code) {
		case 0x000:
			if (!(hwc->config & 0x0000F000ULL))
				break;
			if (!(hwc->config & 0x00000F00ULL))
				break;
			return &amd_f15_PMC3;
		case 0x004:
			if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
				break;
			return &amd_f15_PMC3;
		case 0x003:
		case 0x00B:
		case 0x00D:
			return &amd_f15_PMC3;
		}
		return &amd_f15_PMC53;
	case AMD_EVENT_LS:
	case AMD_EVENT_DC:
	case AMD_EVENT_EX_LS:
		switch (event_code) {
		case 0x023:
		case 0x043:
		case 0x045:
		case 0x046:
		case 0x054:
		case 0x055:
			return &amd_f15_PMC20;
		case 0x02D:
			return &amd_f15_PMC3;
		case 0x02E:
			return &amd_f15_PMC30;
		case 0x031:
			if (hweight_long(hwc->config & ARCH_PERFMON_EVENTSEL_UMASK) <= 1)
				return &amd_f15_PMC20;
			return &emptyconstraint;
		case 0x1C0:
			return &amd_f15_PMC53;
		default:
			return &amd_f15_PMC50;
		}
	case AMD_EVENT_CU:
	case AMD_EVENT_IC_DE:
	case AMD_EVENT_DE:
		switch (event_code) {
		case 0x08F:
		case 0x187:
		case 0x188:
			return &amd_f15_PMC0;
		case 0x0DB ... 0x0DF:
		case 0x1D6:
		case 0x1D8:
			return &amd_f15_PMC50;
		default:
			return &amd_f15_PMC20;
		}
	case AMD_EVENT_NB:
		/* moved to uncore.c */
		return &emptyconstraint;
	default:
		return &emptyconstraint;
	}
}
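
/*
 * Rebuild the full event code for the sysfs event string: the low 8 bits
 * from config[7:0] plus the extended event select bits shifted down from
 * config[35:32].
 */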
static ssize_t amd_event_sysfs_show(char *page, u64 config)
{
	u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) |
		    (config & AMD64_EVENTSEL_EVENT) >> 24;

	return x86_event_sysfs_show(page, config, event);
}

static __initconst const struct x86_pmu amd_pmu = {
	.name			= "AMD",
	.handle_irq		= x86_pmu_handle_irq,
	.disable_all		= x86_pmu_disable_all,
	.enable_all		= x86_pmu_enable_all,
	.enable			= x86_pmu_enable_event,
	.disable		= x86_pmu_disable_event,
	.hw_config		= amd_pmu_hw_config,
	.schedule_events	= x86_schedule_events,
	.eventsel		= MSR_K7_EVNTSEL0,
	.perfctr		= MSR_K7_PERFCTR0,
	.addr_offset		= amd_pmu_addr_offset,
	.event_map		= amd_pmu_event_map,
	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
	.num_counters		= AMD64_NUM_COUNTERS,
	.cntval_bits		= 48,
	.cntval_mask		= (1ULL << 48) - 1,
	.apic			= 1,
	/* use highest bit to detect overflow */
	.max_period		= (1ULL << 47) - 1,
	.get_event_constraints	= amd_get_event_constraints,
	.put_event_constraints	= amd_put_event_constraints,

	.format_attrs		= amd_format_attr,
	.events_sysfs_show	= amd_event_sysfs_show,

	.cpu_prepare		= amd_pmu_cpu_prepare,
	.cpu_starting		= amd_pmu_cpu_starting,
	.cpu_dead		= amd_pmu_cpu_dead,

	.amd_nb_constraints	= 1,
};

static int __init amd_core_pmu_init(void)
{
	if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
		return 0;

	switch (boot_cpu_data.x86) {
	case 0x15:
		pr_cont("Fam15h ");
		x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
		break;
	case 0x17:
		pr_cont("Fam17h ");
		/*
		 * Family 17h has no event constraints in the PMC hardware,
		 * so we fall back to the default amd_get_event_constraints.
		 */
		break;
	case 0x18:
		pr_cont("Fam18h ");
		/* Using default amd_get_event_constraints. */
		break;
	default:
		pr_err("core perfctr but no constraints; unknown hardware!\n");
		return -ENODEV;
	}

	/*
	 * If core performance counter extensions exist, we must use
	 * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also
	 * amd_pmu_addr_offset().
	 */
	x86_pmu.eventsel	= MSR_F15H_PERF_CTL;
	x86_pmu.perfctr		= MSR_F15H_PERF_CTR;
	x86_pmu.num_counters	= AMD64_NUM_COUNTERS_CORE;
	/*
	 * AMD Core perfctr has separate MSRs for the NB events, see
	 * the amd/uncore.c driver.
	 */
	x86_pmu.amd_nb_constraints = 0;

	pr_cont("core perfctr, ");
	return 0;
}

__init int amd_pmu_init(void)
{
	int ret;

	/* Performance-monitoring supported from K7 and later: */
	if (boot_cpu_data.x86 < 6)
		return -ENODEV;

	x86_pmu = amd_pmu;

	ret = amd_core_pmu_init();
	if (ret)
		return ret;

	if (num_possible_cpus() == 1) {
		/*
		 * No point in allocating data structures to serialize
		 * against other CPUs when there is only one CPU.
		 */
		x86_pmu.amd_nb_constraints = 0;
	}

	/* Events are common for all AMDs */
	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
	       sizeof(hw_cache_event_ids));

	return 0;
}

void amd_pmu_enable_virt(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	cpuc->perf_ctr_virt_mask = 0;

	/* Reload all events */
	x86_pmu_disable_all();
	x86_pmu_enable_all(0);
}
EXPORT_SYMBOL_GPL(amd_pmu_enable_virt);

void amd_pmu_disable_virt(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	/*
	 * We only mask out the Host-only bit so that host-only counting works
	 * when SVM is disabled. If someone sets up a guest-only counter when
	 * SVM is disabled the Guest-only bit still gets set and the counter
	 * will not count anything.
	 */
	cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;

	/* Reload all events */
	x86_pmu_disable_all();
	x86_pmu_enable_all(0);
}
EXPORT_SYMBOL_GPL(amd_pmu_disable_virt);