// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/interrupt.h>

#include <asm/xen/hypercall.h>
#include <xen/xen.h>
#include <xen/page.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>
#include <xen/interface/xenpmu.h>

#include "xen-ops.h"

/* x86_pmu.handle_irq definition */
#include "../events/perf_event.h"

#define XENPMU_IRQ_PROCESSING 1
struct xenpmu {
	/* Shared page between hypervisor and domain */
	struct xen_pmu_data *xenpmu_data;

	uint8_t flags;
};
static DEFINE_PER_CPU(struct xenpmu, xenpmu_shared);
#define get_xenpmu_data()	(this_cpu_ptr(&xenpmu_shared)->xenpmu_data)
#define get_xenpmu_flags()	(this_cpu_ptr(&xenpmu_shared)->flags)

/* Macro for computing address of a PMU MSR bank */
#define field_offset(ctxt, field) ((void *)((uintptr_t)ctxt + \
					    (uintptr_t)ctxt->field))

/* AMD PMU */
#define F15H_NUM_COUNTERS	6
#define F10H_NUM_COUNTERS	4

static __read_mostly uint32_t amd_counters_base;
static __read_mostly uint32_t amd_ctrls_base;
static __read_mostly int amd_msr_step;
static __read_mostly int k7_counters_mirrored;
static __read_mostly int amd_num_counters;

/* Intel PMU */
#define MSR_TYPE_COUNTER	0
#define MSR_TYPE_CTRL		1
#define MSR_TYPE_GLOBAL		2
#define MSR_TYPE_ARCH_COUNTER	3
#define MSR_TYPE_ARCH_CTRL	4

/* Number of general pmu registers (CPUID.EAX[0xa].EAX[8..15]) */
#define PMU_GENERAL_NR_SHIFT	8
#define PMU_GENERAL_NR_BITS	8
#define PMU_GENERAL_NR_MASK	(((1 << PMU_GENERAL_NR_BITS) - 1) \
				 << PMU_GENERAL_NR_SHIFT)

/* Number of fixed pmu registers (CPUID.EDX[0xa].EDX[0..4]) */
#define PMU_FIXED_NR_SHIFT	0
#define PMU_FIXED_NR_BITS	5
#define PMU_FIXED_NR_MASK	(((1 << PMU_FIXED_NR_BITS) - 1) \
				 << PMU_FIXED_NR_SHIFT)

/* Alias registers (0x4c1) for full-width writes to PMCs */
#define MSR_PMC_ALIAS_MASK	(~(MSR_IA32_PERFCTR0 ^ MSR_IA32_PMC0))

#define INTEL_PMC_TYPE_SHIFT	30

static __read_mostly int intel_num_arch_counters, intel_num_fixed_counters;

static void xen_pmu_arch_init(void)
{
	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {

		switch (boot_cpu_data.x86) {
		case 0x15:
			amd_num_counters = F15H_NUM_COUNTERS;
			amd_counters_base = MSR_F15H_PERF_CTR;
			amd_ctrls_base = MSR_F15H_PERF_CTL;
			amd_msr_step = 2;
			k7_counters_mirrored = 1;
			break;
		case 0x10:
		case 0x12:
		case 0x14:
		case 0x16:
		default:
			amd_num_counters = F10H_NUM_COUNTERS;
			amd_counters_base = MSR_K7_PERFCTR0;
			amd_ctrls_base = MSR_K7_EVNTSEL0;
			amd_msr_step = 1;
			k7_counters_mirrored = 0;
			break;
		}
	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
		amd_num_counters = F10H_NUM_COUNTERS;
		amd_counters_base = MSR_K7_PERFCTR0;
		amd_ctrls_base = MSR_K7_EVNTSEL0;
		amd_msr_step = 1;
		k7_counters_mirrored = 0;
	} else {
		uint32_t eax, ebx, ecx, edx;

		cpuid(0xa, &eax, &ebx, &ecx, &edx);

		intel_num_arch_counters = (eax & PMU_GENERAL_NR_MASK) >>
			PMU_GENERAL_NR_SHIFT;
		intel_num_fixed_counters = (edx & PMU_FIXED_NR_MASK) >>
			PMU_FIXED_NR_SHIFT;
	}
}

static inline uint32_t get_fam15h_addr(u32 addr)
{
	switch (addr) {
	case MSR_K7_PERFCTR0:
	case MSR_K7_PERFCTR1:
	case MSR_K7_PERFCTR2:
	case MSR_K7_PERFCTR3:
		return MSR_F15H_PERF_CTR + (addr - MSR_K7_PERFCTR0);
	case MSR_K7_EVNTSEL0:
	case MSR_K7_EVNTSEL1:
	case MSR_K7_EVNTSEL2:
	case MSR_K7_EVNTSEL3:
		return MSR_F15H_PERF_CTL + (addr - MSR_K7_EVNTSEL0);
	default:
		break;
	}

	return addr;
}

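/*
 * Check whether @msr falls in the AMD/Hygon PMU MSR range (either the
 * family 15h extended bank or the legacy K7 bank).
 */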
static inline bool is_amd_pmu_msr(unsigned int msr)
{
	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
	    boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
		return false;

	if ((msr >= MSR_F15H_PERF_CTL &&
	     msr < MSR_F15H_PERF_CTR + (amd_num_counters * 2)) ||
	    (msr >= MSR_K7_EVNTSEL0 &&
	     msr < MSR_K7_PERFCTR0 + amd_num_counters))
		return true;

	return false;
}

static bool is_intel_pmu_msr(u32 msr_index, int *type, int *index)
{
	u32 msr_index_pmc;

	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL &&
	    boot_cpu_data.x86_vendor != X86_VENDOR_CENTAUR &&
	    boot_cpu_data.x86_vendor != X86_VENDOR_ZHAOXIN)
		return false;

	switch (msr_index) {
	case MSR_CORE_PERF_FIXED_CTR_CTRL:
	case MSR_IA32_DS_AREA:
	case MSR_IA32_PEBS_ENABLE:
		*type = MSR_TYPE_CTRL;
		return true;

	case MSR_CORE_PERF_GLOBAL_CTRL:
	case MSR_CORE_PERF_GLOBAL_STATUS:
	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
		*type = MSR_TYPE_GLOBAL;
		return true;

	default:

		if ((msr_index >= MSR_CORE_PERF_FIXED_CTR0) &&
		    (msr_index < MSR_CORE_PERF_FIXED_CTR0 +
				 intel_num_fixed_counters)) {
			*index = msr_index - MSR_CORE_PERF_FIXED_CTR0;
			*type = MSR_TYPE_COUNTER;
			return true;
		}

		if ((msr_index >= MSR_P6_EVNTSEL0) &&
		    (msr_index < MSR_P6_EVNTSEL0 + intel_num_arch_counters)) {
			*index = msr_index - MSR_P6_EVNTSEL0;
			*type = MSR_TYPE_ARCH_CTRL;
			return true;
		}

		msr_index_pmc = msr_index & MSR_PMC_ALIAS_MASK;
		if ((msr_index_pmc >= MSR_IA32_PERFCTR0) &&
		    (msr_index_pmc < MSR_IA32_PERFCTR0 +
				     intel_num_arch_counters)) {
			*type = MSR_TYPE_ARCH_COUNTER;
			*index = msr_index_pmc - MSR_IA32_PERFCTR0;
			return true;
		}
		return false;
	}
}

static bool xen_intel_pmu_emulate(unsigned int msr, u64 *val, int type,
				  int index, bool is_read)
{
	uint64_t *reg = NULL;
	struct xen_pmu_intel_ctxt *ctxt;
	uint64_t *fix_counters;
	struct xen_pmu_cntr_pair *arch_cntr_pair;
	struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
	uint8_t xenpmu_flags = get_xenpmu_flags();

	if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING))
		return false;

	ctxt = &xenpmu_data->pmu.c.intel;

	switch (msr) {
	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
		reg = &ctxt->global_ovf_ctrl;
		break;
	case MSR_CORE_PERF_GLOBAL_STATUS:
		reg = &ctxt->global_status;
		break;
	case MSR_CORE_PERF_GLOBAL_CTRL:
		reg = &ctxt->global_ctrl;
		break;
	case MSR_CORE_PERF_FIXED_CTR_CTRL:
		reg = &ctxt->fixed_ctrl;
		break;
	default:
		switch (type) {
		case MSR_TYPE_COUNTER:
			fix_counters = field_offset(ctxt, fixed_counters);
			reg = &fix_counters[index];
			break;
		case MSR_TYPE_ARCH_COUNTER:
			arch_cntr_pair = field_offset(ctxt, arch_counters);
			reg = &arch_cntr_pair[index].counter;
			break;
		case MSR_TYPE_ARCH_CTRL:
			arch_cntr_pair = field_offset(ctxt, arch_counters);
			reg = &arch_cntr_pair[index].control;
			break;
		default:
			return false;
		}
	}

	if (reg) {
		if (is_read)
			*val = *reg;
		else {
			*reg = *val;

			if (msr == MSR_CORE_PERF_GLOBAL_OVF_CTRL)
				ctxt->global_status &= (~(*val));
		}
		return true;
	}

	return false;
}

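/*
 * Emulate an AMD PMU MSR access against the PMU context cached in the
 * shared page.  Returns false if the access was not emulated and should
 * fall through to the real hardware MSR.
 */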
static bool xen_amd_pmu_emulate(unsigned int msr, u64 *val, bool is_read)
{
	uint64_t *reg = NULL;
	int i, off = 0;
	struct xen_pmu_amd_ctxt *ctxt;
	uint64_t *counter_regs, *ctrl_regs;
	struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
	uint8_t xenpmu_flags = get_xenpmu_flags();

	if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING))
		return false;

	if (k7_counters_mirrored &&
	    ((msr >= MSR_K7_EVNTSEL0) && (msr <= MSR_K7_PERFCTR3)))
		msr = get_fam15h_addr(msr);

	ctxt = &xenpmu_data->pmu.c.amd;
	for (i = 0; i < amd_num_counters; i++) {
		if (msr == amd_ctrls_base + off) {
			ctrl_regs = field_offset(ctxt, ctrls);
			reg = &ctrl_regs[i];
			break;
		} else if (msr == amd_counters_base + off) {
			counter_regs = field_offset(ctxt, counters);
			reg = &counter_regs[i];
			break;
		}
		off += amd_msr_step;
	}

	if (reg) {
		if (is_read)
			*val = *reg;
		else
			*reg = *val;

		return true;
	}
	return false;
}

static bool pmu_msr_chk_emulated(unsigned int msr, uint64_t *val, bool is_read,
				 bool *emul)
{
	int type, index = 0;

	if (is_amd_pmu_msr(msr))
		*emul = xen_amd_pmu_emulate(msr, val, is_read);
	else if (is_intel_pmu_msr(msr, &type, &index))
		*emul = xen_intel_pmu_emulate(msr, val, type, index, is_read);
	else
		return false;

	return true;
}

bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err)
{
	bool emulated;

	if (!pmu_msr_chk_emulated(msr, val, true, &emulated))
		return false;

	if (!emulated) {
		*val = err ? native_read_msr_safe(msr, err)
			   : native_read_msr(msr);
	}

	return true;
}

bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err)
{
	uint64_t val = ((uint64_t)high << 32) | low;
	bool emulated;

	if (!pmu_msr_chk_emulated(msr, &val, false, &emulated))
		return false;

	if (!emulated) {
		if (err)
			*err = native_write_msr_safe(msr, low, high);
		else
			native_write_msr(msr, low, high);
	}

	return true;
}

static unsigned long long xen_amd_read_pmc(int counter)
{
	struct xen_pmu_amd_ctxt *ctxt;
	uint64_t *counter_regs;
	struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
	uint8_t xenpmu_flags = get_xenpmu_flags();

	if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) {
		uint32_t msr;
		int err;

		msr = amd_counters_base + (counter * amd_msr_step);
		return native_read_msr_safe(msr, &err);
	}

	ctxt = &xenpmu_data->pmu.c.amd;
	counter_regs = field_offset(ctxt, counters);
	return counter_regs[counter];
}

static unsigned long long xen_intel_read_pmc(int counter)
{
	struct xen_pmu_intel_ctxt *ctxt;
	uint64_t *fixed_counters;
	struct xen_pmu_cntr_pair *arch_cntr_pair;
	struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
	uint8_t xenpmu_flags = get_xenpmu_flags();

	if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) {
		uint32_t msr;
		int err;

		if (counter & (1 << INTEL_PMC_TYPE_SHIFT))
			msr = MSR_CORE_PERF_FIXED_CTR0 + (counter & 0xffff);
		else
			msr = MSR_IA32_PERFCTR0 + counter;

		return native_read_msr_safe(msr, &err);
	}

	ctxt = &xenpmu_data->pmu.c.intel;
	if (counter & (1 << INTEL_PMC_TYPE_SHIFT)) {
		fixed_counters = field_offset(ctxt, fixed_counters);
		return fixed_counters[counter & 0xffff];
	}

	arch_cntr_pair = field_offset(ctxt, arch_counters);
	return arch_cntr_pair[counter].counter;
}

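/*
 * Read a performance counter, dispatching to the AMD or Intel handler
 * based on the boot CPU vendor.
 */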
unsigned long long xen_read_pmc(int counter)
{
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return xen_amd_read_pmc(counter);
	else
		return xen_intel_read_pmc(counter);
}

int pmu_apic_update(uint32_t val)
{
	int ret;
	struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

	if (!xenpmu_data) {
		pr_warn_once("%s: pmudata not initialized\n", __func__);
		return -EINVAL;
	}

	xenpmu_data->pmu.l.lapic_lvtpc = val;

	if (get_xenpmu_flags() & XENPMU_IRQ_PROCESSING)
		return 0;

	ret = HYPERVISOR_xenpmu_op(XENPMU_lvtpc_set, NULL);

	return ret;
}

/* perf callbacks */
static unsigned int xen_guest_state(void)
{
	const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
	unsigned int state = 0;

	if (!xenpmu_data) {
		pr_warn_once("%s: pmudata not initialized\n", __func__);
		return state;
	}

	if (!xen_initial_domain() || (xenpmu_data->domain_id >= DOMID_SELF))
		return state;

	state |= PERF_GUEST_ACTIVE;

	if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_PV) {
		if (xenpmu_data->pmu.pmu_flags & PMU_SAMPLE_USER)
			state |= PERF_GUEST_USER;
	} else if (xenpmu_data->pmu.r.regs.cpl & 3) {
		state |= PERF_GUEST_USER;
	}

	return state;
}

static unsigned long xen_get_guest_ip(void)
{
	const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();

	if (!xenpmu_data) {
		pr_warn_once("%s: pmudata not initialized\n", __func__);
		return 0;
	}

	return xenpmu_data->pmu.r.regs.ip;
}

static struct perf_guest_info_callbacks xen_guest_cbs = {
	.state  = xen_guest_state,
	.get_ip = xen_get_guest_ip,
};

/* Convert registers from Xen's format to Linux' */
static void xen_convert_regs(const struct xen_pmu_regs *xen_regs,
			     struct pt_regs *regs, uint64_t pmu_flags)
{
	regs->ip = xen_regs->ip;
	regs->cs = xen_regs->cs;
	regs->sp = xen_regs->sp;

	if (pmu_flags & PMU_SAMPLE_PV) {
		if (pmu_flags & PMU_SAMPLE_USER)
			regs->cs |= 3;
		else
			regs->cs &= ~3;
	} else {
		if (xen_regs->cpl)
			regs->cs |= 3;
		else
			regs->cs &= ~3;
	}
}

irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
{
	int err, ret = IRQ_NONE;
	struct pt_regs regs = {0};
	const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
	uint8_t xenpmu_flags = get_xenpmu_flags();

	if (!xenpmu_data) {
		pr_warn_once("%s: pmudata not initialized\n", __func__);
		return ret;
	}

	this_cpu_ptr(&xenpmu_shared)->flags =
		xenpmu_flags | XENPMU_IRQ_PROCESSING;
	xen_convert_regs(&xenpmu_data->pmu.r.regs, &regs,
			 xenpmu_data->pmu.pmu_flags);
	if (x86_pmu.handle_irq(&regs))
		ret = IRQ_HANDLED;

	/* Write out cached context to HW */
	err = HYPERVISOR_xenpmu_op(XENPMU_flush, NULL);
	this_cpu_ptr(&xenpmu_shared)->flags = xenpmu_flags;
	if (err) {
		pr_warn_once("%s: failed hypercall, err: %d\n", __func__, err);
		return IRQ_NONE;
	}

	return ret;
}

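/* Set once VPMU initialization has succeeded on the first CPU. */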
bool is_xen_pmu;

void xen_pmu_init(int cpu)
{
	int err;
	struct xen_pmu_params xp;
	unsigned long pfn;
	struct xen_pmu_data *xenpmu_data;

	BUILD_BUG_ON(sizeof(struct xen_pmu_data) > PAGE_SIZE);

	if (xen_hvm_domain() || (cpu != 0 && !is_xen_pmu))
		return;

	xenpmu_data = (struct xen_pmu_data *)get_zeroed_page(GFP_KERNEL);
	if (!xenpmu_data) {
		pr_err("VPMU init: No memory\n");
		return;
	}
	pfn = virt_to_pfn(xenpmu_data);

	xp.val = pfn_to_mfn(pfn);
	xp.vcpu = cpu;
	xp.version.maj = XENPMU_VER_MAJ;
	xp.version.min = XENPMU_VER_MIN;
	err = HYPERVISOR_xenpmu_op(XENPMU_init, &xp);
	if (err)
		goto fail;

	per_cpu(xenpmu_shared, cpu).xenpmu_data = xenpmu_data;
	per_cpu(xenpmu_shared, cpu).flags = 0;

	if (!is_xen_pmu) {
		is_xen_pmu = true;
		perf_register_guest_info_callbacks(&xen_guest_cbs);
		xen_pmu_arch_init();
	}

	return;

fail:
	if (err == -EOPNOTSUPP || err == -ENOSYS)
		pr_info_once("VPMU disabled by hypervisor.\n");
	else
		pr_info_once("Could not initialize VPMU for cpu %d, error %d\n",
			     cpu, err);
	free_pages((unsigned long)xenpmu_data, 0);
}

void xen_pmu_finish(int cpu)
{
	struct xen_pmu_params xp;

	if (xen_hvm_domain())
		return;

	xp.vcpu = cpu;
	xp.version.maj = XENPMU_VER_MAJ;
	xp.version.min = XENPMU_VER_MIN;

	(void)HYPERVISOR_xenpmu_op(XENPMU_finish, &xp);

	free_pages((unsigned long)per_cpu(xenpmu_shared, cpu).xenpmu_data, 0);
	per_cpu(xenpmu_shared, cpu).xenpmu_data = NULL;
}