// SPDX-License-Identifier: GPL-2.0-only
/*
 * KVM PMU support for Intel CPUs
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Avi Kivity   <avi@redhat.com>
 *   Gleb Natapov <gleb@redhat.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/types.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
#include <asm/perf_event.h>
#include "x86.h"
#include "cpuid.h"
#include "lapic.h"
#include "nested.h"
#include "pmu.h"

#define MSR_PMC_FULL_WIDTH_BIT	(MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)

static struct {
	u8 eventsel;
	u8 unit_mask;
} const intel_arch_events[] = {
	[0] = { 0x3c, 0x00 },
	[1] = { 0xc0, 0x00 },
	[2] = { 0x3c, 0x01 },
	[3] = { 0x2e, 0x4f },
	[4] = { 0x2e, 0x41 },
	[5] = { 0xc4, 0x00 },
	[6] = { 0xc5, 0x00 },
	/* The above index must match CPUID 0x0A.EBX bit vector */
	[7] = { 0x00, 0x03 },
};

/* mapping between fixed pmc index and intel_arch_events array */
static int fixed_pmc_events[] = {1, 0, 7};

static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
{
	struct kvm_pmc *pmc;
	u8 old_fixed_ctr_ctrl = pmu->fixed_ctr_ctrl;
	int i;

	pmu->fixed_ctr_ctrl = data;
	for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
		u8 new_ctrl = fixed_ctrl_field(data, i);
		u8 old_ctrl = fixed_ctrl_field(old_fixed_ctr_ctrl, i);

		if (old_ctrl == new_ctrl)
			continue;

		pmc = get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + i);

		__set_bit(INTEL_PMC_IDX_FIXED + i, pmu->pmc_in_use);
		kvm_pmu_request_counter_reprogram(pmc);
	}
}

static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
{
	if (pmc_idx < INTEL_PMC_IDX_FIXED) {
		return get_gp_pmc(pmu, MSR_P6_EVNTSEL0 + pmc_idx,
				  MSR_P6_EVNTSEL0);
	} else {
		u32 idx = pmc_idx - INTEL_PMC_IDX_FIXED;

		return get_fixed_pmc(pmu, idx + MSR_CORE_PERF_FIXED_CTR0);
	}
}

static bool intel_hw_event_available(struct kvm_pmc *pmc)
{
	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
	u8 event_select = pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
	u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
	int i;

	for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++) {
		if (intel_arch_events[i].eventsel != event_select ||
		    intel_arch_events[i].unit_mask != unit_mask)
			continue;

		/* disable events that are reported as not present by cpuid */
		if ((i < 7) && !(pmu->available_event_types & (1 << i)))
			return false;

		break;
	}

	return true;
}

static bool intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	bool fixed = idx & (1u << 30);

	idx &= ~(3u << 30);

	return fixed ? idx < pmu->nr_arch_fixed_counters
		     : idx < pmu->nr_arch_gp_counters;
}

static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
					      unsigned int idx, u64 *mask)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	bool fixed = idx & (1u << 30);
	struct kvm_pmc *counters;
	unsigned int num_counters;

	idx &= ~(3u << 30);
	if (fixed) {
		counters = pmu->fixed_counters;
		num_counters = pmu->nr_arch_fixed_counters;
	} else {
		counters = pmu->gp_counters;
		num_counters = pmu->nr_arch_gp_counters;
	}
	if (idx >= num_counters)
		return NULL;
	*mask &= pmu->counter_bitmask[fixed ? KVM_PMC_FIXED : KVM_PMC_GP];
	return &counters[array_index_nospec(idx, num_counters)];
}
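
/*
 * Note on the RDPMC decoding above (illustrative; the values follow
 * directly from the code, not from any additional source): ECX bit 30
 * selects the fixed-counter space, so e.g. ECX = 0x40000001 names fixed
 * counter 1 while ECX = 0x00000001 names GP counter 1.  Bits 31:30 are
 * stripped before the range check, and *mask is narrowed to the bit
 * width KVM advertises for the selected counter type.
 */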

static inline u64 vcpu_get_perf_capabilities(struct kvm_vcpu *vcpu)
{
	if (!guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
		return 0;

	return vcpu->arch.perf_capabilities;
}

static inline bool fw_writes_is_enabled(struct kvm_vcpu *vcpu)
{
	return (vcpu_get_perf_capabilities(vcpu) & PMU_CAP_FW_WRITES) != 0;
}

static inline struct kvm_pmc *get_fw_gp_pmc(struct kvm_pmu *pmu, u32 msr)
{
	if (!fw_writes_is_enabled(pmu_to_vcpu(pmu)))
		return NULL;

	return get_gp_pmc(pmu, msr, MSR_IA32_PMC0);
}

static bool intel_pmu_is_valid_lbr_msr(struct kvm_vcpu *vcpu, u32 index)
{
	struct x86_pmu_lbr *records = vcpu_to_lbr_records(vcpu);
	bool ret = false;

	if (!intel_pmu_lbr_is_enabled(vcpu))
		return ret;

	ret = (index == MSR_LBR_SELECT) || (index == MSR_LBR_TOS) ||
	      (index >= records->from && index < records->from + records->nr) ||
	      (index >= records->to && index < records->to + records->nr);

	if (!ret && records->info)
		ret = (index >= records->info && index < records->info + records->nr);

	return ret;
}

static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	u64 perf_capabilities;
	int ret;

	switch (msr) {
	case MSR_CORE_PERF_FIXED_CTR_CTRL:
		return kvm_pmu_has_perf_global_ctrl(pmu);
	case MSR_IA32_PEBS_ENABLE:
		ret = vcpu_get_perf_capabilities(vcpu) & PERF_CAP_PEBS_FORMAT;
		break;
	case MSR_IA32_DS_AREA:
		ret = guest_cpuid_has(vcpu, X86_FEATURE_DS);
		break;
	case MSR_PEBS_DATA_CFG:
		perf_capabilities = vcpu_get_perf_capabilities(vcpu);
		ret = (perf_capabilities & PERF_CAP_PEBS_BASELINE) &&
		      ((perf_capabilities & PERF_CAP_PEBS_FORMAT) > 3);
		break;
	default:
		ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) ||
		      get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0) ||
		      get_fixed_pmc(pmu, msr) || get_fw_gp_pmc(pmu, msr) ||
		      intel_pmu_is_valid_lbr_msr(vcpu, msr);
		break;
	}

	return ret;
}

static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;

	pmc = get_fixed_pmc(pmu, msr);
	pmc = pmc ? pmc : get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0);
	pmc = pmc ? pmc : get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0);

	return pmc;
}
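
/*
 * Note (derived from the helpers above rather than separate documentation):
 * the guest's IA32_PERF_CAPABILITIES value is only honored when CPUID.PDCM
 * is exposed, and the full-width counter aliases (MSR_IA32_PMC0 and
 * friends) are only recognized when the guest has been granted
 * PMU_CAP_FW_WRITES; otherwise get_fw_gp_pmc() returns NULL and those MSRs
 * are treated as unknown.
 */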

static inline void intel_pmu_release_guest_lbr_event(struct kvm_vcpu *vcpu)
{
	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

	if (lbr_desc->event) {
		perf_event_release_kernel(lbr_desc->event);
		lbr_desc->event = NULL;
		vcpu_to_pmu(vcpu)->event_count--;
	}
}

int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu)
{
	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct perf_event *event;

	/*
	 * The perf_event_attr is constructed in a minimal but sufficient way:
	 * - set 'pinned = true' to make it task pinned so that if another
	 *   cpu-pinned event reclaims LBR, the event->oncpu will be set to -1;
	 * - set '.exclude_host = true' to record the guest's branch behavior;
	 *
	 * - set '.config = INTEL_FIXED_VLBR_EVENT' so that host perf
	 *   schedules the event on a fake counter rather than a real HW one;
	 *   check is_guest_lbr_event() and __intel_get_event_constraints();
	 *
	 * - set 'sample_type = PERF_SAMPLE_BRANCH_STACK' and
	 *   'branch_sample_type = PERF_SAMPLE_BRANCH_CALL_STACK |
	 *   PERF_SAMPLE_BRANCH_USER' to configure it as an LBR callstack
	 *   event, which helps KVM save/restore guest LBR records during
	 *   host context switches and greatly reduces overhead;
	 *   check branch_user_callstack() and intel_pmu_lbr_sched_task();
	 */
	struct perf_event_attr attr = {
		.type = PERF_TYPE_RAW,
		.size = sizeof(attr),
		.config = INTEL_FIXED_VLBR_EVENT,
		.sample_type = PERF_SAMPLE_BRANCH_STACK,
		.pinned = true,
		.exclude_host = true,
		.branch_sample_type = PERF_SAMPLE_BRANCH_CALL_STACK |
				      PERF_SAMPLE_BRANCH_USER,
	};

	if (unlikely(lbr_desc->event)) {
		__set_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use);
		return 0;
	}

	event = perf_event_create_kernel_counter(&attr, -1,
						 current, NULL, NULL);
	if (IS_ERR(event)) {
		pr_debug_ratelimited("%s: failed %ld\n",
				     __func__, PTR_ERR(event));
		return PTR_ERR(event);
	}
	lbr_desc->event = event;
	pmu->event_count++;
	__set_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use);
	return 0;
}

/*
 * It's safe to access LBR MSRs from the guest when they have not been
 * passed through, since the host will restore or reset the LBR MSR
 * records when the guest LBR event is scheduled in.
 */
static bool intel_pmu_handle_lbr_msrs_access(struct kvm_vcpu *vcpu,
					     struct msr_data *msr_info, bool read)
{
	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
	u32 index = msr_info->index;

	if (!intel_pmu_is_valid_lbr_msr(vcpu, index))
		return false;

	if (!lbr_desc->event && intel_pmu_create_guest_lbr_event(vcpu) < 0)
		goto dummy;

	/*
	 * Disable IRQs to ensure the LBR feature doesn't get reclaimed by the
	 * host while the MSR is accessed, which prevents host LBR values from
	 * leaking to the guest.  If LBR has been reclaimed, return 0 on guest
	 * reads.
	 */
	local_irq_disable();
	if (lbr_desc->event->state == PERF_EVENT_STATE_ACTIVE) {
		if (read)
			rdmsrl(index, msr_info->data);
		else
			wrmsrl(index, msr_info->data);
		__set_bit(INTEL_PMC_IDX_FIXED_VLBR, vcpu_to_pmu(vcpu)->pmc_in_use);
		local_irq_enable();
		return true;
	}
	clear_bit(INTEL_PMC_IDX_FIXED_VLBR, vcpu_to_pmu(vcpu)->pmc_in_use);
	local_irq_enable();

dummy:
	if (read)
		msr_info->data = 0;
	return true;
}
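
/*
 * Note on the MSR accessors below (descriptive only): counter reads are
 * masked to the bit width reported to the guest, so a read through either
 * MSR_IA32_PERFCTRx or its full-width MSR_IA32_PMCx alias returns the same
 * truncated value.  LBR MSR reads that find the LBR facility reclaimed by
 * the host return 0 via the "dummy" path above.
 */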

static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	u32 msr = msr_info->index;

	switch (msr) {
	case MSR_CORE_PERF_FIXED_CTR_CTRL:
		msr_info->data = pmu->fixed_ctr_ctrl;
		break;
	case MSR_IA32_PEBS_ENABLE:
		msr_info->data = pmu->pebs_enable;
		break;
	case MSR_IA32_DS_AREA:
		msr_info->data = pmu->ds_area;
		break;
	case MSR_PEBS_DATA_CFG:
		msr_info->data = pmu->pebs_data_cfg;
		break;
	default:
		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
			u64 val = pmc_read_counter(pmc);
			msr_info->data =
				val & pmu->counter_bitmask[KVM_PMC_GP];
			break;
		} else if ((pmc = get_fixed_pmc(pmu, msr))) {
			u64 val = pmc_read_counter(pmc);
			msr_info->data =
				val & pmu->counter_bitmask[KVM_PMC_FIXED];
			break;
		} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
			msr_info->data = pmc->eventsel;
			break;
		} else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, true)) {
			break;
		}
		return 1;
	}

	return 0;
}

static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	u32 msr = msr_info->index;
	u64 data = msr_info->data;
	u64 reserved_bits, diff;

	switch (msr) {
	case MSR_CORE_PERF_FIXED_CTR_CTRL:
		if (data & pmu->fixed_ctr_ctrl_mask)
			return 1;

		if (pmu->fixed_ctr_ctrl != data)
			reprogram_fixed_counters(pmu, data);
		break;
	case MSR_IA32_PEBS_ENABLE:
		if (data & pmu->pebs_enable_mask)
			return 1;

		if (pmu->pebs_enable != data) {
			diff = pmu->pebs_enable ^ data;
			pmu->pebs_enable = data;
			reprogram_counters(pmu, diff);
		}
		break;
	case MSR_IA32_DS_AREA:
		if (is_noncanonical_address(data, vcpu))
			return 1;

		pmu->ds_area = data;
		break;
	case MSR_PEBS_DATA_CFG:
		if (data & pmu->pebs_data_cfg_mask)
			return 1;

		pmu->pebs_data_cfg = data;
		break;
	default:
		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
			if ((msr & MSR_PMC_FULL_WIDTH_BIT) &&
			    (data & ~pmu->counter_bitmask[KVM_PMC_GP]))
				return 1;

			if (!msr_info->host_initiated &&
			    !(msr & MSR_PMC_FULL_WIDTH_BIT))
				data = (s64)(s32)data;
			pmc->counter += data - pmc_read_counter(pmc);
			pmc_update_sample_period(pmc);
			break;
		} else if ((pmc = get_fixed_pmc(pmu, msr))) {
			pmc->counter += data - pmc_read_counter(pmc);
			pmc_update_sample_period(pmc);
			break;
		} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
			reserved_bits = pmu->reserved_bits;
			if ((pmc->idx == 2) &&
			    (pmu->raw_event_mask & HSW_IN_TX_CHECKPOINTED))
				reserved_bits ^= HSW_IN_TX_CHECKPOINTED;
			if (data & reserved_bits)
				return 1;

			if (data != pmc->eventsel) {
				pmc->eventsel = data;
				kvm_pmu_request_counter_reprogram(pmc);
			}
			break;
		} else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, false)) {
			break;
		}
		/* Not a known PMU MSR. */
		return 1;
	}

	return 0;
}
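
/*
 * Each fixed counter is given the eventsel/umask pair of the architectural
 * event it counts so the generic reprogramming code can treat it like a GP
 * counter: fixed_pmc_events[] maps fixed counter 0 to intel_arch_events[1]
 * (0xc0, instructions retired), fixed counter 1 to intel_arch_events[0]
 * (0x3c, unhalted core cycles), and fixed counter 2 to the pseudo-encoded
 * intel_arch_events[7] (config 0x0300, which host perf understands as
 * reference cycles).
 */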

static void setup_fixed_pmc_eventsel(struct kvm_pmu *pmu)
{
	size_t size = ARRAY_SIZE(fixed_pmc_events);
	struct kvm_pmc *pmc;
	u32 event;
	int i;

	for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
		pmc = &pmu->fixed_counters[i];
		event = fixed_pmc_events[array_index_nospec(i, size)];
		pmc->eventsel = (intel_arch_events[event].unit_mask << 8) |
				intel_arch_events[event].eventsel;
	}
}

static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
	struct kvm_cpuid_entry2 *entry;
	union cpuid10_eax eax;
	union cpuid10_edx edx;
	u64 perf_capabilities;
	u64 counter_mask;
	int i;

	pmu->nr_arch_gp_counters = 0;
	pmu->nr_arch_fixed_counters = 0;
	pmu->counter_bitmask[KVM_PMC_GP] = 0;
	pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
	pmu->version = 0;
	pmu->reserved_bits = 0xffffffff00200000ull;
	pmu->raw_event_mask = X86_RAW_EVENT_MASK;
	pmu->global_ctrl_mask = ~0ull;
	pmu->global_status_mask = ~0ull;
	pmu->fixed_ctr_ctrl_mask = ~0ull;
	pmu->pebs_enable_mask = ~0ull;
	pmu->pebs_data_cfg_mask = ~0ull;

	memset(&lbr_desc->records, 0, sizeof(lbr_desc->records));

	/*
	 * Setting passthrough of LBR MSRs is done only in the VM-Entry loop,
	 * and PMU refresh is disallowed after the vCPU has run, i.e. this code
	 * should never be reached while KVM is passing through MSRs.
	 */
	if (KVM_BUG_ON(lbr_desc->msr_passthrough, vcpu->kvm))
		return;

	entry = kvm_find_cpuid_entry(vcpu, 0xa);
	if (!entry || !vcpu->kvm->arch.enable_pmu)
		return;
	eax.full = entry->eax;
	edx.full = entry->edx;

	pmu->version = eax.split.version_id;
	if (!pmu->version)
		return;

	pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters,
					 kvm_pmu_cap.num_counters_gp);
	eax.split.bit_width = min_t(int, eax.split.bit_width,
				    kvm_pmu_cap.bit_width_gp);
	pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1;
	eax.split.mask_length = min_t(int, eax.split.mask_length,
				      kvm_pmu_cap.events_mask_len);
	pmu->available_event_types = ~entry->ebx &
				     ((1ull << eax.split.mask_length) - 1);

	if (pmu->version == 1) {
		pmu->nr_arch_fixed_counters = 0;
	} else {
		pmu->nr_arch_fixed_counters =
			min3(ARRAY_SIZE(fixed_pmc_events),
			     (size_t)edx.split.num_counters_fixed,
			     (size_t)kvm_pmu_cap.num_counters_fixed);
		edx.split.bit_width_fixed = min_t(int, edx.split.bit_width_fixed,
						  kvm_pmu_cap.bit_width_fixed);
		pmu->counter_bitmask[KVM_PMC_FIXED] =
			((u64)1 << edx.split.bit_width_fixed) - 1;
		setup_fixed_pmc_eventsel(pmu);
	}

	for (i = 0; i < pmu->nr_arch_fixed_counters; i++)
		pmu->fixed_ctr_ctrl_mask &= ~(0xbull << (i * 4));
	counter_mask = ~(((1ull << pmu->nr_arch_gp_counters) - 1) |
		(((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED));
	pmu->global_ctrl_mask = counter_mask;

	/*
	 * GLOBAL_STATUS and GLOBAL_OVF_CONTROL (a.k.a. GLOBAL_STATUS_RESET)
	 * share reserved bit definitions.  The kernel just happens to use
	 * OVF_CTRL for the names.
	 */
	pmu->global_status_mask = pmu->global_ctrl_mask
			& ~(MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF |
			    MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD);
	if (vmx_pt_mode_is_host_guest())
		pmu->global_status_mask &=
				~MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI;

	entry = kvm_find_cpuid_entry_index(vcpu, 7, 0);
	if (entry &&
	    (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) &&
	    (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM))) {
		pmu->reserved_bits ^= HSW_IN_TX;
		pmu->raw_event_mask |= (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED);
	}

	bitmap_set(pmu->all_valid_pmc_idx,
		   0, pmu->nr_arch_gp_counters);
	bitmap_set(pmu->all_valid_pmc_idx,
		   INTEL_PMC_MAX_GENERIC, pmu->nr_arch_fixed_counters);

	perf_capabilities = vcpu_get_perf_capabilities(vcpu);
	if (cpuid_model_is_consistent(vcpu) &&
	    (perf_capabilities & PMU_CAP_LBR_FMT))
		x86_perf_get_lbr(&lbr_desc->records);
	else
		lbr_desc->records.nr = 0;

	if (lbr_desc->records.nr)
		bitmap_set(pmu->all_valid_pmc_idx, INTEL_PMC_IDX_FIXED_VLBR, 1);

	if (perf_capabilities & PERF_CAP_PEBS_FORMAT) {
		if (perf_capabilities & PERF_CAP_PEBS_BASELINE) {
			pmu->pebs_enable_mask = counter_mask;
			pmu->reserved_bits &= ~ICL_EVENTSEL_ADAPTIVE;
			for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
				pmu->fixed_ctr_ctrl_mask &=
					~(1ULL << (INTEL_PMC_IDX_FIXED + i * 4));
			}
			pmu->pebs_data_cfg_mask = ~0xff00000full;
		} else {
			pmu->pebs_enable_mask =
				~((1ull << pmu->nr_arch_gp_counters) - 1);
		}
	}
}
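
/*
 * intel_pmu_init() below only establishes the static identity of each
 * counter (type, owning vCPU, global index); the guest-visible topology
 * (counter counts, bit widths, available events, reserved-bit masks) is
 * derived from guest CPUID by intel_pmu_refresh() above and is expected to
 * be recomputed whenever userspace updates the vCPU's CPUID.
 */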

static void intel_pmu_init(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

	for (i = 0; i < KVM_INTEL_PMC_MAX_GENERIC; i++) {
		pmu->gp_counters[i].type = KVM_PMC_GP;
		pmu->gp_counters[i].vcpu = vcpu;
		pmu->gp_counters[i].idx = i;
		pmu->gp_counters[i].current_config = 0;
	}

	for (i = 0; i < KVM_PMC_MAX_FIXED; i++) {
		pmu->fixed_counters[i].type = KVM_PMC_FIXED;
		pmu->fixed_counters[i].vcpu = vcpu;
		pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED;
		pmu->fixed_counters[i].current_config = 0;
	}

	lbr_desc->records.nr = 0;
	lbr_desc->event = NULL;
	lbr_desc->msr_passthrough = false;
}
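
/*
 * Reset clears all running state: counter values, GP event selectors,
 * fixed counter control and the global control/status MSRs, and releases
 * the vLBR perf event.  The CPUID-derived topology established by
 * intel_pmu_refresh() is left untouched.
 */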

static void intel_pmu_reset(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc = NULL;
	int i;

	for (i = 0; i < KVM_INTEL_PMC_MAX_GENERIC; i++) {
		pmc = &pmu->gp_counters[i];

		pmc_stop_counter(pmc);
		pmc->counter = pmc->prev_counter = pmc->eventsel = 0;
	}

	for (i = 0; i < KVM_PMC_MAX_FIXED; i++) {
		pmc = &pmu->fixed_counters[i];

		pmc_stop_counter(pmc);
		pmc->counter = pmc->prev_counter = 0;
	}

	pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status = 0;

	intel_pmu_release_guest_lbr_event(vcpu);
}

/*
 * Emulate Freeze_LBR_On_PMI behavior for 1 < pmu.version < 4.
 *
 * If Freeze_LBR_On_PMI = 1, the LBR stack is frozen on PMI, and KVM
 * emulates this by clearing the LBR enable bit (bit 0) in IA32_DEBUGCTL.
 *
 * The guest needs to re-enable LBR to resume recording branches.
 */
static void intel_pmu_legacy_freezing_lbrs_on_pmi(struct kvm_vcpu *vcpu)
{
	u64 data = vmcs_read64(GUEST_IA32_DEBUGCTL);

	if (data & DEBUGCTLMSR_FREEZE_LBRS_ON_PMI) {
		data &= ~DEBUGCTLMSR_LBR;
		vmcs_write64(GUEST_IA32_DEBUGCTL, data);
	}
}

static void intel_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
{
	u8 version = vcpu_to_pmu(vcpu)->version;

	if (!intel_pmu_lbr_is_enabled(vcpu))
		return;

	if (version > 1 && version < 4)
		intel_pmu_legacy_freezing_lbrs_on_pmi(vcpu);
}

static void vmx_update_intercept_for_lbr_msrs(struct kvm_vcpu *vcpu, bool set)
{
	struct x86_pmu_lbr *lbr = vcpu_to_lbr_records(vcpu);
	int i;

	for (i = 0; i < lbr->nr; i++) {
		vmx_set_intercept_for_msr(vcpu, lbr->from + i, MSR_TYPE_RW, set);
		vmx_set_intercept_for_msr(vcpu, lbr->to + i, MSR_TYPE_RW, set);
		if (lbr->info)
			vmx_set_intercept_for_msr(vcpu, lbr->info + i, MSR_TYPE_RW, set);
	}

	vmx_set_intercept_for_msr(vcpu, MSR_LBR_SELECT, MSR_TYPE_RW, set);
	vmx_set_intercept_for_msr(vcpu, MSR_LBR_TOS, MSR_TYPE_RW, set);
}

static inline void vmx_disable_lbr_msrs_passthrough(struct kvm_vcpu *vcpu)
{
	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

	if (!lbr_desc->msr_passthrough)
		return;

	vmx_update_intercept_for_lbr_msrs(vcpu, true);
	lbr_desc->msr_passthrough = false;
}

static inline void vmx_enable_lbr_msrs_passthrough(struct kvm_vcpu *vcpu)
{
	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

	if (lbr_desc->msr_passthrough)
		return;

	vmx_update_intercept_for_lbr_msrs(vcpu, false);
	lbr_desc->msr_passthrough = true;
}
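
/*
 * Note on the two helpers above: they flip MSR interception for the whole
 * LBR register set (the from/to/info stacks plus LBR_SELECT and LBR_TOS)
 * and use lbr_desc->msr_passthrough to make the toggling idempotent, so
 * vmx_passthrough_lbr_msrs() below can be called on every VM-Entry without
 * redundant VMCS/bitmap updates.
 */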

/*
 * Higher-priority host perf events (e.g. CPU-pinned events) can reclaim
 * PMU resources (e.g. LBR) that were assigned to the guest.  This is
 * usually done via IPIs (see perf_install_in_context() for details).
 *
 * Before entering non-root mode (with IRQs disabled here), double check
 * that the PMU features granted to the guest have not been reclaimed by
 * higher-priority host events.  Otherwise, disallow the vCPU's access to
 * the reclaimed features.
 */
void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);

	if (!lbr_desc->event) {
		vmx_disable_lbr_msrs_passthrough(vcpu);
		if (vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR)
			goto warn;
		if (test_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use))
			goto warn;
		return;
	}

	if (lbr_desc->event->state < PERF_EVENT_STATE_ACTIVE) {
		vmx_disable_lbr_msrs_passthrough(vcpu);
		__clear_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use);
		goto warn;
	} else
		vmx_enable_lbr_msrs_passthrough(vcpu);

	return;

warn:
	pr_warn_ratelimited("vcpu-%d: fail to passthrough LBR.\n", vcpu->vcpu_id);
}

static void intel_pmu_cleanup(struct kvm_vcpu *vcpu)
{
	if (!(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR))
		intel_pmu_release_guest_lbr_event(vcpu);
}

void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu)
{
	struct kvm_pmc *pmc = NULL;
	int bit, hw_idx;

	for_each_set_bit(bit, (unsigned long *)&pmu->global_ctrl,
			 X86_PMC_IDX_MAX) {
		pmc = intel_pmc_idx_to_pmc(pmu, bit);

		if (!pmc || !pmc_speculative_in_use(pmc) ||
		    !pmc_is_globally_enabled(pmc) || !pmc->perf_event)
			continue;

		/*
		 * A negative index indicates the event isn't mapped to a
		 * physical counter in the host, e.g. due to contention.
		 */
		hw_idx = pmc->perf_event->hw.idx;
		if (hw_idx != pmc->idx && hw_idx > -1)
			pmu->host_cross_mapped_mask |= BIT_ULL(hw_idx);
	}
}

struct kvm_pmu_ops intel_pmu_ops __initdata = {
	.hw_event_available = intel_hw_event_available,
	.pmc_idx_to_pmc = intel_pmc_idx_to_pmc,
	.rdpmc_ecx_to_pmc = intel_rdpmc_ecx_to_pmc,
	.msr_idx_to_pmc = intel_msr_idx_to_pmc,
	.is_valid_rdpmc_ecx = intel_is_valid_rdpmc_ecx,
	.is_valid_msr = intel_is_valid_msr,
	.get_msr = intel_pmu_get_msr,
	.set_msr = intel_pmu_set_msr,
	.refresh = intel_pmu_refresh,
	.init = intel_pmu_init,
	.reset = intel_pmu_reset,
	.deliver_pmi = intel_pmu_deliver_pmi,
	.cleanup = intel_pmu_cleanup,
	.EVENTSEL_EVENT = ARCH_PERFMON_EVENTSEL_EVENT,
	.MAX_NR_GP_COUNTERS = KVM_INTEL_PMC_MAX_GENERIC,
	.MIN_NR_GP_COUNTERS = 1,
};