1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2015 Linaro Ltd. 4 * Author: Shannon Zhao <shannon.zhao@linaro.org> 5 */ 6 7 #include <linux/cpu.h> 8 #include <linux/kvm.h> 9 #include <linux/kvm_host.h> 10 #include <linux/list.h> 11 #include <linux/perf_event.h> 12 #include <linux/perf/arm_pmu.h> 13 #include <linux/uaccess.h> 14 #include <asm/kvm_emulate.h> 15 #include <kvm/arm_pmu.h> 16 #include <kvm/arm_vgic.h> 17 18 #define PERF_ATTR_CFG1_COUNTER_64BIT BIT(0) 19 20 DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available); 21 22 static LIST_HEAD(arm_pmus); 23 static DEFINE_MUTEX(arm_pmus_lock); 24 25 static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc); 26 static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc); 27 28 static struct kvm_vcpu *kvm_pmc_to_vcpu(const struct kvm_pmc *pmc) 29 { 30 return container_of(pmc, struct kvm_vcpu, arch.pmu.pmc[pmc->idx]); 31 } 32 33 static struct kvm_pmc *kvm_vcpu_idx_to_pmc(struct kvm_vcpu *vcpu, int cnt_idx) 34 { 35 return &vcpu->arch.pmu.pmc[cnt_idx]; 36 } 37 38 static u32 __kvm_pmu_event_mask(unsigned int pmuver) 39 { 40 switch (pmuver) { 41 case ID_AA64DFR0_EL1_PMUVer_IMP: 42 return GENMASK(9, 0); 43 case ID_AA64DFR0_EL1_PMUVer_V3P1: 44 case ID_AA64DFR0_EL1_PMUVer_V3P4: 45 case ID_AA64DFR0_EL1_PMUVer_V3P5: 46 case ID_AA64DFR0_EL1_PMUVer_V3P7: 47 return GENMASK(15, 0); 48 default: /* Shouldn't be here, just for sanity */ 49 WARN_ONCE(1, "Unknown PMU version %d\n", pmuver); 50 return 0; 51 } 52 } 53 54 static u32 kvm_pmu_event_mask(struct kvm *kvm) 55 { 56 u64 dfr0 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64DFR0_EL1); 57 u8 pmuver = SYS_FIELD_GET(ID_AA64DFR0_EL1, PMUVer, dfr0); 58 59 return __kvm_pmu_event_mask(pmuver); 60 } 61 62 u64 kvm_pmu_evtyper_mask(struct kvm *kvm) 63 { 64 u64 mask = ARMV8_PMU_EXCLUDE_EL1 | ARMV8_PMU_EXCLUDE_EL0 | 65 kvm_pmu_event_mask(kvm); 66 67 if (kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL2, IMP)) 68 mask |= ARMV8_PMU_INCLUDE_EL2; 69 70 if (kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL3, IMP)) 71 mask |= ARMV8_PMU_EXCLUDE_NS_EL0 | 72 ARMV8_PMU_EXCLUDE_NS_EL1 | 73 ARMV8_PMU_EXCLUDE_EL3; 74 75 return mask; 76 } 77 78 /** 79 * kvm_pmc_is_64bit - determine if counter is 64bit 80 * @pmc: counter context 81 */ 82 static bool kvm_pmc_is_64bit(struct kvm_pmc *pmc) 83 { 84 struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); 85 86 return (pmc->idx == ARMV8_PMU_CYCLE_IDX || 87 kvm_has_feat(vcpu->kvm, ID_AA64DFR0_EL1, PMUVer, V3P5)); 88 } 89 90 static bool kvm_pmc_has_64bit_overflow(struct kvm_pmc *pmc) 91 { 92 u64 val = kvm_vcpu_read_pmcr(kvm_pmc_to_vcpu(pmc)); 93 94 return (pmc->idx < ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LP)) || 95 (pmc->idx == ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LC)); 96 } 97 98 static bool kvm_pmu_counter_can_chain(struct kvm_pmc *pmc) 99 { 100 return (!(pmc->idx & 1) && (pmc->idx + 1) < ARMV8_PMU_CYCLE_IDX && 101 !kvm_pmc_has_64bit_overflow(pmc)); 102 } 103 104 static u32 counter_index_to_reg(u64 idx) 105 { 106 return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + idx; 107 } 108 109 static u32 counter_index_to_evtreg(u64 idx) 110 { 111 return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + idx; 112 } 113 114 static u64 kvm_pmu_get_pmc_value(struct kvm_pmc *pmc) 115 { 116 struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); 117 u64 counter, reg, enabled, running; 118 119 reg = counter_index_to_reg(pmc->idx); 120 counter = __vcpu_sys_reg(vcpu, reg); 121 122 /* 123 * The real counter value is equal to the value of counter register plus 124 * the value perf event counts. 125 */ 126 if (pmc->perf_event) 127 counter += perf_event_read_value(pmc->perf_event, &enabled, 128 &running); 129 130 if (!kvm_pmc_is_64bit(pmc)) 131 counter = lower_32_bits(counter); 132 133 return counter; 134 } 135 136 /** 137 * kvm_pmu_get_counter_value - get PMU counter value 138 * @vcpu: The vcpu pointer 139 * @select_idx: The counter index 140 */ 141 u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) 142 { 143 if (!kvm_vcpu_has_pmu(vcpu)) 144 return 0; 145 146 return kvm_pmu_get_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx)); 147 } 148 149 static void kvm_pmu_set_pmc_value(struct kvm_pmc *pmc, u64 val, bool force) 150 { 151 struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); 152 u64 reg; 153 154 kvm_pmu_release_perf_event(pmc); 155 156 reg = counter_index_to_reg(pmc->idx); 157 158 if (vcpu_mode_is_32bit(vcpu) && pmc->idx != ARMV8_PMU_CYCLE_IDX && 159 !force) { 160 /* 161 * Even with PMUv3p5, AArch32 cannot write to the top 162 * 32bit of the counters. The only possible course of 163 * action is to use PMCR.P, which will reset them to 164 * 0 (the only use of the 'force' parameter). 165 */ 166 val = __vcpu_sys_reg(vcpu, reg) & GENMASK(63, 32); 167 val |= lower_32_bits(val); 168 } 169 170 __vcpu_sys_reg(vcpu, reg) = val; 171 172 /* Recreate the perf event to reflect the updated sample_period */ 173 kvm_pmu_create_perf_event(pmc); 174 } 175 176 /** 177 * kvm_pmu_set_counter_value - set PMU counter value 178 * @vcpu: The vcpu pointer 179 * @select_idx: The counter index 180 * @val: The counter value 181 */ 182 void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) 183 { 184 if (!kvm_vcpu_has_pmu(vcpu)) 185 return; 186 187 kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx), val, false); 188 } 189 190 /** 191 * kvm_pmu_release_perf_event - remove the perf event 192 * @pmc: The PMU counter pointer 193 */ 194 static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc) 195 { 196 if (pmc->perf_event) { 197 perf_event_disable(pmc->perf_event); 198 perf_event_release_kernel(pmc->perf_event); 199 pmc->perf_event = NULL; 200 } 201 } 202 203 /** 204 * kvm_pmu_stop_counter - stop PMU counter 205 * @pmc: The PMU counter pointer 206 * 207 * If this counter has been configured to monitor some event, release it here. 208 */ 209 static void kvm_pmu_stop_counter(struct kvm_pmc *pmc) 210 { 211 struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); 212 u64 reg, val; 213 214 if (!pmc->perf_event) 215 return; 216 217 val = kvm_pmu_get_pmc_value(pmc); 218 219 reg = counter_index_to_reg(pmc->idx); 220 221 __vcpu_sys_reg(vcpu, reg) = val; 222 223 kvm_pmu_release_perf_event(pmc); 224 } 225 226 /** 227 * kvm_pmu_vcpu_init - assign pmu counter idx for cpu 228 * @vcpu: The vcpu pointer 229 * 230 */ 231 void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu) 232 { 233 int i; 234 struct kvm_pmu *pmu = &vcpu->arch.pmu; 235 236 for (i = 0; i < KVM_ARMV8_PMU_MAX_COUNTERS; i++) 237 pmu->pmc[i].idx = i; 238 } 239 240 /** 241 * kvm_pmu_vcpu_reset - reset pmu state for cpu 242 * @vcpu: The vcpu pointer 243 * 244 */ 245 void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) 246 { 247 unsigned long mask = kvm_pmu_valid_counter_mask(vcpu); 248 int i; 249 250 for_each_set_bit(i, &mask, 32) 251 kvm_pmu_stop_counter(kvm_vcpu_idx_to_pmc(vcpu, i)); 252 } 253 254 /** 255 * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu 256 * @vcpu: The vcpu pointer 257 * 258 */ 259 void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) 260 { 261 int i; 262 263 for (i = 0; i < KVM_ARMV8_PMU_MAX_COUNTERS; i++) 264 kvm_pmu_release_perf_event(kvm_vcpu_idx_to_pmc(vcpu, i)); 265 irq_work_sync(&vcpu->arch.pmu.overflow_work); 266 } 267 268 u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu) 269 { 270 u64 val = FIELD_GET(ARMV8_PMU_PMCR_N, kvm_vcpu_read_pmcr(vcpu)); 271 272 if (val == 0) 273 return BIT(ARMV8_PMU_CYCLE_IDX); 274 else 275 return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX); 276 } 277 278 /** 279 * kvm_pmu_enable_counter_mask - enable selected PMU counters 280 * @vcpu: The vcpu pointer 281 * @val: the value guest writes to PMCNTENSET register 282 * 283 * Call perf_event_enable to start counting the perf event 284 */ 285 void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val) 286 { 287 int i; 288 if (!kvm_vcpu_has_pmu(vcpu)) 289 return; 290 291 if (!(kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E) || !val) 292 return; 293 294 for (i = 0; i < KVM_ARMV8_PMU_MAX_COUNTERS; i++) { 295 struct kvm_pmc *pmc; 296 297 if (!(val & BIT(i))) 298 continue; 299 300 pmc = kvm_vcpu_idx_to_pmc(vcpu, i); 301 302 if (!pmc->perf_event) { 303 kvm_pmu_create_perf_event(pmc); 304 } else { 305 perf_event_enable(pmc->perf_event); 306 if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE) 307 kvm_debug("fail to enable perf event\n"); 308 } 309 } 310 } 311 312 /** 313 * kvm_pmu_disable_counter_mask - disable selected PMU counters 314 * @vcpu: The vcpu pointer 315 * @val: the value guest writes to PMCNTENCLR register 316 * 317 * Call perf_event_disable to stop counting the perf event 318 */ 319 void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val) 320 { 321 int i; 322 323 if (!kvm_vcpu_has_pmu(vcpu) || !val) 324 return; 325 326 for (i = 0; i < KVM_ARMV8_PMU_MAX_COUNTERS; i++) { 327 struct kvm_pmc *pmc; 328 329 if (!(val & BIT(i))) 330 continue; 331 332 pmc = kvm_vcpu_idx_to_pmc(vcpu, i); 333 334 if (pmc->perf_event) 335 perf_event_disable(pmc->perf_event); 336 } 337 } 338 339 static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) 340 { 341 u64 reg = 0; 342 343 if ((kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E)) { 344 reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0); 345 reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); 346 reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1); 347 } 348 349 return reg; 350 } 351 352 static void kvm_pmu_update_state(struct kvm_vcpu *vcpu) 353 { 354 struct kvm_pmu *pmu = &vcpu->arch.pmu; 355 bool overflow; 356 357 if (!kvm_vcpu_has_pmu(vcpu)) 358 return; 359 360 overflow = !!kvm_pmu_overflow_status(vcpu); 361 if (pmu->irq_level == overflow) 362 return; 363 364 pmu->irq_level = overflow; 365 366 if (likely(irqchip_in_kernel(vcpu->kvm))) { 367 int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu, 368 pmu->irq_num, overflow, pmu); 369 WARN_ON(ret); 370 } 371 } 372 373 bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu) 374 { 375 struct kvm_pmu *pmu = &vcpu->arch.pmu; 376 struct kvm_sync_regs *sregs = &vcpu->run->s.regs; 377 bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU; 378 379 if (likely(irqchip_in_kernel(vcpu->kvm))) 380 return false; 381 382 return pmu->irq_level != run_level; 383 } 384 385 /* 386 * Reflect the PMU overflow interrupt output level into the kvm_run structure 387 */ 388 void kvm_pmu_update_run(struct kvm_vcpu *vcpu) 389 { 390 struct kvm_sync_regs *regs = &vcpu->run->s.regs; 391 392 /* Populate the timer bitmap for user space */ 393 regs->device_irq_level &= ~KVM_ARM_DEV_PMU; 394 if (vcpu->arch.pmu.irq_level) 395 regs->device_irq_level |= KVM_ARM_DEV_PMU; 396 } 397 398 /** 399 * kvm_pmu_flush_hwstate - flush pmu state to cpu 400 * @vcpu: The vcpu pointer 401 * 402 * Check if the PMU has overflowed while we were running in the host, and inject 403 * an interrupt if that was the case. 404 */ 405 void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu) 406 { 407 kvm_pmu_update_state(vcpu); 408 } 409 410 /** 411 * kvm_pmu_sync_hwstate - sync pmu state from cpu 412 * @vcpu: The vcpu pointer 413 * 414 * Check if the PMU has overflowed while we were running in the guest, and 415 * inject an interrupt if that was the case. 416 */ 417 void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu) 418 { 419 kvm_pmu_update_state(vcpu); 420 } 421 422 /* 423 * When perf interrupt is an NMI, we cannot safely notify the vcpu corresponding 424 * to the event. 425 * This is why we need a callback to do it once outside of the NMI context. 426 */ 427 static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work) 428 { 429 struct kvm_vcpu *vcpu; 430 431 vcpu = container_of(work, struct kvm_vcpu, arch.pmu.overflow_work); 432 kvm_vcpu_kick(vcpu); 433 } 434 435 /* 436 * Perform an increment on any of the counters described in @mask, 437 * generating the overflow if required, and propagate it as a chained 438 * event if possible. 439 */ 440 static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu, 441 unsigned long mask, u32 event) 442 { 443 int i; 444 445 if (!(kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E)) 446 return; 447 448 /* Weed out disabled counters */ 449 mask &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); 450 451 for_each_set_bit(i, &mask, ARMV8_PMU_CYCLE_IDX) { 452 struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, i); 453 u64 type, reg; 454 455 /* Filter on event type */ 456 type = __vcpu_sys_reg(vcpu, counter_index_to_evtreg(i)); 457 type &= kvm_pmu_event_mask(vcpu->kvm); 458 if (type != event) 459 continue; 460 461 /* Increment this counter */ 462 reg = __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) + 1; 463 if (!kvm_pmc_is_64bit(pmc)) 464 reg = lower_32_bits(reg); 465 __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) = reg; 466 467 /* No overflow? move on */ 468 if (kvm_pmc_has_64bit_overflow(pmc) ? reg : lower_32_bits(reg)) 469 continue; 470 471 /* Mark overflow */ 472 __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); 473 474 if (kvm_pmu_counter_can_chain(pmc)) 475 kvm_pmu_counter_increment(vcpu, BIT(i + 1), 476 ARMV8_PMUV3_PERFCTR_CHAIN); 477 } 478 } 479 480 /* Compute the sample period for a given counter value */ 481 static u64 compute_period(struct kvm_pmc *pmc, u64 counter) 482 { 483 u64 val; 484 485 if (kvm_pmc_is_64bit(pmc) && kvm_pmc_has_64bit_overflow(pmc)) 486 val = (-counter) & GENMASK(63, 0); 487 else 488 val = (-counter) & GENMASK(31, 0); 489 490 return val; 491 } 492 493 /* 494 * When the perf event overflows, set the overflow status and inform the vcpu. 495 */ 496 static void kvm_pmu_perf_overflow(struct perf_event *perf_event, 497 struct perf_sample_data *data, 498 struct pt_regs *regs) 499 { 500 struct kvm_pmc *pmc = perf_event->overflow_handler_context; 501 struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu); 502 struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); 503 int idx = pmc->idx; 504 u64 period; 505 506 cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE); 507 508 /* 509 * Reset the sample period to the architectural limit, 510 * i.e. the point where the counter overflows. 511 */ 512 period = compute_period(pmc, local64_read(&perf_event->count)); 513 514 local64_set(&perf_event->hw.period_left, 0); 515 perf_event->attr.sample_period = period; 516 perf_event->hw.sample_period = period; 517 518 __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx); 519 520 if (kvm_pmu_counter_can_chain(pmc)) 521 kvm_pmu_counter_increment(vcpu, BIT(idx + 1), 522 ARMV8_PMUV3_PERFCTR_CHAIN); 523 524 if (kvm_pmu_overflow_status(vcpu)) { 525 kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); 526 527 if (!in_nmi()) 528 kvm_vcpu_kick(vcpu); 529 else 530 irq_work_queue(&vcpu->arch.pmu.overflow_work); 531 } 532 533 cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD); 534 } 535 536 /** 537 * kvm_pmu_software_increment - do software increment 538 * @vcpu: The vcpu pointer 539 * @val: the value guest writes to PMSWINC register 540 */ 541 void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) 542 { 543 kvm_pmu_counter_increment(vcpu, val, ARMV8_PMUV3_PERFCTR_SW_INCR); 544 } 545 546 /** 547 * kvm_pmu_handle_pmcr - handle PMCR register 548 * @vcpu: The vcpu pointer 549 * @val: the value guest writes to PMCR register 550 */ 551 void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) 552 { 553 int i; 554 555 if (!kvm_vcpu_has_pmu(vcpu)) 556 return; 557 558 /* Fixup PMCR_EL0 to reconcile the PMU version and the LP bit */ 559 if (!kvm_has_feat(vcpu->kvm, ID_AA64DFR0_EL1, PMUVer, V3P5)) 560 val &= ~ARMV8_PMU_PMCR_LP; 561 562 /* The reset bits don't indicate any state, and shouldn't be saved. */ 563 __vcpu_sys_reg(vcpu, PMCR_EL0) = val & ~(ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_P); 564 565 if (val & ARMV8_PMU_PMCR_E) { 566 kvm_pmu_enable_counter_mask(vcpu, 567 __vcpu_sys_reg(vcpu, PMCNTENSET_EL0)); 568 } else { 569 kvm_pmu_disable_counter_mask(vcpu, 570 __vcpu_sys_reg(vcpu, PMCNTENSET_EL0)); 571 } 572 573 if (val & ARMV8_PMU_PMCR_C) 574 kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0); 575 576 if (val & ARMV8_PMU_PMCR_P) { 577 unsigned long mask = kvm_pmu_valid_counter_mask(vcpu); 578 mask &= ~BIT(ARMV8_PMU_CYCLE_IDX); 579 for_each_set_bit(i, &mask, 32) 580 kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, i), 0, true); 581 } 582 kvm_vcpu_pmu_restore_guest(vcpu); 583 } 584 585 static bool kvm_pmu_counter_is_enabled(struct kvm_pmc *pmc) 586 { 587 struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); 588 return (kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E) && 589 (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(pmc->idx)); 590 } 591 592 /** 593 * kvm_pmu_create_perf_event - create a perf event for a counter 594 * @pmc: Counter context 595 */ 596 static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc) 597 { 598 struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); 599 struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu; 600 struct perf_event *event; 601 struct perf_event_attr attr; 602 u64 eventsel, reg, data; 603 bool p, u, nsk, nsu; 604 605 reg = counter_index_to_evtreg(pmc->idx); 606 data = __vcpu_sys_reg(vcpu, reg); 607 608 kvm_pmu_stop_counter(pmc); 609 if (pmc->idx == ARMV8_PMU_CYCLE_IDX) 610 eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES; 611 else 612 eventsel = data & kvm_pmu_event_mask(vcpu->kvm); 613 614 /* 615 * Neither SW increment nor chained events need to be backed 616 * by a perf event. 617 */ 618 if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR || 619 eventsel == ARMV8_PMUV3_PERFCTR_CHAIN) 620 return; 621 622 /* 623 * If we have a filter in place and that the event isn't allowed, do 624 * not install a perf event either. 625 */ 626 if (vcpu->kvm->arch.pmu_filter && 627 !test_bit(eventsel, vcpu->kvm->arch.pmu_filter)) 628 return; 629 630 p = data & ARMV8_PMU_EXCLUDE_EL1; 631 u = data & ARMV8_PMU_EXCLUDE_EL0; 632 nsk = data & ARMV8_PMU_EXCLUDE_NS_EL1; 633 nsu = data & ARMV8_PMU_EXCLUDE_NS_EL0; 634 635 memset(&attr, 0, sizeof(struct perf_event_attr)); 636 attr.type = arm_pmu->pmu.type; 637 attr.size = sizeof(attr); 638 attr.pinned = 1; 639 attr.disabled = !kvm_pmu_counter_is_enabled(pmc); 640 attr.exclude_user = (u != nsu); 641 attr.exclude_kernel = (p != nsk); 642 attr.exclude_hv = 1; /* Don't count EL2 events */ 643 attr.exclude_host = 1; /* Don't count host events */ 644 attr.config = eventsel; 645 646 /* 647 * If counting with a 64bit counter, advertise it to the perf 648 * code, carefully dealing with the initial sample period 649 * which also depends on the overflow. 650 */ 651 if (kvm_pmc_is_64bit(pmc)) 652 attr.config1 |= PERF_ATTR_CFG1_COUNTER_64BIT; 653 654 attr.sample_period = compute_period(pmc, kvm_pmu_get_pmc_value(pmc)); 655 656 event = perf_event_create_kernel_counter(&attr, -1, current, 657 kvm_pmu_perf_overflow, pmc); 658 659 if (IS_ERR(event)) { 660 pr_err_once("kvm: pmu event creation failed %ld\n", 661 PTR_ERR(event)); 662 return; 663 } 664 665 pmc->perf_event = event; 666 } 667 668 /** 669 * kvm_pmu_set_counter_event_type - set selected counter to monitor some event 670 * @vcpu: The vcpu pointer 671 * @data: The data guest writes to PMXEVTYPER_EL0 672 * @select_idx: The number of selected counter 673 * 674 * When OS accesses PMXEVTYPER_EL0, that means it wants to set a PMC to count an 675 * event with given hardware event number. Here we call perf_event API to 676 * emulate this action and create a kernel perf event for it. 677 */ 678 void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, 679 u64 select_idx) 680 { 681 struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, select_idx); 682 u64 reg; 683 684 if (!kvm_vcpu_has_pmu(vcpu)) 685 return; 686 687 reg = counter_index_to_evtreg(pmc->idx); 688 __vcpu_sys_reg(vcpu, reg) = data & kvm_pmu_evtyper_mask(vcpu->kvm); 689 690 kvm_pmu_create_perf_event(pmc); 691 } 692 693 void kvm_host_pmu_init(struct arm_pmu *pmu) 694 { 695 struct arm_pmu_entry *entry; 696 697 /* 698 * Check the sanitised PMU version for the system, as KVM does not 699 * support implementations where PMUv3 exists on a subset of CPUs. 700 */ 701 if (!pmuv3_implemented(kvm_arm_pmu_get_pmuver_limit())) 702 return; 703 704 mutex_lock(&arm_pmus_lock); 705 706 entry = kmalloc(sizeof(*entry), GFP_KERNEL); 707 if (!entry) 708 goto out_unlock; 709 710 entry->arm_pmu = pmu; 711 list_add_tail(&entry->entry, &arm_pmus); 712 713 if (list_is_singular(&arm_pmus)) 714 static_branch_enable(&kvm_arm_pmu_available); 715 716 out_unlock: 717 mutex_unlock(&arm_pmus_lock); 718 } 719 720 static struct arm_pmu *kvm_pmu_probe_armpmu(void) 721 { 722 struct arm_pmu *tmp, *pmu = NULL; 723 struct arm_pmu_entry *entry; 724 int cpu; 725 726 mutex_lock(&arm_pmus_lock); 727 728 /* 729 * It is safe to use a stale cpu to iterate the list of PMUs so long as 730 * the same value is used for the entirety of the loop. Given this, and 731 * the fact that no percpu data is used for the lookup there is no need 732 * to disable preemption. 733 * 734 * It is still necessary to get a valid cpu, though, to probe for the 735 * default PMU instance as userspace is not required to specify a PMU 736 * type. In order to uphold the preexisting behavior KVM selects the 737 * PMU instance for the core during vcpu init. A dependent use 738 * case would be a user with disdain of all things big.LITTLE that 739 * affines the VMM to a particular cluster of cores. 740 * 741 * In any case, userspace should just do the sane thing and use the UAPI 742 * to select a PMU type directly. But, be wary of the baggage being 743 * carried here. 744 */ 745 cpu = raw_smp_processor_id(); 746 list_for_each_entry(entry, &arm_pmus, entry) { 747 tmp = entry->arm_pmu; 748 749 if (cpumask_test_cpu(cpu, &tmp->supported_cpus)) { 750 pmu = tmp; 751 break; 752 } 753 } 754 755 mutex_unlock(&arm_pmus_lock); 756 757 return pmu; 758 } 759 760 u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) 761 { 762 unsigned long *bmap = vcpu->kvm->arch.pmu_filter; 763 u64 val, mask = 0; 764 int base, i, nr_events; 765 766 if (!kvm_vcpu_has_pmu(vcpu)) 767 return 0; 768 769 if (!pmceid1) { 770 val = read_sysreg(pmceid0_el0); 771 /* always support CHAIN */ 772 val |= BIT(ARMV8_PMUV3_PERFCTR_CHAIN); 773 base = 0; 774 } else { 775 val = read_sysreg(pmceid1_el0); 776 /* 777 * Don't advertise STALL_SLOT*, as PMMIR_EL0 is handled 778 * as RAZ 779 */ 780 val &= ~(BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32) | 781 BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND - 32) | 782 BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND - 32)); 783 base = 32; 784 } 785 786 if (!bmap) 787 return val; 788 789 nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1; 790 791 for (i = 0; i < 32; i += 8) { 792 u64 byte; 793 794 byte = bitmap_get_value8(bmap, base + i); 795 mask |= byte << i; 796 if (nr_events >= (0x4000 + base + 32)) { 797 byte = bitmap_get_value8(bmap, 0x4000 + base + i); 798 mask |= byte << (32 + i); 799 } 800 } 801 802 return val & mask; 803 } 804 805 void kvm_vcpu_reload_pmu(struct kvm_vcpu *vcpu) 806 { 807 u64 mask = kvm_pmu_valid_counter_mask(vcpu); 808 809 kvm_pmu_handle_pmcr(vcpu, kvm_vcpu_read_pmcr(vcpu)); 810 811 __vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= mask; 812 __vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= mask; 813 __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= mask; 814 } 815 816 int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) 817 { 818 if (!kvm_vcpu_has_pmu(vcpu)) 819 return 0; 820 821 if (!vcpu->arch.pmu.created) 822 return -EINVAL; 823 824 /* 825 * A valid interrupt configuration for the PMU is either to have a 826 * properly configured interrupt number and using an in-kernel 827 * irqchip, or to not have an in-kernel GIC and not set an IRQ. 828 */ 829 if (irqchip_in_kernel(vcpu->kvm)) { 830 int irq = vcpu->arch.pmu.irq_num; 831 /* 832 * If we are using an in-kernel vgic, at this point we know 833 * the vgic will be initialized, so we can check the PMU irq 834 * number against the dimensions of the vgic and make sure 835 * it's valid. 836 */ 837 if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq)) 838 return -EINVAL; 839 } else if (kvm_arm_pmu_irq_initialized(vcpu)) { 840 return -EINVAL; 841 } 842 843 /* One-off reload of the PMU on first run */ 844 kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu); 845 846 return 0; 847 } 848 849 static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu) 850 { 851 if (irqchip_in_kernel(vcpu->kvm)) { 852 int ret; 853 854 /* 855 * If using the PMU with an in-kernel virtual GIC 856 * implementation, we require the GIC to be already 857 * initialized when initializing the PMU. 858 */ 859 if (!vgic_initialized(vcpu->kvm)) 860 return -ENODEV; 861 862 if (!kvm_arm_pmu_irq_initialized(vcpu)) 863 return -ENXIO; 864 865 ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num, 866 &vcpu->arch.pmu); 867 if (ret) 868 return ret; 869 } 870 871 init_irq_work(&vcpu->arch.pmu.overflow_work, 872 kvm_pmu_perf_overflow_notify_vcpu); 873 874 vcpu->arch.pmu.created = true; 875 return 0; 876 } 877 878 /* 879 * For one VM the interrupt type must be same for each vcpu. 880 * As a PPI, the interrupt number is the same for all vcpus, 881 * while as an SPI it must be a separate number per vcpu. 882 */ 883 static bool pmu_irq_is_valid(struct kvm *kvm, int irq) 884 { 885 unsigned long i; 886 struct kvm_vcpu *vcpu; 887 888 kvm_for_each_vcpu(i, vcpu, kvm) { 889 if (!kvm_arm_pmu_irq_initialized(vcpu)) 890 continue; 891 892 if (irq_is_ppi(irq)) { 893 if (vcpu->arch.pmu.irq_num != irq) 894 return false; 895 } else { 896 if (vcpu->arch.pmu.irq_num == irq) 897 return false; 898 } 899 } 900 901 return true; 902 } 903 904 /** 905 * kvm_arm_pmu_get_max_counters - Return the max number of PMU counters. 906 * @kvm: The kvm pointer 907 */ 908 u8 kvm_arm_pmu_get_max_counters(struct kvm *kvm) 909 { 910 struct arm_pmu *arm_pmu = kvm->arch.arm_pmu; 911 912 /* 913 * The arm_pmu->cntr_mask considers the fixed counter(s) as well. 914 * Ignore those and return only the general-purpose counters. 915 */ 916 return bitmap_weight(arm_pmu->cntr_mask, ARMV8_PMU_MAX_GENERAL_COUNTERS); 917 } 918 919 static void kvm_arm_set_pmu(struct kvm *kvm, struct arm_pmu *arm_pmu) 920 { 921 lockdep_assert_held(&kvm->arch.config_lock); 922 923 kvm->arch.arm_pmu = arm_pmu; 924 kvm->arch.pmcr_n = kvm_arm_pmu_get_max_counters(kvm); 925 } 926 927 /** 928 * kvm_arm_set_default_pmu - No PMU set, get the default one. 929 * @kvm: The kvm pointer 930 * 931 * The observant among you will notice that the supported_cpus 932 * mask does not get updated for the default PMU even though it 933 * is quite possible the selected instance supports only a 934 * subset of cores in the system. This is intentional, and 935 * upholds the preexisting behavior on heterogeneous systems 936 * where vCPUs can be scheduled on any core but the guest 937 * counters could stop working. 938 */ 939 int kvm_arm_set_default_pmu(struct kvm *kvm) 940 { 941 struct arm_pmu *arm_pmu = kvm_pmu_probe_armpmu(); 942 943 if (!arm_pmu) 944 return -ENODEV; 945 946 kvm_arm_set_pmu(kvm, arm_pmu); 947 return 0; 948 } 949 950 static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id) 951 { 952 struct kvm *kvm = vcpu->kvm; 953 struct arm_pmu_entry *entry; 954 struct arm_pmu *arm_pmu; 955 int ret = -ENXIO; 956 957 lockdep_assert_held(&kvm->arch.config_lock); 958 mutex_lock(&arm_pmus_lock); 959 960 list_for_each_entry(entry, &arm_pmus, entry) { 961 arm_pmu = entry->arm_pmu; 962 if (arm_pmu->pmu.type == pmu_id) { 963 if (kvm_vm_has_ran_once(kvm) || 964 (kvm->arch.pmu_filter && kvm->arch.arm_pmu != arm_pmu)) { 965 ret = -EBUSY; 966 break; 967 } 968 969 kvm_arm_set_pmu(kvm, arm_pmu); 970 cpumask_copy(kvm->arch.supported_cpus, &arm_pmu->supported_cpus); 971 ret = 0; 972 break; 973 } 974 } 975 976 mutex_unlock(&arm_pmus_lock); 977 return ret; 978 } 979 980 int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) 981 { 982 struct kvm *kvm = vcpu->kvm; 983 984 lockdep_assert_held(&kvm->arch.config_lock); 985 986 if (!kvm_vcpu_has_pmu(vcpu)) 987 return -ENODEV; 988 989 if (vcpu->arch.pmu.created) 990 return -EBUSY; 991 992 switch (attr->attr) { 993 case KVM_ARM_VCPU_PMU_V3_IRQ: { 994 int __user *uaddr = (int __user *)(long)attr->addr; 995 int irq; 996 997 if (!irqchip_in_kernel(kvm)) 998 return -EINVAL; 999 1000 if (get_user(irq, uaddr)) 1001 return -EFAULT; 1002 1003 /* The PMU overflow interrupt can be a PPI or a valid SPI. */ 1004 if (!(irq_is_ppi(irq) || irq_is_spi(irq))) 1005 return -EINVAL; 1006 1007 if (!pmu_irq_is_valid(kvm, irq)) 1008 return -EINVAL; 1009 1010 if (kvm_arm_pmu_irq_initialized(vcpu)) 1011 return -EBUSY; 1012 1013 kvm_debug("Set kvm ARM PMU irq: %d\n", irq); 1014 vcpu->arch.pmu.irq_num = irq; 1015 return 0; 1016 } 1017 case KVM_ARM_VCPU_PMU_V3_FILTER: { 1018 u8 pmuver = kvm_arm_pmu_get_pmuver_limit(); 1019 struct kvm_pmu_event_filter __user *uaddr; 1020 struct kvm_pmu_event_filter filter; 1021 int nr_events; 1022 1023 /* 1024 * Allow userspace to specify an event filter for the entire 1025 * event range supported by PMUVer of the hardware, rather 1026 * than the guest's PMUVer for KVM backward compatibility. 1027 */ 1028 nr_events = __kvm_pmu_event_mask(pmuver) + 1; 1029 1030 uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr; 1031 1032 if (copy_from_user(&filter, uaddr, sizeof(filter))) 1033 return -EFAULT; 1034 1035 if (((u32)filter.base_event + filter.nevents) > nr_events || 1036 (filter.action != KVM_PMU_EVENT_ALLOW && 1037 filter.action != KVM_PMU_EVENT_DENY)) 1038 return -EINVAL; 1039 1040 if (kvm_vm_has_ran_once(kvm)) 1041 return -EBUSY; 1042 1043 if (!kvm->arch.pmu_filter) { 1044 kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT); 1045 if (!kvm->arch.pmu_filter) 1046 return -ENOMEM; 1047 1048 /* 1049 * The default depends on the first applied filter. 1050 * If it allows events, the default is to deny. 1051 * Conversely, if the first filter denies a set of 1052 * events, the default is to allow. 1053 */ 1054 if (filter.action == KVM_PMU_EVENT_ALLOW) 1055 bitmap_zero(kvm->arch.pmu_filter, nr_events); 1056 else 1057 bitmap_fill(kvm->arch.pmu_filter, nr_events); 1058 } 1059 1060 if (filter.action == KVM_PMU_EVENT_ALLOW) 1061 bitmap_set(kvm->arch.pmu_filter, filter.base_event, filter.nevents); 1062 else 1063 bitmap_clear(kvm->arch.pmu_filter, filter.base_event, filter.nevents); 1064 1065 return 0; 1066 } 1067 case KVM_ARM_VCPU_PMU_V3_SET_PMU: { 1068 int __user *uaddr = (int __user *)(long)attr->addr; 1069 int pmu_id; 1070 1071 if (get_user(pmu_id, uaddr)) 1072 return -EFAULT; 1073 1074 return kvm_arm_pmu_v3_set_pmu(vcpu, pmu_id); 1075 } 1076 case KVM_ARM_VCPU_PMU_V3_INIT: 1077 return kvm_arm_pmu_v3_init(vcpu); 1078 } 1079 1080 return -ENXIO; 1081 } 1082 1083 int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) 1084 { 1085 switch (attr->attr) { 1086 case KVM_ARM_VCPU_PMU_V3_IRQ: { 1087 int __user *uaddr = (int __user *)(long)attr->addr; 1088 int irq; 1089 1090 if (!irqchip_in_kernel(vcpu->kvm)) 1091 return -EINVAL; 1092 1093 if (!kvm_vcpu_has_pmu(vcpu)) 1094 return -ENODEV; 1095 1096 if (!kvm_arm_pmu_irq_initialized(vcpu)) 1097 return -ENXIO; 1098 1099 irq = vcpu->arch.pmu.irq_num; 1100 return put_user(irq, uaddr); 1101 } 1102 } 1103 1104 return -ENXIO; 1105 } 1106 1107 int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) 1108 { 1109 switch (attr->attr) { 1110 case KVM_ARM_VCPU_PMU_V3_IRQ: 1111 case KVM_ARM_VCPU_PMU_V3_INIT: 1112 case KVM_ARM_VCPU_PMU_V3_FILTER: 1113 case KVM_ARM_VCPU_PMU_V3_SET_PMU: 1114 if (kvm_vcpu_has_pmu(vcpu)) 1115 return 0; 1116 } 1117 1118 return -ENXIO; 1119 } 1120 1121 u8 kvm_arm_pmu_get_pmuver_limit(void) 1122 { 1123 u64 tmp; 1124 1125 tmp = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1); 1126 tmp = cpuid_feature_cap_perfmon_field(tmp, 1127 ID_AA64DFR0_EL1_PMUVer_SHIFT, 1128 ID_AA64DFR0_EL1_PMUVer_V3P5); 1129 return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), tmp); 1130 } 1131 1132 /** 1133 * kvm_vcpu_read_pmcr - Read PMCR_EL0 register for the vCPU 1134 * @vcpu: The vcpu pointer 1135 */ 1136 u64 kvm_vcpu_read_pmcr(struct kvm_vcpu *vcpu) 1137 { 1138 u64 pmcr = __vcpu_sys_reg(vcpu, PMCR_EL0); 1139 1140 return u64_replace_bits(pmcr, vcpu->kvm->arch.pmcr_n, ARMV8_PMU_PMCR_N); 1141 } 1142