// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015 Linaro Ltd.
 * Author: Shannon Zhao <shannon.zhao@linaro.org>
 */

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/list.h>
#include <linux/perf_event.h>
#include <linux/perf/arm_pmu.h>
#include <linux/uaccess.h>
#include <asm/kvm_emulate.h>
#include <kvm/arm_pmu.h>
#include <kvm/arm_vgic.h>

DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);

static LIST_HEAD(arm_pmus);
static DEFINE_MUTEX(arm_pmus_lock);

static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx);
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);

#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1

static u32 kvm_pmu_event_mask(struct kvm *kvm)
{
	unsigned int pmuver;

	pmuver = kvm->arch.arm_pmu->pmuver;

	switch (pmuver) {
	case ID_AA64DFR0_EL1_PMUVer_IMP:
		return GENMASK(9, 0);
	case ID_AA64DFR0_EL1_PMUVer_V3P1:
	case ID_AA64DFR0_EL1_PMUVer_V3P4:
	case ID_AA64DFR0_EL1_PMUVer_V3P5:
	case ID_AA64DFR0_EL1_PMUVer_V3P7:
		return GENMASK(15, 0);
	default:	/* Shouldn't be here, just for sanity */
		WARN_ONCE(1, "Unknown PMU version %d\n", pmuver);
		return 0;
	}
}

/**
 * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
{
	return (select_idx == ARMV8_PMU_CYCLE_IDX &&
		__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC);
}

static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
{
	struct kvm_pmu *pmu;
	struct kvm_vcpu_arch *vcpu_arch;

	pmc -= pmc->idx;
	pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
	vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
	return container_of(vcpu_arch, struct kvm_vcpu, arch);
}

/**
 * kvm_pmu_pmc_is_chained - determine if the pmc is chained
 * @pmc: The PMU counter pointer
 */
static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);

	return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
}

/**
 * kvm_pmu_idx_is_high_counter - determine if select_idx is a high/low counter
 * @select_idx: The counter index
 */
static bool kvm_pmu_idx_is_high_counter(u64 select_idx)
{
	return select_idx & 0x1;
}

/**
 * kvm_pmu_get_canonical_pmc - obtain the canonical pmc
 * @pmc: The PMU counter pointer
 *
 * When a pair of PMCs are chained together we use the low counter (canonical)
 * to hold the underlying perf event.
 */
static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc)
{
	if (kvm_pmu_pmc_is_chained(pmc) &&
	    kvm_pmu_idx_is_high_counter(pmc->idx))
		return pmc - 1;

	return pmc;
}

static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc)
{
	if (kvm_pmu_idx_is_high_counter(pmc->idx))
		return pmc - 1;
	else
		return pmc + 1;
}

/**
 * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx)
{
	u64 eventsel, reg;

	select_idx |= 0x1;

	if (select_idx == ARMV8_PMU_CYCLE_IDX)
		return false;

	reg = PMEVTYPER0_EL0 + select_idx;
	eventsel = __vcpu_sys_reg(vcpu, reg) & kvm_pmu_event_mask(vcpu->kvm);

	return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN;
}

/**
 * kvm_pmu_get_pair_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer
 * @pmc: The PMU counter pointer
 */
static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu,
					  struct kvm_pmc *pmc)
{
	u64 counter, counter_high, reg, enabled, running;

	if (kvm_pmu_pmc_is_chained(pmc)) {
		pmc = kvm_pmu_get_canonical_pmc(pmc);
		reg = PMEVCNTR0_EL0 + pmc->idx;

		counter = __vcpu_sys_reg(vcpu, reg);
		counter_high = __vcpu_sys_reg(vcpu, reg + 1);

		counter = lower_32_bits(counter) | (counter_high << 32);
	} else {
		reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
		      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
		counter = __vcpu_sys_reg(vcpu, reg);
	}

	/*
	 * The real counter value is equal to the value of the counter
	 * register plus the value the perf event counts.
	 */
	if (pmc->perf_event)
		counter += perf_event_read_value(pmc->perf_event, &enabled,
						 &running);

	return counter;
}

/**
 * kvm_pmu_get_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
{
	u64 counter;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc = &pmu->pmc[select_idx];

	if (!kvm_vcpu_has_pmu(vcpu))
		return 0;

	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

	if (kvm_pmu_pmc_is_chained(pmc) &&
	    kvm_pmu_idx_is_high_counter(select_idx))
		counter = upper_32_bits(counter);
	else if (select_idx != ARMV8_PMU_CYCLE_IDX)
		counter = lower_32_bits(counter);

	return counter;
}

/**
 * kvm_pmu_set_counter_value - set PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 * @val: The counter value
 */
void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
{
	u64 reg;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
	__vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);

	/* Recreate the perf event to reflect the updated sample_period */
	kvm_pmu_create_perf_event(vcpu, select_idx);
}

/**
 * kvm_pmu_release_perf_event - remove the perf event
 * @pmc: The PMU counter pointer
 */
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
{
	pmc = kvm_pmu_get_canonical_pmc(pmc);
	if (pmc->perf_event) {
		perf_event_disable(pmc->perf_event);
		perf_event_release_kernel(pmc->perf_event);
		pmc->perf_event = NULL;
	}
}

/**
 * kvm_pmu_stop_counter - stop PMU counter
 * @vcpu: The vcpu pointer
 * @pmc: The PMU counter pointer
 *
 * If this counter has been configured to monitor some event, release it here.
 */
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
{
	u64 counter, reg, val;

	pmc = kvm_pmu_get_canonical_pmc(pmc);
	if (!pmc->perf_event)
		return;

	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

	if (pmc->idx == ARMV8_PMU_CYCLE_IDX) {
		reg = PMCCNTR_EL0;
		val = counter;
	} else {
		reg = PMEVCNTR0_EL0 + pmc->idx;
		val = lower_32_bits(counter);
	}

	__vcpu_sys_reg(vcpu, reg) = val;

	if (kvm_pmu_pmc_is_chained(pmc))
		__vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);

	kvm_pmu_release_perf_event(pmc);
}

/**
 * kvm_pmu_vcpu_init - assign pmu counter idx for cpu
 * @vcpu: The vcpu pointer
 */
void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
		pmu->pmc[i].idx = i;
}

/**
 * kvm_pmu_vcpu_reset - reset pmu state for cpu
 * @vcpu: The vcpu pointer
 */
void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
{
	unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	int i;

	for_each_set_bit(i, &mask, 32)
		kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);

	bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
}

/**
 * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu
 * @vcpu: The vcpu pointer
 */
void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
		kvm_pmu_release_perf_event(&pmu->pmc[i]);
	irq_work_sync(&vcpu->arch.pmu.overflow_work);
}

/* Mask of the implemented event counters (PMCR_EL0.N of them) plus the cycle counter */
u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
{
	u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;

	val &= ARMV8_PMU_PMCR_N_MASK;
	if (val == 0)
		return BIT(ARMV8_PMU_CYCLE_IDX);
	else
		return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
}

/**
 * kvm_pmu_enable_counter_mask - enable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENSET register
 *
 * Call perf_event_enable to start counting the perf event
 */
void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
		return;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		if (!(val & BIT(i)))
			continue;

		pmc = &pmu->pmc[i];

		/* A change in the enable state may affect the chain state */
		kvm_pmu_update_pmc_chained(vcpu, i);
		kvm_pmu_create_perf_event(vcpu, i);

		/* At this point, pmc must be the canonical */
		if (pmc->perf_event) {
			perf_event_enable(pmc->perf_event);
			if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
				kvm_debug("failed to enable perf event\n");
		}
	}
}

/**
 * kvm_pmu_disable_counter_mask - disable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENCLR register
 *
 * Call perf_event_disable to stop counting the perf event
 */
void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;

	if (!kvm_vcpu_has_pmu(vcpu) || !val)
		return;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		if (!(val & BIT(i)))
			continue;

		pmc = &pmu->pmc[i];

		/* A change in the enable state may affect the chain state */
		kvm_pmu_update_pmc_chained(vcpu, i);
		kvm_pmu_create_perf_event(vcpu, i);

		/* At this point, pmc must be the canonical */
		if (pmc->perf_event)
			perf_event_disable(pmc->perf_event);
	}
}

/*
 * Counters that have overflowed, are enabled and have their overflow
 * interrupt enabled, provided the PMU is globally enabled.
 */
static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
{
	u64 reg = 0;

	if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
		reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
	}

	return reg;
}

/* Recompute the PMU overflow interrupt line and propagate it to the vgic */
static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	bool overflow;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	overflow = !!kvm_pmu_overflow_status(vcpu);
	if (pmu->irq_level == overflow)
		return;

	pmu->irq_level = overflow;

	if (likely(irqchip_in_kernel(vcpu->kvm))) {
		int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
					      pmu->irq_num, overflow, pmu);
		WARN_ON(ret);
	}
}

bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
	bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;

	if (likely(irqchip_in_kernel(vcpu->kvm)))
		return false;

	return pmu->irq_level != run_level;
}

/*
 * Reflect the PMU overflow interrupt output level into the kvm_run structure
 */
void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
{
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Populate the PMU interrupt level for user space */
	regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
	if (vcpu->arch.pmu.irq_level)
		regs->device_irq_level |= KVM_ARM_DEV_PMU;
}

/**
 * kvm_pmu_flush_hwstate - flush pmu state to cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the host, and inject
 * an interrupt if that was the case.
 */
void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/**
 * kvm_pmu_sync_hwstate - sync pmu state from cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the guest, and
 * inject an interrupt if that was the case.
 */
void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/*
 * When perf interrupt is an NMI, we cannot safely notify the vcpu corresponding
 * to the event.
 * This is why we need a callback to do it once outside of the NMI context.
 */
static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
{
	struct kvm_vcpu *vcpu;
	struct kvm_pmu *pmu;

	pmu = container_of(work, struct kvm_pmu, overflow_work);
	vcpu = kvm_pmc_to_vcpu(pmu->pmc);

	kvm_vcpu_kick(vcpu);
}

/*
 * When the perf event overflows, set the overflow status and inform the vcpu.
 */
static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
				  struct perf_sample_data *data,
				  struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	int idx = pmc->idx;
	u64 period;

	cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);

	/*
	 * Reset the sample period to the architectural limit,
	 * i.e. the point where the counter overflows.
	 */
	period = -(local64_read(&perf_event->count));

	if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
		period &= GENMASK(31, 0);

	local64_set(&perf_event->hw.period_left, 0);
	perf_event->attr.sample_period = period;
	perf_event->hw.sample_period = period;

	__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);

	if (kvm_pmu_overflow_status(vcpu)) {
		kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);

		if (!in_nmi())
			kvm_vcpu_kick(vcpu);
		else
			irq_work_queue(&vcpu->arch.pmu.overflow_work);
	}

	cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
}

/**
 * kvm_pmu_software_increment - do software increment
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMSWINC register
 */
void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	int i;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
		return;

	/* Weed out disabled counters */
	val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);

	for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
		u64 type, reg;

		if (!(val & BIT(i)))
			continue;

		/* PMSWINC only applies to ... SW_INC! */
		type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
		type &= kvm_pmu_event_mask(vcpu->kvm);
		if (type != ARMV8_PMUV3_PERFCTR_SW_INCR)
			continue;

		/* increment this even SW_INC counter */
		reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
		reg = lower_32_bits(reg);
		__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;

		if (reg) /* no overflow on the low part */
			continue;

		if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) {
			/* increment the high counter */
			reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
			reg = lower_32_bits(reg);
			__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
			if (!reg) /* mark overflow on the high counter */
				__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
		} else {
			/* mark overflow on low counter */
			__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
		}
	}
}

/**
 * kvm_pmu_handle_pmcr - handle PMCR register
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCR register
 */
void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
{
	int i;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	if (val & ARMV8_PMU_PMCR_E) {
		kvm_pmu_enable_counter_mask(vcpu,
			__vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
	} else {
		kvm_pmu_disable_counter_mask(vcpu,
			__vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
	}

	if (val & ARMV8_PMU_PMCR_C)
		kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);

	if (val & ARMV8_PMU_PMCR_P) {
		unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);

		mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
		for_each_set_bit(i, &mask, 32)
			kvm_pmu_set_counter_value(vcpu, i, 0);
	}
}

/*
 * A counter counts only when the PMU is globally enabled (PMCR_EL0.E) and
 * the counter's own enable bit is set in PMCNTENSET_EL0.
 */
static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
{
	return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
	       (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx));
}

/**
 * kvm_pmu_create_perf_event - create a perf event for a counter
 * @vcpu: The vcpu pointer
 * @select_idx: The number of selected counter
 */
static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
{
	struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc;
	struct perf_event *event;
	struct perf_event_attr attr;
	u64 eventsel, counter, reg, data;

	/*
	 * For chained counters the event type and filtering attributes are
	 * obtained from the low/even counter. We also use this counter to
	 * determine if the event is enabled/disabled.
	 */
	pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]);

	reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx;
	data = __vcpu_sys_reg(vcpu, reg);

	kvm_pmu_stop_counter(vcpu, pmc);
	if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
		eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
	else
		eventsel = data & kvm_pmu_event_mask(vcpu->kvm);

	/* Software increment event doesn't need to be backed by a perf event */
	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR)
		return;

	/*
	 * If we have a filter in place and the event isn't allowed, do
	 * not install a perf event either.
	 */
	if (vcpu->kvm->arch.pmu_filter &&
	    !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
		return;

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.type = arm_pmu->pmu.type;
	attr.size = sizeof(attr);
	attr.pinned = 1;
	attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
	attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
	attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
	attr.exclude_hv = 1; /* Don't count EL2 events */
	attr.exclude_host = 1; /* Don't count host events */
	attr.config = eventsel;

	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);

	if (kvm_pmu_pmc_is_chained(pmc)) {
		/*
		 * The initial sample period (overflow count) of an event. For
		 * chained counters we only support overflow interrupts on the
		 * high counter.
		 */
		attr.sample_period = (-counter) & GENMASK(63, 0);
		attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;

		event = perf_event_create_kernel_counter(&attr, -1, current,
							 kvm_pmu_perf_overflow,
							 pmc + 1);
	} else {
		/* The initial sample period (overflow count) of an event. */
		if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
			attr.sample_period = (-counter) & GENMASK(63, 0);
		else
			attr.sample_period = (-counter) & GENMASK(31, 0);

		event = perf_event_create_kernel_counter(&attr, -1, current,
							 kvm_pmu_perf_overflow, pmc);
	}

	if (IS_ERR(event)) {
		pr_err_once("kvm: pmu event creation failed %ld\n",
			    PTR_ERR(event));
		return;
	}

	pmc->perf_event = event;
}

/**
 * kvm_pmu_update_pmc_chained - update chained bitmap
 * @vcpu: The vcpu pointer
 * @select_idx: The number of selected counter
 *
 * Update the chained bitmap based on the event type written in the
 * typer register and the enable state of the odd register.
 */
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc;
	bool new_state, old_state;

	old_state = kvm_pmu_pmc_is_chained(pmc);
	new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
		    kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1);

	if (old_state == new_state)
		return;

	canonical_pmc = kvm_pmu_get_canonical_pmc(pmc);
	kvm_pmu_stop_counter(vcpu, canonical_pmc);
	if (new_state) {
		/*
		 * During promotion from !chained to chained we must ensure
		 * the adjacent counter is stopped and its event destroyed
		 */
		kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc));
		set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
		return;
	}
	clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
}

/**
 * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
 * @vcpu: The vcpu pointer
 * @data: The data guest writes to PMXEVTYPER_EL0
 * @select_idx: The number of selected counter
 *
 * When the guest OS accesses PMXEVTYPER_EL0, it wants to set a PMC to count an
 * event with the given hardware event number. Here we call the perf_event API
 * to emulate this action and create a kernel perf event for it.
 */
void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
				    u64 select_idx)
{
	u64 reg, mask;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	mask = ARMV8_PMU_EVTYPE_MASK;
	mask &= ~ARMV8_PMU_EVTYPE_EVENT;
	mask |= kvm_pmu_event_mask(vcpu->kvm);

	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
	      ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx;

	__vcpu_sys_reg(vcpu, reg) = data & mask;

	kvm_pmu_update_pmc_chained(vcpu, select_idx);
	kvm_pmu_create_perf_event(vcpu, select_idx);
}

/*
 * Track host PMUs as the arm_pmu driver probes them, so that a VM can later
 * be bound to one of them via KVM_ARM_VCPU_PMU_V3_SET_PMU.
 */
void kvm_host_pmu_init(struct arm_pmu *pmu)
{
	struct arm_pmu_entry *entry;

	if (pmu->pmuver == 0 || pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
		return;

	mutex_lock(&arm_pmus_lock);

	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		goto out_unlock;

	entry->arm_pmu = pmu;
	list_add_tail(&entry->entry, &arm_pmus);

	if (list_is_singular(&arm_pmus))
		static_branch_enable(&kvm_arm_pmu_available);

out_unlock:
	mutex_unlock(&arm_pmus_lock);
}

static struct arm_pmu *kvm_pmu_probe_armpmu(void)
{
	struct perf_event_attr attr = { };
	struct perf_event *event;
	struct arm_pmu *pmu = NULL;

	/*
	 * Create a dummy event that only counts user cycles. As we'll never
	 * leave this function with the event being live, it will never
	 * count anything. But it allows us to probe some of the PMU
	 * details. Yes, this is terrible.
	 */
	attr.type = PERF_TYPE_RAW;
	attr.size = sizeof(attr);
	attr.pinned = 1;
	attr.disabled = 0;
	attr.exclude_user = 0;
	attr.exclude_kernel = 1;
	attr.exclude_hv = 1;
	attr.exclude_host = 1;
	attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
	attr.sample_period = GENMASK(63, 0);

	event = perf_event_create_kernel_counter(&attr, -1, current,
						 kvm_pmu_perf_overflow, &attr);

	if (IS_ERR(event)) {
		pr_err_once("kvm: pmu event creation failed %ld\n",
			    PTR_ERR(event));
		return NULL;
	}

	if (event->pmu) {
		pmu = to_arm_pmu(event->pmu);
		if (pmu->pmuver == 0 ||
		    pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
			pmu = NULL;
	}

	perf_event_disable(event);
	perf_event_release_kernel(event);

	return pmu;
}

u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
{
	unsigned long *bmap = vcpu->kvm->arch.pmu_filter;
	u64 val, mask = 0;
	int base, i, nr_events;

	if (!kvm_vcpu_has_pmu(vcpu))
		return 0;

	if (!pmceid1) {
		val = read_sysreg(pmceid0_el0);
		base = 0;
	} else {
		val = read_sysreg(pmceid1_el0);
		/*
		 * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
		 * as RAZ
		 */
		if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4)
			val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
		base = 32;
	}

	if (!bmap)
		return val;

	nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;

	for (i = 0; i < 32; i += 8) {
		u64 byte;

		byte = bitmap_get_value8(bmap, base + i);
		mask |= byte << i;
		if (nr_events >= (0x4000 + base + 32)) {
			byte = bitmap_get_value8(bmap, 0x4000 + base + i);
			mask |= byte << (32 + i);
		}
	}

	return val & mask;
}

int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
{
	if (!kvm_vcpu_has_pmu(vcpu))
		return 0;

	if (!vcpu->arch.pmu.created)
		return -EINVAL;

	/*
	 * A valid interrupt configuration for the PMU is either to have a
	 * properly configured interrupt number and use an in-kernel
	 * irqchip, or to not have an in-kernel GIC and not set an IRQ.
	 */
	if (irqchip_in_kernel(vcpu->kvm)) {
		int irq = vcpu->arch.pmu.irq_num;
		/*
		 * If we are using an in-kernel vgic, at this point we know
		 * the vgic will be initialized, so we can check the PMU irq
		 * number against the dimensions of the vgic and make sure
		 * it's valid.
		 */
		if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
			return -EINVAL;
	} else if (kvm_arm_pmu_irq_initialized(vcpu)) {
		return -EINVAL;
	}

	/* One-off reload of the PMU on first run */
	kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);

	return 0;
}

static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
{
	if (irqchip_in_kernel(vcpu->kvm)) {
		int ret;

		/*
		 * If using the PMU with an in-kernel virtual GIC
		 * implementation, we require the GIC to be already
		 * initialized when initializing the PMU.
		 */
		if (!vgic_initialized(vcpu->kvm))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
					 &vcpu->arch.pmu);
		if (ret)
			return ret;
	}

	init_irq_work(&vcpu->arch.pmu.overflow_work,
		      kvm_pmu_perf_overflow_notify_vcpu);

	vcpu->arch.pmu.created = true;
	return 0;
}

/*
 * For one VM the interrupt type must be the same for each vcpu.
 * As a PPI, the interrupt number is the same for all vcpus,
 * while as an SPI it must be a separate number per vcpu.
 */
static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
{
	unsigned long i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (!kvm_arm_pmu_irq_initialized(vcpu))
			continue;

		if (irq_is_ppi(irq)) {
			if (vcpu->arch.pmu.irq_num != irq)
				return false;
		} else {
			if (vcpu->arch.pmu.irq_num == irq)
				return false;
		}
	}

	return true;
}

/* Bind the VM to one of the host PMUs registered in arm_pmus */
static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id)
{
	struct kvm *kvm = vcpu->kvm;
	struct arm_pmu_entry *entry;
	struct arm_pmu *arm_pmu;
	int ret = -ENXIO;

	mutex_lock(&kvm->lock);
	mutex_lock(&arm_pmus_lock);

	list_for_each_entry(entry, &arm_pmus, entry) {
		arm_pmu = entry->arm_pmu;
		if (arm_pmu->pmu.type == pmu_id) {
			if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags) ||
			    (kvm->arch.pmu_filter && kvm->arch.arm_pmu != arm_pmu)) {
				ret = -EBUSY;
				break;
			}

			kvm->arch.arm_pmu = arm_pmu;
			cpumask_copy(kvm->arch.supported_cpus, &arm_pmu->supported_cpus);
			ret = 0;
			break;
		}
	}

	mutex_unlock(&arm_pmus_lock);
	mutex_unlock(&kvm->lock);
	return ret;
}

int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	struct kvm *kvm = vcpu->kvm;

	if (!kvm_vcpu_has_pmu(vcpu))
		return -ENODEV;

	if (vcpu->arch.pmu.created)
		return -EBUSY;

	mutex_lock(&kvm->lock);
	if (!kvm->arch.arm_pmu) {
		/* No PMU set, get the default one */
		kvm->arch.arm_pmu = kvm_pmu_probe_armpmu();
		if (!kvm->arch.arm_pmu) {
			mutex_unlock(&kvm->lock);
			return -ENODEV;
		}
	}
	mutex_unlock(&kvm->lock);

	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

		if (!irqchip_in_kernel(kvm))
			return -EINVAL;

		if (get_user(irq, uaddr))
			return -EFAULT;

		/* The PMU overflow interrupt can be a PPI or a valid SPI. */
		if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
			return -EINVAL;

		if (!pmu_irq_is_valid(kvm, irq))
			return -EINVAL;

		if (kvm_arm_pmu_irq_initialized(vcpu))
			return -EBUSY;

		kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
		vcpu->arch.pmu.irq_num = irq;
		return 0;
	}
	case KVM_ARM_VCPU_PMU_V3_FILTER: {
		struct kvm_pmu_event_filter __user *uaddr;
		struct kvm_pmu_event_filter filter;
		int nr_events;

		nr_events = kvm_pmu_event_mask(kvm) + 1;

		uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;

		if (copy_from_user(&filter, uaddr, sizeof(filter)))
			return -EFAULT;

		if (((u32)filter.base_event + filter.nevents) > nr_events ||
		    (filter.action != KVM_PMU_EVENT_ALLOW &&
		     filter.action != KVM_PMU_EVENT_DENY))
			return -EINVAL;

		mutex_lock(&kvm->lock);

		if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags)) {
			mutex_unlock(&kvm->lock);
			return -EBUSY;
		}

		if (!kvm->arch.pmu_filter) {
			kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT);
			if (!kvm->arch.pmu_filter) {
				mutex_unlock(&kvm->lock);
				return -ENOMEM;
			}

			/*
			 * The default depends on the first applied filter.
			 * If it allows events, the default is to deny.
			 * Conversely, if the first filter denies a set of
			 * events, the default is to allow.
			 */
			if (filter.action == KVM_PMU_EVENT_ALLOW)
				bitmap_zero(kvm->arch.pmu_filter, nr_events);
			else
				bitmap_fill(kvm->arch.pmu_filter, nr_events);
		}

		if (filter.action == KVM_PMU_EVENT_ALLOW)
			bitmap_set(kvm->arch.pmu_filter, filter.base_event, filter.nevents);
		else
			bitmap_clear(kvm->arch.pmu_filter, filter.base_event, filter.nevents);

		mutex_unlock(&kvm->lock);

		return 0;
	}
	case KVM_ARM_VCPU_PMU_V3_SET_PMU: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int pmu_id;

		if (get_user(pmu_id, uaddr))
			return -EFAULT;

		return kvm_arm_pmu_v3_set_pmu(vcpu, pmu_id);
	}
	case KVM_ARM_VCPU_PMU_V3_INIT:
		return kvm_arm_pmu_v3_init(vcpu);
	}

	return -ENXIO;
}

int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

		if (!irqchip_in_kernel(vcpu->kvm))
			return -EINVAL;

		if (!kvm_vcpu_has_pmu(vcpu))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		irq = vcpu->arch.pmu.irq_num;
		return put_user(irq, uaddr);
	}
	}

	return -ENXIO;
}

int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ:
	case KVM_ARM_VCPU_PMU_V3_INIT:
	case KVM_ARM_VCPU_PMU_V3_FILTER:
	case KVM_ARM_VCPU_PMU_V3_SET_PMU:
		if (kvm_vcpu_has_pmu(vcpu))
			return 0;
	}

	return -ENXIO;
}
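
/*
 * Userspace drives the attribute handlers above with KVM_SET_DEVICE_ATTR on a
 * vcpu fd that was created with the KVM_ARM_VCPU_PMU_V3 feature. A minimal
 * sketch of that flow, assuming a hypothetical 'vcpu_fd' variable, an
 * in-kernel irqchip and an arbitrarily chosen PPI number:
 *
 *	#include <linux/kvm.h>
 *	#include <sys/ioctl.h>
 *
 *	int irq = 23;				// example PPI, not mandated here
 *	struct kvm_device_attr attr = {
 *		.group = KVM_ARM_VCPU_PMU_V3_CTRL,
 *		.attr  = KVM_ARM_VCPU_PMU_V3_IRQ,
 *		.addr  = (__u64)(unsigned long)&irq,
 *	};
 *
 *	// Routed to kvm_arm_pmu_v3_set_attr(): records the overflow IRQ
 *	ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);
 *
 *	// Finalize the PMU: routed to kvm_arm_pmu_v3_init()
 *	attr.attr = KVM_ARM_VCPU_PMU_V3_INIT;
 *	attr.addr = 0;
 *	ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);
 */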