// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015 Linaro Ltd.
 * Author: Shannon Zhao <shannon.zhao@linaro.org>
 */

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/list.h>
#include <linux/perf_event.h>
#include <linux/perf/arm_pmu.h>
#include <linux/uaccess.h>
#include <asm/kvm_emulate.h>
#include <kvm/arm_pmu.h>
#include <kvm/arm_vgic.h>
#include <asm/arm_pmuv3.h>

#define PERF_ATTR_CFG1_COUNTER_64BIT	BIT(0)

DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);

static LIST_HEAD(arm_pmus);
static DEFINE_MUTEX(arm_pmus_lock);

static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc);
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc);

static struct kvm_vcpu *kvm_pmc_to_vcpu(const struct kvm_pmc *pmc)
{
	return container_of(pmc, struct kvm_vcpu, arch.pmu.pmc[pmc->idx]);
}

static struct kvm_pmc *kvm_vcpu_idx_to_pmc(struct kvm_vcpu *vcpu, int cnt_idx)
{
	return &vcpu->arch.pmu.pmc[cnt_idx];
}

static u32 __kvm_pmu_event_mask(unsigned int pmuver)
{
	switch (pmuver) {
	case ID_AA64DFR0_EL1_PMUVer_IMP:
		return GENMASK(9, 0);
	case ID_AA64DFR0_EL1_PMUVer_V3P1:
	case ID_AA64DFR0_EL1_PMUVer_V3P4:
	case ID_AA64DFR0_EL1_PMUVer_V3P5:
	case ID_AA64DFR0_EL1_PMUVer_V3P7:
		return GENMASK(15, 0);
	default:	/* Shouldn't be here, just for sanity */
		WARN_ONCE(1, "Unknown PMU version %d\n", pmuver);
		return 0;
	}
}

static u32 kvm_pmu_event_mask(struct kvm *kvm)
{
	u64 dfr0 = IDREG(kvm, SYS_ID_AA64DFR0_EL1);
	u8 pmuver = SYS_FIELD_GET(ID_AA64DFR0_EL1, PMUVer, dfr0);

	return __kvm_pmu_event_mask(pmuver);
}

u64 kvm_pmu_evtyper_mask(struct kvm *kvm)
{
	u64 mask = ARMV8_PMU_EXCLUDE_EL1 | ARMV8_PMU_EXCLUDE_EL0 |
		   kvm_pmu_event_mask(kvm);

	if (kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL2, IMP))
		mask |= ARMV8_PMU_INCLUDE_EL2;

	if (kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL3, IMP))
		mask |= ARMV8_PMU_EXCLUDE_NS_EL0 |
			ARMV8_PMU_EXCLUDE_NS_EL1 |
			ARMV8_PMU_EXCLUDE_EL3;

	return mask;
}

/**
 * kvm_pmc_is_64bit - determine if counter is 64bit
 * @pmc: counter context
 */
static bool kvm_pmc_is_64bit(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);

	return (pmc->idx == ARMV8_PMU_CYCLE_IDX ||
		kvm_has_feat(vcpu->kvm, ID_AA64DFR0_EL1, PMUVer, V3P5));
}

static bool kvm_pmc_has_64bit_overflow(struct kvm_pmc *pmc)
{
	u64 val = kvm_vcpu_read_pmcr(kvm_pmc_to_vcpu(pmc));

	return (pmc->idx < ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LP)) ||
	       (pmc->idx == ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LC));
}

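/*
 * Only an even-numbered event counter can act as the low half of a
 * CHAIN pair: its odd-numbered successor counts the low counter's
 * overflows, and chaining is only meaningful while the low counter
 * still overflows at 32 bits (i.e. 64bit overflow is not in effect).
 */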
static bool kvm_pmu_counter_can_chain(struct kvm_pmc *pmc)
{
	return (!(pmc->idx & 1) && (pmc->idx + 1) < ARMV8_PMU_CYCLE_IDX &&
		!kvm_pmc_has_64bit_overflow(pmc));
}

static u32 counter_index_to_reg(u64 idx)
{
	return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + idx;
}

static u32 counter_index_to_evtreg(u64 idx)
{
	return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + idx;
}

static u64 kvm_pmu_get_pmc_value(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	u64 counter, reg, enabled, running;

	reg = counter_index_to_reg(pmc->idx);
	counter = __vcpu_sys_reg(vcpu, reg);

	/*
	 * The real counter value is equal to the value of counter register plus
	 * the value perf event counts.
	 */
	if (pmc->perf_event)
		counter += perf_event_read_value(pmc->perf_event, &enabled,
						 &running);

	if (!kvm_pmc_is_64bit(pmc))
		counter = lower_32_bits(counter);

	return counter;
}

/**
 * kvm_pmu_get_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
{
	if (!kvm_vcpu_has_pmu(vcpu))
		return 0;

	return kvm_pmu_get_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx));
}

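/*
 * Write a counter: the backing perf event is torn down and recreated so
 * that its sample period reflects the new counter value.
 */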
static void kvm_pmu_set_pmc_value(struct kvm_pmc *pmc, u64 val, bool force)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	u64 reg;

	kvm_pmu_release_perf_event(pmc);

	reg = counter_index_to_reg(pmc->idx);

	if (vcpu_mode_is_32bit(vcpu) && pmc->idx != ARMV8_PMU_CYCLE_IDX &&
	    !force) {
		/*
		 * Even with PMUv3p5, AArch32 cannot write to the top
		 * 32bit of the counters. The only possible course of
		 * action is to use PMCR.P, which will reset them to
		 * 0 (the only use of the 'force' parameter).
		 */
		val = (__vcpu_sys_reg(vcpu, reg) & GENMASK(63, 32)) |
		      lower_32_bits(val);
	}

	__vcpu_sys_reg(vcpu, reg) = val;

	/* Recreate the perf event to reflect the updated sample_period */
	kvm_pmu_create_perf_event(pmc);
}

/**
 * kvm_pmu_set_counter_value - set PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 * @val: The counter value
 */
void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
{
	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx), val, false);
}

/**
 * kvm_pmu_release_perf_event - remove the perf event
 * @pmc: The PMU counter pointer
 */
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
{
	if (pmc->perf_event) {
		perf_event_disable(pmc->perf_event);
		perf_event_release_kernel(pmc->perf_event);
		pmc->perf_event = NULL;
	}
}

/**
 * kvm_pmu_stop_counter - stop PMU counter
 * @pmc: The PMU counter pointer
 *
 * If this counter has been configured to monitor some event, release it here.
 */
static void kvm_pmu_stop_counter(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	u64 reg, val;

	if (!pmc->perf_event)
		return;

	val = kvm_pmu_get_pmc_value(pmc);

	reg = counter_index_to_reg(pmc->idx);

	__vcpu_sys_reg(vcpu, reg) = val;

	kvm_pmu_release_perf_event(pmc);
}

/**
 * kvm_pmu_vcpu_init - assign pmu counter idx for cpu
 * @vcpu: The vcpu pointer
 *
 */
void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
		pmu->pmc[i].idx = i;
}

/**
 * kvm_pmu_vcpu_reset - reset pmu state for cpu
 * @vcpu: The vcpu pointer
 *
 */
void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
{
	unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
	int i;

	for_each_set_bit(i, &mask, 32)
		kvm_pmu_stop_counter(kvm_vcpu_idx_to_pmc(vcpu, i));
}

/**
 * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu
 * @vcpu: The vcpu pointer
 *
 */
void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	int i;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
		kvm_pmu_release_perf_event(kvm_vcpu_idx_to_pmc(vcpu, i));
	irq_work_sync(&vcpu->arch.pmu.overflow_work);
}

u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
{
	u64 val = FIELD_GET(ARMV8_PMU_PMCR_N, kvm_vcpu_read_pmcr(vcpu));

	if (val == 0)
		return BIT(ARMV8_PMU_CYCLE_IDX);
	else
		return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
}

/**
 * kvm_pmu_enable_counter_mask - enable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENSET register
 *
 * Call perf_event_enable to start counting the perf event
 */
void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	if (!(kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E) || !val)
		return;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		struct kvm_pmc *pmc;

		if (!(val & BIT(i)))
			continue;

		pmc = kvm_vcpu_idx_to_pmc(vcpu, i);

		if (!pmc->perf_event) {
			kvm_pmu_create_perf_event(pmc);
		} else {
			perf_event_enable(pmc->perf_event);
			if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
				kvm_debug("fail to enable perf event\n");
		}
	}
}

/**
 * kvm_pmu_disable_counter_mask - disable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENCLR register
 *
 * Call perf_event_disable to stop counting the perf event
 */
void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;

	if (!kvm_vcpu_has_pmu(vcpu) || !val)
		return;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		struct kvm_pmc *pmc;

		if (!(val & BIT(i)))
			continue;

		pmc = kvm_vcpu_idx_to_pmc(vcpu, i);

		if (pmc->perf_event)
			perf_event_disable(pmc->perf_event);
	}
}

static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
{
	u64 reg = 0;

	if ((kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E)) {
		reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
	}

	return reg;
}

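/*
 * Recompute the level of the PMU overflow interrupt line and, when an
 * in-kernel irqchip is used, propagate any change to the vGIC. Without
 * an in-kernel irqchip, the level is left for userspace to pick up via
 * kvm_pmu_update_run().
 */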
static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	bool overflow;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	overflow = !!kvm_pmu_overflow_status(vcpu);
	if (pmu->irq_level == overflow)
		return;

	pmu->irq_level = overflow;

	if (likely(irqchip_in_kernel(vcpu->kvm))) {
		int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu,
					      pmu->irq_num, overflow, pmu);
		WARN_ON(ret);
	}
}

bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
	bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;

	if (likely(irqchip_in_kernel(vcpu->kvm)))
		return false;

	return pmu->irq_level != run_level;
}

/*
 * Reflect the PMU overflow interrupt output level into the kvm_run structure
 */
void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
{
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Populate the timer bitmap for user space */
	regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
	if (vcpu->arch.pmu.irq_level)
		regs->device_irq_level |= KVM_ARM_DEV_PMU;
}

/**
 * kvm_pmu_flush_hwstate - flush pmu state to cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the host, and inject
 * an interrupt if that was the case.
 */
void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/**
 * kvm_pmu_sync_hwstate - sync pmu state from cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the guest, and
 * inject an interrupt if that was the case.
 */
void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/*
 * When perf interrupt is an NMI, we cannot safely notify the vcpu corresponding
 * to the event.
 * This is why we need a callback to do it once outside of the NMI context.
 */
static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
{
	struct kvm_vcpu *vcpu;

	vcpu = container_of(work, struct kvm_vcpu, arch.pmu.overflow_work);
	kvm_vcpu_kick(vcpu);
}

/*
 * Perform an increment on any of the counters described in @mask,
 * generating the overflow if required, and propagate it as a chained
 * event if possible.
 */
static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu,
				      unsigned long mask, u32 event)
{
	int i;

	if (!(kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E))
		return;

	/* Weed out disabled counters */
	mask &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);

	for_each_set_bit(i, &mask, ARMV8_PMU_CYCLE_IDX) {
		struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, i);
		u64 type, reg;

		/* Filter on event type */
		type = __vcpu_sys_reg(vcpu, counter_index_to_evtreg(i));
		type &= kvm_pmu_event_mask(vcpu->kvm);
		if (type != event)
			continue;

		/* Increment this counter */
		reg = __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) + 1;
		if (!kvm_pmc_is_64bit(pmc))
			reg = lower_32_bits(reg);
		__vcpu_sys_reg(vcpu, counter_index_to_reg(i)) = reg;

		/* No overflow? move on */
		if (kvm_pmc_has_64bit_overflow(pmc) ? reg : lower_32_bits(reg))
			continue;

		/* Mark overflow */
		__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);

		if (kvm_pmu_counter_can_chain(pmc))
			kvm_pmu_counter_increment(vcpu, BIT(i + 1),
						  ARMV8_PMUV3_PERFCTR_CHAIN);
	}
}

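/*
 * The period programmed into perf is the distance to the next
 * architectural overflow of the counter: e.g. a 32bit counter holding
 * 0xfffffff0 must fire after 0x10 further increments.
 */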
/* Compute the sample period for a given counter value */
static u64 compute_period(struct kvm_pmc *pmc, u64 counter)
{
	u64 val;

	if (kvm_pmc_is_64bit(pmc) && kvm_pmc_has_64bit_overflow(pmc))
		val = (-counter) & GENMASK(63, 0);
	else
		val = (-counter) & GENMASK(31, 0);

	return val;
}

/*
 * When the perf event overflows, set the overflow status and inform the vcpu.
 */
static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
				  struct perf_sample_data *data,
				  struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	int idx = pmc->idx;
	u64 period;

	cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);

	/*
	 * Reset the sample period to the architectural limit,
	 * i.e. the point where the counter overflows.
	 */
	period = compute_period(pmc, local64_read(&perf_event->count));

	local64_set(&perf_event->hw.period_left, 0);
	perf_event->attr.sample_period = period;
	perf_event->hw.sample_period = period;

	__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);

	if (kvm_pmu_counter_can_chain(pmc))
		kvm_pmu_counter_increment(vcpu, BIT(idx + 1),
					  ARMV8_PMUV3_PERFCTR_CHAIN);

	if (kvm_pmu_overflow_status(vcpu)) {
		kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);

		if (!in_nmi())
			kvm_vcpu_kick(vcpu);
		else
			irq_work_queue(&vcpu->arch.pmu.overflow_work);
	}

	cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
}

/**
 * kvm_pmu_software_increment - do software increment
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMSWINC register
 */
void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
{
	kvm_pmu_counter_increment(vcpu, val, ARMV8_PMUV3_PERFCTR_SW_INCR);
}

/**
 * kvm_pmu_handle_pmcr - handle PMCR register
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCR register
 */
void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
{
	int i;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	/* Fixup PMCR_EL0 to reconcile the PMU version and the LP bit */
	if (!kvm_has_feat(vcpu->kvm, ID_AA64DFR0_EL1, PMUVer, V3P5))
		val &= ~ARMV8_PMU_PMCR_LP;

	/* The reset bits don't indicate any state, and shouldn't be saved. */
	__vcpu_sys_reg(vcpu, PMCR_EL0) = val & ~(ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_P);

	if (val & ARMV8_PMU_PMCR_E) {
		kvm_pmu_enable_counter_mask(vcpu,
		       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
	} else {
		kvm_pmu_disable_counter_mask(vcpu,
		       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
	}

	if (val & ARMV8_PMU_PMCR_C)
		kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);

	if (val & ARMV8_PMU_PMCR_P) {
		unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
		mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
		for_each_set_bit(i, &mask, 32)
			kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, i), 0, true);
	}
	kvm_vcpu_pmu_restore_guest(vcpu);
}

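/*
 * A counter only counts when the global enable bit (PMCR_EL0.E) is set
 * and its own bit in PMCNTENSET_EL0 is set.
 */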
static bool kvm_pmu_counter_is_enabled(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);

	return (kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E) &&
	       (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(pmc->idx));
}

/**
 * kvm_pmu_create_perf_event - create a perf event for a counter
 * @pmc: Counter context
 */
static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu;
	struct perf_event *event;
	struct perf_event_attr attr;
	u64 eventsel, reg, data;
	bool p, u, nsk, nsu;

	reg = counter_index_to_evtreg(pmc->idx);
	data = __vcpu_sys_reg(vcpu, reg);

	kvm_pmu_stop_counter(pmc);
	if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
		eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
	else
		eventsel = data & kvm_pmu_event_mask(vcpu->kvm);

	/*
	 * Neither SW increment nor chained events need to be backed
	 * by a perf event.
	 */
	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR ||
	    eventsel == ARMV8_PMUV3_PERFCTR_CHAIN)
		return;

	/*
	 * If we have a filter in place and the event isn't allowed, do
	 * not install a perf event either.
	 */
	if (vcpu->kvm->arch.pmu_filter &&
	    !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
		return;

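	/*
	 * The guest runs in Non-secure state, so the architectural NS
	 * filtering rules apply: Non-secure ELx is counted only when the
	 * NS bit (NSK/NSU) has the same value as the corresponding
	 * privilege bit (P/U), hence the "exclude on mismatch" below.
	 */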
	p = data & ARMV8_PMU_EXCLUDE_EL1;
	u = data & ARMV8_PMU_EXCLUDE_EL0;
	nsk = data & ARMV8_PMU_EXCLUDE_NS_EL1;
	nsu = data & ARMV8_PMU_EXCLUDE_NS_EL0;

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.type = arm_pmu->pmu.type;
	attr.size = sizeof(attr);
	attr.pinned = 1;
	attr.disabled = !kvm_pmu_counter_is_enabled(pmc);
	attr.exclude_user = (u != nsu);
	attr.exclude_kernel = (p != nsk);
	attr.exclude_hv = 1; /* Don't count EL2 events */
	attr.exclude_host = 1; /* Don't count host events */
	attr.config = eventsel;

	/*
	 * If counting with a 64bit counter, advertise it to the perf
	 * code, carefully dealing with the initial sample period
	 * which also depends on the overflow.
	 */
	if (kvm_pmc_is_64bit(pmc))
		attr.config1 |= PERF_ATTR_CFG1_COUNTER_64BIT;

	attr.sample_period = compute_period(pmc, kvm_pmu_get_pmc_value(pmc));

	event = perf_event_create_kernel_counter(&attr, -1, current,
						 kvm_pmu_perf_overflow, pmc);

	if (IS_ERR(event)) {
		pr_err_once("kvm: pmu event creation failed %ld\n",
			    PTR_ERR(event));
		return;
	}

	pmc->perf_event = event;
}

/**
 * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
 * @vcpu: The vcpu pointer
 * @data: The data guest writes to PMXEVTYPER_EL0
 * @select_idx: The number of selected counter
 *
 * When OS accesses PMXEVTYPER_EL0, that means it wants to set a PMC to count an
 * event with given hardware event number. Here we call perf_event API to
 * emulate this action and create a kernel perf event for it.
 */
void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
				    u64 select_idx)
{
	struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, select_idx);
	u64 reg;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	reg = counter_index_to_evtreg(pmc->idx);
	__vcpu_sys_reg(vcpu, reg) = data & kvm_pmu_evtyper_mask(vcpu->kvm);

	kvm_pmu_create_perf_event(pmc);
}

void kvm_host_pmu_init(struct arm_pmu *pmu)
{
	struct arm_pmu_entry *entry;

	/*
	 * Check the sanitised PMU version for the system, as KVM does not
	 * support implementations where PMUv3 exists on a subset of CPUs.
	 */
	if (!pmuv3_implemented(kvm_arm_pmu_get_pmuver_limit()))
		return;

	mutex_lock(&arm_pmus_lock);

	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		goto out_unlock;

	entry->arm_pmu = pmu;
	list_add_tail(&entry->entry, &arm_pmus);

	if (list_is_singular(&arm_pmus))
		static_branch_enable(&kvm_arm_pmu_available);

out_unlock:
	mutex_unlock(&arm_pmus_lock);
}

static struct arm_pmu *kvm_pmu_probe_armpmu(void)
{
	struct arm_pmu *tmp, *pmu = NULL;
	struct arm_pmu_entry *entry;
	int cpu;

	mutex_lock(&arm_pmus_lock);

	/*
	 * It is safe to use a stale cpu to iterate the list of PMUs so long as
	 * the same value is used for the entirety of the loop. Given this, and
	 * the fact that no percpu data is used for the lookup there is no need
	 * to disable preemption.
	 *
	 * It is still necessary to get a valid cpu, though, to probe for the
	 * default PMU instance as userspace is not required to specify a PMU
	 * type. In order to uphold the preexisting behavior KVM selects the
	 * PMU instance for the core during vcpu init. A dependent use
	 * case would be a user with disdain of all things big.LITTLE that
	 * affines the VMM to a particular cluster of cores.
	 *
	 * In any case, userspace should just do the sane thing and use the UAPI
	 * to select a PMU type directly. But, be wary of the baggage being
	 * carried here.
	 */
	cpu = raw_smp_processor_id();
	list_for_each_entry(entry, &arm_pmus, entry) {
		tmp = entry->arm_pmu;

		if (cpumask_test_cpu(cpu, &tmp->supported_cpus)) {
			pmu = tmp;
			break;
		}
	}

	mutex_unlock(&arm_pmus_lock);

	return pmu;
}

u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
{
	unsigned long *bmap = vcpu->kvm->arch.pmu_filter;
	u64 val, mask = 0;
	int base, i, nr_events;

	if (!kvm_vcpu_has_pmu(vcpu))
		return 0;

	if (!pmceid1) {
		val = read_sysreg(pmceid0_el0);
		/* always support CHAIN */
		val |= BIT(ARMV8_PMUV3_PERFCTR_CHAIN);
		base = 0;
	} else {
		val = read_sysreg(pmceid1_el0);
		/*
		 * Don't advertise STALL_SLOT*, as PMMIR_EL0 is handled
		 * as RAZ
		 */
		val &= ~(BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32) |
			 BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND - 32) |
			 BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND - 32));
		base = 32;
	}

	if (!bmap)
		return val;

	nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;

	for (i = 0; i < 32; i += 8) {
		u64 byte;

		byte = bitmap_get_value8(bmap, base + i);
		mask |= byte << i;
		if (nr_events >= (0x4000 + base + 32)) {
			byte = bitmap_get_value8(bmap, 0x4000 + base + i);
			mask |= byte << (32 + i);
		}
	}

	return val & mask;
}

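/*
 * One-off reload of the PMU: re-evaluate PMCR_EL0 and drop any
 * enable/interrupt/overflow bits for counters that do not exist on
 * this vPMU.
 */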
void kvm_vcpu_reload_pmu(struct kvm_vcpu *vcpu)
{
	u64 mask = kvm_pmu_valid_counter_mask(vcpu);

	kvm_pmu_handle_pmcr(vcpu, kvm_vcpu_read_pmcr(vcpu));

	__vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= mask;
	__vcpu_sys_reg(vcpu, PMINTENSET_EL1) &= mask;
	__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= mask;
}

int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
{
	if (!kvm_vcpu_has_pmu(vcpu))
		return 0;

	if (!vcpu->arch.pmu.created)
		return -EINVAL;

	/*
	 * A valid interrupt configuration for the PMU is either to have a
	 * properly configured interrupt number and using an in-kernel
	 * irqchip, or to not have an in-kernel GIC and not set an IRQ.
	 */
	if (irqchip_in_kernel(vcpu->kvm)) {
		int irq = vcpu->arch.pmu.irq_num;
		/*
		 * If we are using an in-kernel vgic, at this point we know
		 * the vgic will be initialized, so we can check the PMU irq
		 * number against the dimensions of the vgic and make sure
		 * it's valid.
		 */
		if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
			return -EINVAL;
	} else if (kvm_arm_pmu_irq_initialized(vcpu)) {
		return -EINVAL;
	}

	/* One-off reload of the PMU on first run */
	kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);

	return 0;
}

static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
{
	if (irqchip_in_kernel(vcpu->kvm)) {
		int ret;

		/*
		 * If using the PMU with an in-kernel virtual GIC
		 * implementation, we require the GIC to be already
		 * initialized when initializing the PMU.
		 */
		if (!vgic_initialized(vcpu->kvm))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
					 &vcpu->arch.pmu);
		if (ret)
			return ret;
	}

	init_irq_work(&vcpu->arch.pmu.overflow_work,
		      kvm_pmu_perf_overflow_notify_vcpu);

	vcpu->arch.pmu.created = true;
	return 0;
}

/*
 * For one VM the interrupt type must be the same for each vcpu.
 * As a PPI, the interrupt number is the same for all vcpus,
 * while as an SPI it must be a separate number per vcpu.
 */
static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
{
	unsigned long i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (!kvm_arm_pmu_irq_initialized(vcpu))
			continue;

		if (irq_is_ppi(irq)) {
			if (vcpu->arch.pmu.irq_num != irq)
				return false;
		} else {
			if (vcpu->arch.pmu.irq_num == irq)
				return false;
		}
	}

	return true;
}

/**
 * kvm_arm_pmu_get_max_counters - Return the max number of PMU counters.
 * @kvm: The kvm pointer
 */
u8 kvm_arm_pmu_get_max_counters(struct kvm *kvm)
{
	struct arm_pmu *arm_pmu = kvm->arch.arm_pmu;

	/*
	 * The arm_pmu->num_events considers the cycle counter as well.
	 * Ignore that and return only the general-purpose counters.
	 */
	return arm_pmu->num_events - 1;
}

static void kvm_arm_set_pmu(struct kvm *kvm, struct arm_pmu *arm_pmu)
{
	lockdep_assert_held(&kvm->arch.config_lock);

	kvm->arch.arm_pmu = arm_pmu;
	kvm->arch.pmcr_n = kvm_arm_pmu_get_max_counters(kvm);
}

/**
 * kvm_arm_set_default_pmu - No PMU set, get the default one.
 * @kvm: The kvm pointer
 *
 * The observant among you will notice that the supported_cpus
 * mask does not get updated for the default PMU even though it
 * is quite possible the selected instance supports only a
 * subset of cores in the system. This is intentional, and
 * upholds the preexisting behavior on heterogeneous systems
 * where vCPUs can be scheduled on any core but the guest
 * counters could stop working.
 */
int kvm_arm_set_default_pmu(struct kvm *kvm)
{
	struct arm_pmu *arm_pmu = kvm_pmu_probe_armpmu();

	if (!arm_pmu)
		return -ENODEV;

	kvm_arm_set_pmu(kvm, arm_pmu);
	return 0;
}

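/*
 * Userspace can only change the PMU instance before the VM has run,
 * and not once an event filter has been installed against a different
 * instance.
 */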
static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id)
{
	struct kvm *kvm = vcpu->kvm;
	struct arm_pmu_entry *entry;
	struct arm_pmu *arm_pmu;
	int ret = -ENXIO;

	lockdep_assert_held(&kvm->arch.config_lock);
	mutex_lock(&arm_pmus_lock);

	list_for_each_entry(entry, &arm_pmus, entry) {
		arm_pmu = entry->arm_pmu;
		if (arm_pmu->pmu.type == pmu_id) {
			if (kvm_vm_has_ran_once(kvm) ||
			    (kvm->arch.pmu_filter && kvm->arch.arm_pmu != arm_pmu)) {
				ret = -EBUSY;
				break;
			}

			kvm_arm_set_pmu(kvm, arm_pmu);
			cpumask_copy(kvm->arch.supported_cpus, &arm_pmu->supported_cpus);
			ret = 0;
			break;
		}
	}

	mutex_unlock(&arm_pmus_lock);
	return ret;
}

int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	struct kvm *kvm = vcpu->kvm;

	lockdep_assert_held(&kvm->arch.config_lock);

	if (!kvm_vcpu_has_pmu(vcpu))
		return -ENODEV;

	if (vcpu->arch.pmu.created)
		return -EBUSY;

	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

		if (!irqchip_in_kernel(kvm))
			return -EINVAL;

		if (get_user(irq, uaddr))
			return -EFAULT;

		/* The PMU overflow interrupt can be a PPI or a valid SPI. */
		if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
			return -EINVAL;

		if (!pmu_irq_is_valid(kvm, irq))
			return -EINVAL;

		if (kvm_arm_pmu_irq_initialized(vcpu))
			return -EBUSY;

		kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
		vcpu->arch.pmu.irq_num = irq;
		return 0;
	}
	case KVM_ARM_VCPU_PMU_V3_FILTER: {
		u8 pmuver = kvm_arm_pmu_get_pmuver_limit();
		struct kvm_pmu_event_filter __user *uaddr;
		struct kvm_pmu_event_filter filter;
		int nr_events;

		/*
		 * Allow userspace to specify an event filter for the entire
		 * event range supported by PMUVer of the hardware, rather
		 * than the guest's PMUVer for KVM backward compatibility.
		 */
		nr_events = __kvm_pmu_event_mask(pmuver) + 1;

		uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;

		if (copy_from_user(&filter, uaddr, sizeof(filter)))
			return -EFAULT;

		if (((u32)filter.base_event + filter.nevents) > nr_events ||
		    (filter.action != KVM_PMU_EVENT_ALLOW &&
		     filter.action != KVM_PMU_EVENT_DENY))
			return -EINVAL;

		if (kvm_vm_has_ran_once(kvm))
			return -EBUSY;

		if (!kvm->arch.pmu_filter) {
			kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT);
			if (!kvm->arch.pmu_filter)
				return -ENOMEM;

			/*
			 * The default depends on the first applied filter.
			 * If it allows events, the default is to deny.
			 * Conversely, if the first filter denies a set of
			 * events, the default is to allow.
			 */
			if (filter.action == KVM_PMU_EVENT_ALLOW)
				bitmap_zero(kvm->arch.pmu_filter, nr_events);
			else
				bitmap_fill(kvm->arch.pmu_filter, nr_events);
		}

		if (filter.action == KVM_PMU_EVENT_ALLOW)
			bitmap_set(kvm->arch.pmu_filter, filter.base_event, filter.nevents);
		else
			bitmap_clear(kvm->arch.pmu_filter, filter.base_event, filter.nevents);

		return 0;
	}
	case KVM_ARM_VCPU_PMU_V3_SET_PMU: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int pmu_id;

		if (get_user(pmu_id, uaddr))
			return -EFAULT;

		return kvm_arm_pmu_v3_set_pmu(vcpu, pmu_id);
	}
	case KVM_ARM_VCPU_PMU_V3_INIT:
		return kvm_arm_pmu_v3_init(vcpu);
	}

	return -ENXIO;
}

int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

		if (!irqchip_in_kernel(vcpu->kvm))
			return -EINVAL;

		if (!kvm_vcpu_has_pmu(vcpu))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		irq = vcpu->arch.pmu.irq_num;
		return put_user(irq, uaddr);
	}
	}

	return -ENXIO;
}

int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ:
	case KVM_ARM_VCPU_PMU_V3_INIT:
	case KVM_ARM_VCPU_PMU_V3_FILTER:
	case KVM_ARM_VCPU_PMU_V3_SET_PMU:
		if (kvm_vcpu_has_pmu(vcpu))
			return 0;
	}

	return -ENXIO;
}

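/*
 * The PMU version advertised to guests is the sanitised host value,
 * capped at PMUv3p5 (the highest version KVM currently virtualises).
 */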
u8 kvm_arm_pmu_get_pmuver_limit(void)
{
	u64 tmp;

	tmp = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
	tmp = cpuid_feature_cap_perfmon_field(tmp,
					      ID_AA64DFR0_EL1_PMUVer_SHIFT,
					      ID_AA64DFR0_EL1_PMUVer_V3P5);
	return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), tmp);
}

/**
 * kvm_vcpu_read_pmcr - Read PMCR_EL0 register for the vCPU
 * @vcpu: The vcpu pointer
 */
u64 kvm_vcpu_read_pmcr(struct kvm_vcpu *vcpu)
{
	u64 pmcr = __vcpu_sys_reg(vcpu, PMCR_EL0);

	return u64_replace_bits(pmcr, vcpu->kvm->arch.pmcr_n, ARMV8_PMU_PMCR_N);
}