// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015 Linaro Ltd.
 * Author: Shannon Zhao <shannon.zhao@linaro.org>
 */

#include <linux/cpu.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/list.h>
#include <linux/perf_event.h>
#include <linux/perf/arm_pmu.h>
#include <linux/uaccess.h>
#include <asm/kvm_emulate.h>
#include <kvm/arm_pmu.h>
#include <kvm/arm_vgic.h>

#define PERF_ATTR_CFG1_COUNTER_64BIT	BIT(0)

DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);

static LIST_HEAD(arm_pmus);
static DEFINE_MUTEX(arm_pmus_lock);

static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc);
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc);

static struct kvm_vcpu *kvm_pmc_to_vcpu(const struct kvm_pmc *pmc)
{
	return container_of(pmc, struct kvm_vcpu, arch.pmu.pmc[pmc->idx]);
}

static struct kvm_pmc *kvm_vcpu_idx_to_pmc(struct kvm_vcpu *vcpu, int cnt_idx)
{
	return &vcpu->arch.pmu.pmc[cnt_idx];
}

static u32 kvm_pmu_event_mask(struct kvm *kvm)
{
	unsigned int pmuver;

	pmuver = kvm->arch.arm_pmu->pmuver;

	switch (pmuver) {
	case ID_AA64DFR0_EL1_PMUVer_IMP:
		return GENMASK(9, 0);
	case ID_AA64DFR0_EL1_PMUVer_V3P1:
	case ID_AA64DFR0_EL1_PMUVer_V3P4:
	case ID_AA64DFR0_EL1_PMUVer_V3P5:
	case ID_AA64DFR0_EL1_PMUVer_V3P7:
		return GENMASK(15, 0);
	default:		/* Shouldn't be here, just for sanity */
		WARN_ONCE(1, "Unknown PMU version %d\n", pmuver);
		return 0;
	}
}

/**
 * kvm_pmc_is_64bit - determine if counter is 64bit
 * @pmc: counter context
 */
static bool kvm_pmc_is_64bit(struct kvm_pmc *pmc)
{
	return (pmc->idx == ARMV8_PMU_CYCLE_IDX ||
		kvm_pmu_is_3p5(kvm_pmc_to_vcpu(pmc)));
}

static bool kvm_pmc_has_64bit_overflow(struct kvm_pmc *pmc)
{
	u64 val = __vcpu_sys_reg(kvm_pmc_to_vcpu(pmc), PMCR_EL0);

	return (pmc->idx < ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LP)) ||
	       (pmc->idx == ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LC));
}

static bool kvm_pmu_counter_can_chain(struct kvm_pmc *pmc)
{
	return (!(pmc->idx & 1) && (pmc->idx + 1) < ARMV8_PMU_CYCLE_IDX &&
		!kvm_pmc_has_64bit_overflow(pmc));
}

static u32 counter_index_to_reg(u64 idx)
{
	return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + idx;
}

static u32 counter_index_to_evtreg(u64 idx)
{
	return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + idx;
}

static u64 kvm_pmu_get_pmc_value(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	u64 counter, reg, enabled, running;

	reg = counter_index_to_reg(pmc->idx);
	counter = __vcpu_sys_reg(vcpu, reg);

	/*
	 * The real counter value is equal to the value of the counter
	 * register plus the value the perf event has counted.
	 */
	if (pmc->perf_event)
		counter += perf_event_read_value(pmc->perf_event, &enabled,
						 &running);

	if (!kvm_pmc_is_64bit(pmc))
		counter = lower_32_bits(counter);

	return counter;
}
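/*
 * Worked example (illustrative): a 32-bit event counter whose saved
 * PMEVCNTR<n>_EL0 value is 0xfffffff0 and whose backing perf event has
 * counted 0x20 further events reads back as 0x10 above, since the sum is
 * truncated to 32 bits unless kvm_pmc_is_64bit() holds (the cycle counter,
 * or any counter of a PMUv3p5 guest).
 */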
/**
 * kvm_pmu_get_counter_value - get PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 */
u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
{
	if (!kvm_vcpu_has_pmu(vcpu))
		return 0;

	return kvm_pmu_get_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx));
}

static void kvm_pmu_set_pmc_value(struct kvm_pmc *pmc, u64 val, bool force)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	u64 reg;

	kvm_pmu_release_perf_event(pmc);

	reg = counter_index_to_reg(pmc->idx);

	if (vcpu_mode_is_32bit(vcpu) && pmc->idx != ARMV8_PMU_CYCLE_IDX &&
	    !force) {
		/*
		 * Even with PMUv3p5, AArch32 cannot write to the top
		 * 32bit of the counters. The only possible course of
		 * action is to use PMCR.P, which will reset them to
		 * 0 (the only use of the 'force' parameter). Keep the
		 * top half and take only the bottom half of the new
		 * value.
		 */
		val = (__vcpu_sys_reg(vcpu, reg) & GENMASK(63, 32)) |
		      lower_32_bits(val);
	}

	__vcpu_sys_reg(vcpu, reg) = val;

	/* Recreate the perf event to reflect the updated sample_period */
	kvm_pmu_create_perf_event(pmc);
}

/**
 * kvm_pmu_set_counter_value - set PMU counter value
 * @vcpu: The vcpu pointer
 * @select_idx: The counter index
 * @val: The counter value
 */
void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
{
	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx), val, false);
}

/**
 * kvm_pmu_release_perf_event - remove the perf event
 * @pmc: The PMU counter pointer
 */
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
{
	if (pmc->perf_event) {
		perf_event_disable(pmc->perf_event);
		perf_event_release_kernel(pmc->perf_event);
		pmc->perf_event = NULL;
	}
}

/**
 * kvm_pmu_stop_counter - stop PMU counter
 * @pmc: The PMU counter pointer
 *
 * If this counter has been configured to monitor some event, release it here.
 */
static void kvm_pmu_stop_counter(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	u64 reg, val;

	if (!pmc->perf_event)
		return;

	val = kvm_pmu_get_pmc_value(pmc);

	reg = counter_index_to_reg(pmc->idx);

	__vcpu_sys_reg(vcpu, reg) = val;

	kvm_pmu_release_perf_event(pmc);
}
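/*
 * Note on the stop/release split above: kvm_pmu_stop_counter() folds the
 * perf-side delta back into the shadow sysreg before the event is torn
 * down, so the guest-visible count survives kvm_pmu_release_perf_event().
 */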
/**
 * kvm_pmu_vcpu_init - assign pmu counter idx for cpu
 * @vcpu: The vcpu pointer
 */
void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_pmu *pmu = &vcpu->arch.pmu;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
		pmu->pmc[i].idx = i;
}

/**
 * kvm_pmu_vcpu_reset - reset pmu state for cpu
 * @vcpu: The vcpu pointer
 */
void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
{
	unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
	int i;

	for_each_set_bit(i, &mask, 32)
		kvm_pmu_stop_counter(kvm_vcpu_idx_to_pmc(vcpu, i));
}

/**
 * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu
 * @vcpu: The vcpu pointer
 */
void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	int i;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
		kvm_pmu_release_perf_event(kvm_vcpu_idx_to_pmc(vcpu, i));
	irq_work_sync(&vcpu->arch.pmu.overflow_work);
}

u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
{
	u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT;

	val &= ARMV8_PMU_PMCR_N_MASK;
	if (val == 0)
		return BIT(ARMV8_PMU_CYCLE_IDX);
	else
		return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX);
}

/**
 * kvm_pmu_enable_counter_mask - enable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENSET register
 *
 * Call perf_event_enable to start counting the perf event
 */
void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
		return;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		struct kvm_pmc *pmc;

		if (!(val & BIT(i)))
			continue;

		pmc = kvm_vcpu_idx_to_pmc(vcpu, i);

		if (!pmc->perf_event) {
			kvm_pmu_create_perf_event(pmc);
		} else {
			perf_event_enable(pmc->perf_event);
			if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
				kvm_debug("fail to enable perf event\n");
		}
	}
}

/**
 * kvm_pmu_disable_counter_mask - disable selected PMU counters
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCNTENCLR register
 *
 * Call perf_event_disable to stop counting the perf event
 */
void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
	int i;

	if (!kvm_vcpu_has_pmu(vcpu) || !val)
		return;

	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
		struct kvm_pmc *pmc;

		if (!(val & BIT(i)))
			continue;

		pmc = kvm_vcpu_idx_to_pmc(vcpu, i);

		if (pmc->perf_event)
			perf_event_disable(pmc->perf_event);
	}
}

static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
{
	u64 reg = 0;

	if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
		reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
		reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1);
	}

	return reg;
}
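/*
 * Example: the overflow condition computed by kvm_pmu_overflow_status()
 * above is nonzero only for counters whose bit is set in all of
 * PMOVSSET_EL0, PMCNTENSET_EL0 and PMINTENSET_EL1, and only while
 * PMCR_EL0.E is set; kvm_pmu_update_state() below turns that condition
 * into the level of the PMU overflow interrupt line.
 */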
static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	bool overflow;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	overflow = !!kvm_pmu_overflow_status(vcpu);
	if (pmu->irq_level == overflow)
		return;

	pmu->irq_level = overflow;

	if (likely(irqchip_in_kernel(vcpu->kvm))) {
		int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
					      pmu->irq_num, overflow, pmu);
		WARN_ON(ret);
	}
}

bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = &vcpu->arch.pmu;
	struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
	bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU;

	if (likely(irqchip_in_kernel(vcpu->kvm)))
		return false;

	return pmu->irq_level != run_level;
}

/*
 * Reflect the PMU overflow interrupt output level into the kvm_run structure
 */
void kvm_pmu_update_run(struct kvm_vcpu *vcpu)
{
	struct kvm_sync_regs *regs = &vcpu->run->s.regs;

	/* Populate the PMU bit of the device irq bitmap for user space */
	regs->device_irq_level &= ~KVM_ARM_DEV_PMU;
	if (vcpu->arch.pmu.irq_level)
		regs->device_irq_level |= KVM_ARM_DEV_PMU;
}

/**
 * kvm_pmu_flush_hwstate - flush pmu state to cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the host, and
 * inject an interrupt if that was the case.
 */
void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/**
 * kvm_pmu_sync_hwstate - sync pmu state from cpu
 * @vcpu: The vcpu pointer
 *
 * Check if the PMU has overflowed while we were running in the guest, and
 * inject an interrupt if that was the case.
 */
void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
{
	kvm_pmu_update_state(vcpu);
}

/*
 * When the perf interrupt is an NMI, we cannot safely notify the vcpu
 * corresponding to the event. This is why we need a callback to do it
 * once outside of the NMI context.
 */
static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
{
	struct kvm_vcpu *vcpu;

	vcpu = container_of(work, struct kvm_vcpu, arch.pmu.overflow_work);
	kvm_vcpu_kick(vcpu);
}

/*
 * Perform an increment on any of the counters described in @mask,
 * generating the overflow if required, and propagate it as a chained
 * event if possible.
 */
static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu,
				      unsigned long mask, u32 event)
{
	int i;

	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
		return;

	/* Weed out disabled counters */
	mask &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);

	for_each_set_bit(i, &mask, ARMV8_PMU_CYCLE_IDX) {
		struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, i);
		u64 type, reg;

		/* Filter on event type */
		type = __vcpu_sys_reg(vcpu, counter_index_to_evtreg(i));
		type &= kvm_pmu_event_mask(vcpu->kvm);
		if (type != event)
			continue;

		/* Increment this counter */
		reg = __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) + 1;
		if (!kvm_pmc_is_64bit(pmc))
			reg = lower_32_bits(reg);
		__vcpu_sys_reg(vcpu, counter_index_to_reg(i)) = reg;

		/* No overflow? move on */
		if (kvm_pmc_has_64bit_overflow(pmc) ? reg : lower_32_bits(reg))
			continue;

		/* Mark overflow */
		__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);

		if (kvm_pmu_counter_can_chain(pmc))
			kvm_pmu_counter_increment(vcpu, BIT(i + 1),
						  ARMV8_PMUV3_PERFCTR_CHAIN);
	}
}
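/*
 * Chaining example (illustrative): with PMCR_EL0.LP clear, incrementing
 * even counter 0 past 0xffffffff wraps it to 0, sets bit 0 of PMOVSSET_EL0
 * and feeds a CHAIN increment into odd counter 1 via the recursive call
 * above. The cycle counter never chains.
 */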
/* Compute the sample period for a given counter value */
static u64 compute_period(struct kvm_pmc *pmc, u64 counter)
{
	u64 val;

	if (kvm_pmc_is_64bit(pmc) && kvm_pmc_has_64bit_overflow(pmc))
		val = (-counter) & GENMASK(63, 0);
	else
		val = (-counter) & GENMASK(31, 0);

	return val;
}

/*
 * When the perf event overflows, set the overflow status and inform the vcpu.
 */
static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
				  struct perf_sample_data *data,
				  struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	int idx = pmc->idx;
	u64 period;

	cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);

	/*
	 * Reset the sample period to the architectural limit,
	 * i.e. the point where the counter overflows.
	 */
	period = compute_period(pmc, local64_read(&perf_event->count));

	local64_set(&perf_event->hw.period_left, 0);
	perf_event->attr.sample_period = period;
	perf_event->hw.sample_period = period;

	__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);

	if (kvm_pmu_counter_can_chain(pmc))
		kvm_pmu_counter_increment(vcpu, BIT(idx + 1),
					  ARMV8_PMUV3_PERFCTR_CHAIN);

	if (kvm_pmu_overflow_status(vcpu)) {
		kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);

		if (!in_nmi())
			kvm_vcpu_kick(vcpu);
		else
			irq_work_queue(&vcpu->arch.pmu.overflow_work);
	}

	cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
}

/**
 * kvm_pmu_software_increment - do software increment
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMSWINC register
 */
void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
{
	kvm_pmu_counter_increment(vcpu, val, ARMV8_PMUV3_PERFCTR_SW_INCR);
}
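/*
 * Example: a guest write of 0b101 to PMSWINC_EL0 bumps counters 0 and 2,
 * but only if they are enabled and programmed with the SW_INCR event;
 * other counters named in the mask are weeded out by the event-type check
 * in kvm_pmu_counter_increment().
 */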
/**
 * kvm_pmu_handle_pmcr - handle PMCR register
 * @vcpu: The vcpu pointer
 * @val: the value guest writes to PMCR register
 */
void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
{
	int i;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	/* Fixup PMCR_EL0 to reconcile the PMU version and the LP bit */
	if (!kvm_pmu_is_3p5(vcpu))
		val &= ~ARMV8_PMU_PMCR_LP;

	/* The reset bits don't indicate any state, and shouldn't be saved. */
	__vcpu_sys_reg(vcpu, PMCR_EL0) = val & ~(ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_P);

	if (val & ARMV8_PMU_PMCR_E) {
		kvm_pmu_enable_counter_mask(vcpu,
		       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
	} else {
		kvm_pmu_disable_counter_mask(vcpu,
		       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
	}

	if (val & ARMV8_PMU_PMCR_C)
		kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);

	if (val & ARMV8_PMU_PMCR_P) {
		unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);

		mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
		for_each_set_bit(i, &mask, 32)
			kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, i), 0, true);
	}
	kvm_vcpu_pmu_restore_guest(vcpu);
}

static bool kvm_pmu_counter_is_enabled(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);

	return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
	       (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(pmc->idx));
}

/**
 * kvm_pmu_create_perf_event - create a perf event for a counter
 * @pmc: Counter context
 */
static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc)
{
	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
	struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu;
	struct perf_event *event;
	struct perf_event_attr attr;
	u64 eventsel, reg, data;

	reg = counter_index_to_evtreg(pmc->idx);
	data = __vcpu_sys_reg(vcpu, reg);

	kvm_pmu_stop_counter(pmc);
	if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
		eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
	else
		eventsel = data & kvm_pmu_event_mask(vcpu->kvm);

	/*
	 * Neither SW increment nor chained events need to be backed
	 * by a perf event.
	 */
	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR ||
	    eventsel == ARMV8_PMUV3_PERFCTR_CHAIN)
		return;

	/*
	 * If we have a filter in place and the event isn't allowed, do
	 * not install a perf event either.
	 */
	if (vcpu->kvm->arch.pmu_filter &&
	    !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
		return;

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.type = arm_pmu->pmu.type;
	attr.size = sizeof(attr);
	attr.pinned = 1;
	attr.disabled = !kvm_pmu_counter_is_enabled(pmc);
	attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
	attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
	attr.exclude_hv = 1; /* Don't count EL2 events */
	attr.exclude_host = 1; /* Don't count host events */
	attr.config = eventsel;

	/*
	 * If counting with a 64bit counter, advertise it to the perf
	 * code, carefully dealing with the initial sample period
	 * which also depends on the overflow.
	 */
	if (kvm_pmc_is_64bit(pmc))
		attr.config1 |= PERF_ATTR_CFG1_COUNTER_64BIT;

	attr.sample_period = compute_period(pmc, kvm_pmu_get_pmc_value(pmc));

	event = perf_event_create_kernel_counter(&attr, -1, current,
						 kvm_pmu_perf_overflow, pmc);

	if (IS_ERR(event)) {
		pr_err_once("kvm: pmu event creation failed %ld\n",
			    PTR_ERR(event));
		return;
	}

	pmc->perf_event = event;
}
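/*
 * Sample-period example (illustrative): for a 32-bit counter currently
 * reading 0xfffffc00, compute_period() yields 0x400, so the host perf
 * event overflows exactly when the architectural counter would, and
 * kvm_pmu_perf_overflow() then reloads the period for the next wrap.
 */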
/**
 * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
 * @vcpu: The vcpu pointer
 * @data: The data guest writes to PMXEVTYPER_EL0
 * @select_idx: The number of selected counter
 *
 * When the guest OS accesses PMXEVTYPER_EL0, it wants a PMC to count an
 * event with the given hardware event number. Here we call the perf_event
 * API to emulate this action and create a kernel perf event for it.
 */
void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
				    u64 select_idx)
{
	struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, select_idx);
	u64 reg, mask;

	if (!kvm_vcpu_has_pmu(vcpu))
		return;

	mask  = ARMV8_PMU_EVTYPE_MASK;
	mask &= ~ARMV8_PMU_EVTYPE_EVENT;
	mask |= kvm_pmu_event_mask(vcpu->kvm);

	reg = counter_index_to_evtreg(pmc->idx);

	__vcpu_sys_reg(vcpu, reg) = data & mask;

	kvm_pmu_create_perf_event(pmc);
}

void kvm_host_pmu_init(struct arm_pmu *pmu)
{
	struct arm_pmu_entry *entry;

	if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI ||
	    pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
		return;

	mutex_lock(&arm_pmus_lock);

	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		goto out_unlock;

	entry->arm_pmu = pmu;
	list_add_tail(&entry->entry, &arm_pmus);

	if (list_is_singular(&arm_pmus))
		static_branch_enable(&kvm_arm_pmu_available);

out_unlock:
	mutex_unlock(&arm_pmus_lock);
}

static struct arm_pmu *kvm_pmu_probe_armpmu(void)
{
	struct perf_event_attr attr = { };
	struct perf_event *event;
	struct arm_pmu *pmu = NULL;

	/*
	 * Create a dummy event that only counts user cycles. As we'll never
	 * leave this function with the event being live, it will never
	 * count anything. But it allows us to probe some of the PMU
	 * details. Yes, this is terrible.
	 */
	attr.type = PERF_TYPE_RAW;
	attr.size = sizeof(attr);
	attr.pinned = 1;
	attr.disabled = 0;
	attr.exclude_user = 0;
	attr.exclude_kernel = 1;
	attr.exclude_hv = 1;
	attr.exclude_host = 1;
	attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
	attr.sample_period = GENMASK(63, 0);

	event = perf_event_create_kernel_counter(&attr, -1, current,
						 kvm_pmu_perf_overflow, &attr);

	if (IS_ERR(event)) {
		pr_err_once("kvm: pmu event creation failed %ld\n",
			    PTR_ERR(event));
		return NULL;
	}

	if (event->pmu) {
		pmu = to_arm_pmu(event->pmu);
		if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI ||
		    pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
			pmu = NULL;
	}

	perf_event_disable(event);
	perf_event_release_kernel(event);

	return pmu;
}

u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
{
	unsigned long *bmap = vcpu->kvm->arch.pmu_filter;
	u64 val, mask = 0;
	int base, i, nr_events;

	if (!kvm_vcpu_has_pmu(vcpu))
		return 0;

	if (!pmceid1) {
		val = read_sysreg(pmceid0_el0);
		/* always support CHAIN */
		val |= BIT(ARMV8_PMUV3_PERFCTR_CHAIN);
		base = 0;
	} else {
		val = read_sysreg(pmceid1_el0);
		/*
		 * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
		 * as RAZ
		 */
		if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4)
			val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
		base = 32;
	}

	if (!bmap)
		return val;

	nr_events = kvm_pmu_event_mask(vcpu->kvm) + 1;

	for (i = 0; i < 32; i += 8) {
		u64 byte;

		byte = bitmap_get_value8(bmap, base + i);
		mask |= byte << i;
		if (nr_events >= (0x4000 + base + 32)) {
			byte = bitmap_get_value8(bmap, 0x4000 + base + i);
			mask |= byte << (32 + i);
		}
	}

	return val & mask;
}
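/*
 * Layout note for the loop above: PMCEID0_EL0/PMCEID1_EL0 advertise common
 * events 0..31 and 32..63 in their low halves and the 0x4000-based extended
 * event IDs in their high halves, which is why the filter bitmap is sampled
 * once at 'base + i' and once at '0x4000 + base + i'.
 */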
int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
{
	if (!kvm_vcpu_has_pmu(vcpu))
		return 0;

	if (!vcpu->arch.pmu.created)
		return -EINVAL;

	/*
	 * A valid interrupt configuration for the PMU is either to have a
	 * properly configured interrupt number and using an in-kernel
	 * irqchip, or to not have an in-kernel GIC and not set an IRQ.
	 */
	if (irqchip_in_kernel(vcpu->kvm)) {
		int irq = vcpu->arch.pmu.irq_num;

		/*
		 * If we are using an in-kernel vgic, at this point we know
		 * the vgic will be initialized, so we can check the PMU irq
		 * number against the dimensions of the vgic and make sure
		 * it's valid.
		 */
		if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq))
			return -EINVAL;
	} else if (kvm_arm_pmu_irq_initialized(vcpu)) {
		return -EINVAL;
	}

	/* One-off reload of the PMU on first run */
	kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);

	return 0;
}

static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
{
	if (irqchip_in_kernel(vcpu->kvm)) {
		int ret;

		/*
		 * If using the PMU with an in-kernel virtual GIC
		 * implementation, we require the GIC to be already
		 * initialized when initializing the PMU.
		 */
		if (!vgic_initialized(vcpu->kvm))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num,
					 &vcpu->arch.pmu);
		if (ret)
			return ret;
	}

	init_irq_work(&vcpu->arch.pmu.overflow_work,
		      kvm_pmu_perf_overflow_notify_vcpu);

	vcpu->arch.pmu.created = true;
	return 0;
}

/*
 * For one VM the interrupt type must be the same for each vcpu.
 * As a PPI, the interrupt number is the same for all vcpus,
 * while as an SPI it must be a separate number per vcpu.
 */
static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
{
	unsigned long i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (!kvm_arm_pmu_irq_initialized(vcpu))
			continue;

		if (irq_is_ppi(irq)) {
			if (vcpu->arch.pmu.irq_num != irq)
				return false;
		} else {
			if (vcpu->arch.pmu.irq_num == irq)
				return false;
		}
	}

	return true;
}

static int kvm_arm_pmu_v3_set_pmu(struct kvm_vcpu *vcpu, int pmu_id)
{
	struct kvm *kvm = vcpu->kvm;
	struct arm_pmu_entry *entry;
	struct arm_pmu *arm_pmu;
	int ret = -ENXIO;

	lockdep_assert_held(&kvm->arch.config_lock);
	mutex_lock(&arm_pmus_lock);

	list_for_each_entry(entry, &arm_pmus, entry) {
		arm_pmu = entry->arm_pmu;
		if (arm_pmu->pmu.type == pmu_id) {
			if (kvm_vm_has_ran_once(kvm) ||
			    (kvm->arch.pmu_filter && kvm->arch.arm_pmu != arm_pmu)) {
				ret = -EBUSY;
				break;
			}

			kvm->arch.arm_pmu = arm_pmu;
			cpumask_copy(kvm->arch.supported_cpus, &arm_pmu->supported_cpus);
			ret = 0;
			break;
		}
	}

	mutex_unlock(&arm_pmus_lock);
	return ret;
}
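/*
 * Typical userspace ordering (illustrative, not enforced as a single API):
 * the VMM selects a PMU with KVM_ARM_VCPU_PMU_V3_SET_PMU and/or programs
 * KVM_ARM_VCPU_PMU_V3_IRQ and _FILTER, then finalises with
 * KVM_ARM_VCPU_PMU_V3_INIT. Once the VM has run, SET_PMU and FILTER fail
 * with -EBUSY, and any set_attr fails with -EBUSY after INIT has marked
 * the PMU as created.
 */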
int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	struct kvm *kvm = vcpu->kvm;

	lockdep_assert_held(&kvm->arch.config_lock);

	if (!kvm_vcpu_has_pmu(vcpu))
		return -ENODEV;

	if (vcpu->arch.pmu.created)
		return -EBUSY;

	if (!kvm->arch.arm_pmu) {
		/* No PMU set, get the default one */
		kvm->arch.arm_pmu = kvm_pmu_probe_armpmu();
		if (!kvm->arch.arm_pmu)
			return -ENODEV;
	}

	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

		if (!irqchip_in_kernel(kvm))
			return -EINVAL;

		if (get_user(irq, uaddr))
			return -EFAULT;

		/* The PMU overflow interrupt can be a PPI or a valid SPI. */
		if (!(irq_is_ppi(irq) || irq_is_spi(irq)))
			return -EINVAL;

		if (!pmu_irq_is_valid(kvm, irq))
			return -EINVAL;

		if (kvm_arm_pmu_irq_initialized(vcpu))
			return -EBUSY;

		kvm_debug("Set kvm ARM PMU irq: %d\n", irq);
		vcpu->arch.pmu.irq_num = irq;
		return 0;
	}
	case KVM_ARM_VCPU_PMU_V3_FILTER: {
		struct kvm_pmu_event_filter __user *uaddr;
		struct kvm_pmu_event_filter filter;
		int nr_events;

		nr_events = kvm_pmu_event_mask(kvm) + 1;

		uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;

		if (copy_from_user(&filter, uaddr, sizeof(filter)))
			return -EFAULT;

		if (((u32)filter.base_event + filter.nevents) > nr_events ||
		    (filter.action != KVM_PMU_EVENT_ALLOW &&
		     filter.action != KVM_PMU_EVENT_DENY))
			return -EINVAL;

		if (kvm_vm_has_ran_once(kvm))
			return -EBUSY;

		if (!kvm->arch.pmu_filter) {
			kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT);
			if (!kvm->arch.pmu_filter)
				return -ENOMEM;

			/*
			 * The default depends on the first applied filter.
			 * If it allows events, the default is to deny.
			 * Conversely, if the first filter denies a set of
			 * events, the default is to allow.
			 */
			if (filter.action == KVM_PMU_EVENT_ALLOW)
				bitmap_zero(kvm->arch.pmu_filter, nr_events);
			else
				bitmap_fill(kvm->arch.pmu_filter, nr_events);
		}

		if (filter.action == KVM_PMU_EVENT_ALLOW)
			bitmap_set(kvm->arch.pmu_filter, filter.base_event, filter.nevents);
		else
			bitmap_clear(kvm->arch.pmu_filter, filter.base_event, filter.nevents);

		return 0;
	}
	case KVM_ARM_VCPU_PMU_V3_SET_PMU: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int pmu_id;

		if (get_user(pmu_id, uaddr))
			return -EFAULT;

		return kvm_arm_pmu_v3_set_pmu(vcpu, pmu_id);
	}
	case KVM_ARM_VCPU_PMU_V3_INIT:
		return kvm_arm_pmu_v3_init(vcpu);
	}

	return -ENXIO;
}

int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ: {
		int __user *uaddr = (int __user *)(long)attr->addr;
		int irq;

		if (!irqchip_in_kernel(vcpu->kvm))
			return -EINVAL;

		if (!kvm_vcpu_has_pmu(vcpu))
			return -ENODEV;

		if (!kvm_arm_pmu_irq_initialized(vcpu))
			return -ENXIO;

		irq = vcpu->arch.pmu.irq_num;
		return put_user(irq, uaddr);
	}
	}

	return -ENXIO;
}

int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_ARM_VCPU_PMU_V3_IRQ:
	case KVM_ARM_VCPU_PMU_V3_INIT:
	case KVM_ARM_VCPU_PMU_V3_FILTER:
	case KVM_ARM_VCPU_PMU_V3_SET_PMU:
		if (kvm_vcpu_has_pmu(vcpu))
			return 0;
	}

	return -ENXIO;
}

u8 kvm_arm_pmu_get_pmuver_limit(void)
{
	u64 tmp;

	tmp = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
	tmp = cpuid_feature_cap_perfmon_field(tmp,
					      ID_AA64DFR0_EL1_PMUVer_SHIFT,
					      ID_AA64DFR0_EL1_PMUVer_V3P5);
	return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), tmp);
}