// SPDX-License-Identifier: GPL-2.0-only

#include <linux/irqchip/arm-gic-v3.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/kstrtox.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/string_choices.h>
#include <kvm/arm_vgic.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_asm.h>

#include "vgic-mmio.h"
#include "vgic.h"

static bool group0_trap;
static bool group1_trap;
static bool common_trap;
static bool dir_trap;
static bool gicv4_enable;

void vgic_v3_configure_hcr(struct kvm_vcpu *vcpu,
			   struct ap_list_summary *als)
{
	struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;

	if (!irqchip_in_kernel(vcpu->kvm))
		return;

	cpuif->vgic_hcr = ICH_HCR_EL2_En;

	if (irqs_pending_outside_lrs(als))
		cpuif->vgic_hcr |= ICH_HCR_EL2_NPIE;
	if (irqs_active_outside_lrs(als))
		cpuif->vgic_hcr |= ICH_HCR_EL2_LRENPIE;
	if (irqs_outside_lrs(als))
		cpuif->vgic_hcr |= ICH_HCR_EL2_UIE;

	if (!als->nr_sgi)
		cpuif->vgic_hcr |= ICH_HCR_EL2_vSGIEOICount;

	cpuif->vgic_hcr |= (cpuif->vgic_vmcr & ICH_VMCR_EL2_VENG0_MASK) ?
			   ICH_HCR_EL2_VGrp0DIE : ICH_HCR_EL2_VGrp0EIE;
	cpuif->vgic_hcr |= (cpuif->vgic_vmcr & ICH_VMCR_EL2_VENG1_MASK) ?
			   ICH_HCR_EL2_VGrp1DIE : ICH_HCR_EL2_VGrp1EIE;

	/*
	 * Dealing with EOImode=1 is a massive source of headache. Not
	 * only do we need to track that we have active interrupts
	 * outside of the LRs and force DIR to be trapped, we also
	 * need to deal with SPIs that can be deactivated on another
	 * CPU.
	 *
	 * On systems that do not implement TDIR, force the bit in the
	 * shadow state anyway to avoid IPI-ing on these poor sods.
	 *
	 * Note that we set the trap irrespective of EOIMode, as that
	 * can change behind our back without any warning...
	 */
	if (!cpus_have_final_cap(ARM64_HAS_ICH_HCR_EL2_TDIR) ||
	    irqs_active_outside_lrs(als) ||
	    atomic_read(&vcpu->kvm->arch.vgic.active_spis))
		cpuif->vgic_hcr |= ICH_HCR_EL2_TDIR;
}

static bool lr_signals_eoi_mi(u64 lr_val)
{
	return !(lr_val & ICH_LR_STATE) && (lr_val & ICH_LR_EOI) &&
	       !(lr_val & ICH_LR_HW);
}

static void vgic_v3_fold_lr(struct kvm_vcpu *vcpu, u64 val)
{
	struct vgic_irq *irq;
	bool is_v2_sgi = false;
	bool deactivated;
	u32 intid;

	if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
		intid = val & ICH_LR_VIRTUAL_ID_MASK;
	} else {
		intid = val & GICH_LR_VIRTUALID;
		is_v2_sgi = vgic_irq_is_sgi(intid);
	}

	irq = vgic_get_vcpu_irq(vcpu, intid);
	if (!irq)	/* An LPI could have been unmapped. */
		return;

	scoped_guard(raw_spinlock, &irq->irq_lock) {
		/* Always preserve the active bit for !LPIs, note deactivation */
		if (irq->intid >= VGIC_MIN_LPI)
			val &= ~ICH_LR_ACTIVE_BIT;
		deactivated = irq->active && !(val & ICH_LR_ACTIVE_BIT);
		irq->active = !!(val & ICH_LR_ACTIVE_BIT);

		/* Edge is the only case where we preserve the pending bit */
		if (irq->config == VGIC_CONFIG_EDGE &&
		    (val & ICH_LR_PENDING_BIT))
			irq->pending_latch = true;

		/*
		 * Clear soft pending state when level irqs have been acked.
		 */
		if (irq->config == VGIC_CONFIG_LEVEL && !(val & ICH_LR_STATE))
			irq->pending_latch = false;

		if (is_v2_sgi) {
			u8 cpuid = FIELD_GET(GICH_LR_PHYSID_CPUID, val);

			if (irq->active)
				irq->active_source = cpuid;

			if (val & ICH_LR_PENDING_BIT)
				irq->source |= BIT(cpuid);
		}

		/* Handle resampling for mapped interrupts if required */
		vgic_irq_handle_resampling(irq, deactivated, val & ICH_LR_PENDING_BIT);

		irq->on_lr = false;
	}

	/* Notify fds when the guest EOI'ed a level-triggered SPI, and drop the refcount */
	if (deactivated && lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid)) {
		kvm_notify_acked_irq(vcpu->kvm, 0,
				     intid - VGIC_NR_PRIVATE_IRQS);
		atomic_dec_if_positive(&vcpu->kvm->arch.vgic.active_spis);
	}

	vgic_put_irq(vcpu->kvm, irq);
}

static u64 vgic_v3_compute_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq);

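/*
 * Deactivate the physical interrupt backing a HW-mapped virtual
 * interrupt: via the CDDI GIC instruction when running on the GICv5
 * legacy interface, or a plain DIR write otherwise.
 */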
static void vgic_v3_deactivate_phys(u32 intid)
{
	if (cpus_have_final_cap(ARM64_HAS_GICV5_LEGACY))
		gic_insn(intid | FIELD_PREP(GICV5_GIC_CDDI_TYPE_MASK, 1), CDDI);
	else
		gic_write_dir(intid);
}

void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3;
	u32 eoicount = FIELD_GET(ICH_HCR_EL2_EOIcount, cpuif->vgic_hcr);
	struct vgic_irq *irq;

	DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());

	for (int lr = 0; lr < cpuif->used_lrs; lr++)
		vgic_v3_fold_lr(vcpu, cpuif->vgic_lr[lr]);

	/*
	 * EOIMode=0: use EOIcount to emulate deactivation. We are
	 * guaranteed to deactivate in reverse order of the activation, so
	 * just pick one active interrupt after the other in the ap_list,
	 * and replay the deactivation as if the CPU was doing it. We also
	 * rely on priority drop to have taken place, and the list to be
	 * sorted by priority.
	 */
	list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
		u64 lr;

		/*
		 * I would have loved to write this using a scoped_guard(),
		 * but using 'continue' here is a total train wreck.
		 */
		if (!eoicount) {
			break;
		} else {
			guard(raw_spinlock)(&irq->irq_lock);

			if (!(likely(vgic_target_oracle(irq) == vcpu) &&
			      irq->active))
				continue;

			lr = vgic_v3_compute_lr(vcpu, irq) & ~ICH_LR_ACTIVE_BIT;
		}

		if (lr & ICH_LR_HW)
			vgic_v3_deactivate_phys(FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));

		vgic_v3_fold_lr(vcpu, lr);
		eoicount--;
	}

	cpuif->used_lrs = 0;
}

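/*
 * Deactivate an interrupt on behalf of the guest, typically in response
 * to a trapped DIR write. @val holds the INTID, with the GICv2 SGI
 * source CPUID in bits [12:10].
 */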
void vgic_v3_deactivate(struct kvm_vcpu *vcpu, u64 val)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3;
	u32 model = vcpu->kvm->arch.vgic.vgic_model;
	struct kvm_vcpu *target_vcpu = NULL;
	bool mmio = false, is_v2_sgi;
	struct vgic_irq *irq;
	unsigned long flags;
	u64 lr = 0;
	u8 cpuid;

	/* Snapshot CPUID, and remove it from the INTID */
	cpuid = FIELD_GET(GENMASK_ULL(12, 10), val);
	val &= ~GENMASK_ULL(12, 10);

	is_v2_sgi = (model == KVM_DEV_TYPE_ARM_VGIC_V2 &&
		     val < VGIC_NR_SGIS);

	/*
	 * We only deal with DIR when EOIMode==1, and only for SGI,
	 * PPI or SPI.
	 */
	if (!(cpuif->vgic_vmcr & ICH_VMCR_EL2_VEOIM_MASK) ||
	    val >= vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)
		return;

	/* Make sure we're in the same context as LR handling */
	local_irq_save(flags);

	irq = vgic_get_vcpu_irq(vcpu, val);
	if (WARN_ON_ONCE(!irq))
		goto out;

	/*
	 * EOIMode=1: we must rely on traps to handle deactivate of
	 * overflowing interrupts, as there is no ordering guarantee and
	 * EOIcount isn't being incremented. Priority drop will have taken
	 * place, as ICV_EOIxR_EL1 only affects the APRs and not the LRs.
	 *
	 * Three possibilities:
	 *
	 * - The irq is not queued on any CPU, and there is nothing to
	 *   do,
	 *
	 * - Or the irq is in an LR, meaning that its state is not
	 *   directly observable. Treat it bluntly by making it as if
	 *   this was a write to GICD_ICACTIVER, which will force an
	 *   exit on all vcpus. If it hurts, don't do that.
	 *
	 * - Or the irq is active, but not in an LR, and we can
	 *   directly deactivate it by building a pseudo-LR, fold it,
	 *   and queue a request to prune the resulting ap_list,
	 *
	 * Special care must be taken to match the source CPUID when
	 * deactivating a GICv2 SGI.
	 */
	scoped_guard(raw_spinlock, &irq->irq_lock) {
		target_vcpu = irq->vcpu;

		/* Not on any ap_list? */
		if (!target_vcpu)
			goto put;

		/*
		 * Urgh. We're deactivating something that we cannot
		 * observe yet... Big hammer time.
		 */
		if (irq->on_lr) {
			mmio = true;
			goto put;
		}

		/* GICv2 SGI: check that the cpuid matches */
		if (is_v2_sgi && irq->active_source != cpuid) {
			target_vcpu = NULL;
			goto put;
		}

		/* (with a Dalek voice) DEACTIVATE!!!! */
		lr = vgic_v3_compute_lr(vcpu, irq) & ~ICH_LR_ACTIVE_BIT;
	}

	if (lr & ICH_LR_HW)
		vgic_v3_deactivate_phys(FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));

	vgic_v3_fold_lr(vcpu, lr);

put:
	vgic_put_irq(vcpu->kvm, irq);

out:
	local_irq_restore(flags);

	if (mmio)
		vgic_mmio_write_cactive(vcpu, (val / 32) * 4, 4, BIT(val % 32));

	/* Force the ap_list to be pruned */
	if (target_vcpu)
		kvm_make_request(KVM_REQ_VGIC_PROCESS_UPDATE, target_vcpu);
}

/* Requires the irq to be locked already */
static u64 vgic_v3_compute_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
{
	u32 model = vcpu->kvm->arch.vgic.vgic_model;
	u64 val = irq->intid;
	bool allow_pending = true, is_v2_sgi;

	WARN_ON(irq->on_lr);

	is_v2_sgi = (vgic_irq_is_sgi(irq->intid) &&
		     model == KVM_DEV_TYPE_ARM_VGIC_V2);

	if (irq->active) {
		val |= ICH_LR_ACTIVE_BIT;
		if (is_v2_sgi)
			val |= irq->active_source << GICH_LR_PHYSID_CPUID_SHIFT;
		if (vgic_irq_is_multi_sgi(irq)) {
			allow_pending = false;
			val |= ICH_LR_EOI;
		}
	}

	if (irq->hw && !vgic_irq_needs_resampling(irq)) {
		val |= ICH_LR_HW;
		val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT;
		/*
		 * Never set pending+active on a HW interrupt, as the
		 * pending state is kept at the physical distributor
		 * level.
		 */
		if (irq->active)
			allow_pending = false;
	} else {
		if (irq->config == VGIC_CONFIG_LEVEL) {
			val |= ICH_LR_EOI;

			/*
			 * Software resampling doesn't work very well
			 * if we allow P+A, so let's not do that.
			 */
			if (irq->active)
				allow_pending = false;
		}
	}

	if (allow_pending && irq_is_pending(irq)) {
		val |= ICH_LR_PENDING_BIT;

		if (is_v2_sgi) {
			u32 src = ffs(irq->source);

			if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n",
					   irq->intid))
				return 0;

			val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
			if (irq->source & ~BIT(src - 1))
				val |= ICH_LR_EOI;
		}
	}

	if (irq->group)
		val |= ICH_LR_GROUP;

	val |= (u64)irq->priority << ICH_LR_PRIORITY_SHIFT;

	return val;
}

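/*
 * Install the LR computed for @irq into list register @lr of the shadow
 * state, consuming the pending latch for edge interrupts and one source
 * bit for a GICv2 SGI.
 */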
void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
{
	u32 model = vcpu->kvm->arch.vgic.vgic_model;
	u64 val = vgic_v3_compute_lr(vcpu, irq);

	vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = val;

	if (val & ICH_LR_PENDING_BIT) {
		if (irq->config == VGIC_CONFIG_EDGE)
			irq->pending_latch = false;

		if (vgic_irq_is_sgi(irq->intid) &&
		    model == KVM_DEV_TYPE_ARM_VGIC_V2) {
			u32 src = ffs(irq->source);

			irq->source &= ~BIT(src - 1);
			if (irq->source)
				irq->pending_latch = true;
		}
	}

	/*
	 * Level-triggered mapped IRQs are special because we only observe
	 * rising edges as input to the VGIC. We therefore lower the line
	 * level here, so that we can take new virtual IRQs. See
	 * vgic_v3_fold_lr_state for more info.
	 */
	if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT))
		irq->line_level = false;

	irq->on_lr = true;
}

void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr)
{
	vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = 0;
}

void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
{
	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
	u32 model = vcpu->kvm->arch.vgic.vgic_model;
	u32 vmcr;

	if (model == KVM_DEV_TYPE_ARM_VGIC_V2) {
		vmcr = FIELD_PREP(ICH_VMCR_EL2_VAckCtl, vmcrp->ackctl);
		vmcr |= FIELD_PREP(ICH_VMCR_EL2_VFIQEn, vmcrp->fiqen);
	} else {
		/*
		 * When emulating GICv3 on GICv3 with SRE=1 on, the
		 * VFIQEn bit is RES1 and the VAckCtl bit is RES0.
		 */
		vmcr = ICH_VMCR_EL2_VFIQEn_MASK;
	}

	vmcr |= FIELD_PREP(ICH_VMCR_EL2_VCBPR, vmcrp->cbpr);
	vmcr |= FIELD_PREP(ICH_VMCR_EL2_VEOIM, vmcrp->eoim);
	vmcr |= FIELD_PREP(ICH_VMCR_EL2_VBPR1, vmcrp->abpr);
	vmcr |= FIELD_PREP(ICH_VMCR_EL2_VBPR0, vmcrp->bpr);
	vmcr |= FIELD_PREP(ICH_VMCR_EL2_VPMR, vmcrp->pmr);
	vmcr |= FIELD_PREP(ICH_VMCR_EL2_VENG0, vmcrp->grpen0);
	vmcr |= FIELD_PREP(ICH_VMCR_EL2_VENG1, vmcrp->grpen1);

	cpu_if->vgic_vmcr = vmcr;
}

void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
{
	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
	u32 model = vcpu->kvm->arch.vgic.vgic_model;
	u32 vmcr;

	vmcr = cpu_if->vgic_vmcr;

	if (model == KVM_DEV_TYPE_ARM_VGIC_V2) {
		vmcrp->ackctl = FIELD_GET(ICH_VMCR_EL2_VAckCtl, vmcr);
		vmcrp->fiqen = FIELD_GET(ICH_VMCR_EL2_VFIQEn, vmcr);
	} else {
		/*
		 * When emulating GICv3 on GICv3 with SRE=1 on, the
		 * VFIQEn bit is RES1 and the VAckCtl bit is RES0.
		 */
		vmcrp->fiqen = 1;
		vmcrp->ackctl = 0;
	}

	vmcrp->cbpr = FIELD_GET(ICH_VMCR_EL2_VCBPR, vmcr);
	vmcrp->eoim = FIELD_GET(ICH_VMCR_EL2_VEOIM, vmcr);
	vmcrp->abpr = FIELD_GET(ICH_VMCR_EL2_VBPR1, vmcr);
	vmcrp->bpr = FIELD_GET(ICH_VMCR_EL2_VBPR0, vmcr);
	vmcrp->pmr = FIELD_GET(ICH_VMCR_EL2_VPMR, vmcr);
	vmcrp->grpen0 = FIELD_GET(ICH_VMCR_EL2_VENG0, vmcr);
	vmcrp->grpen1 = FIELD_GET(ICH_VMCR_EL2_VENG1, vmcr);
}

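/*
 * Reset value for the per-vcpu GICR_PENDBASER: inner read-alloc,
 * write-back cacheable, outer cacheability same as inner, inner
 * shareable.
 */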
#define INITIAL_PENDBASER_VALUE						  \
	(GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWb)		| \
	 GIC_BASER_CACHEABILITY(GICR_PENDBASER, OUTER, SameAsInner)	| \
	 GIC_BASER_SHAREABILITY(GICR_PENDBASER, InnerShareable))

void vgic_v3_reset(struct kvm_vcpu *vcpu)
{
	struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3;

	/*
	 * By forcing VMCR to zero, the GIC will restore the binary
	 * points to their reset values. Anything else resets to zero
	 * anyway.
	 */
	vgic_v3->vgic_vmcr = 0;

	/*
	 * If we are emulating a GICv3, we do it in an non-GICv2-compatible
	 * way, so we force SRE to 1 to demonstrate this to the guest.
	 * Also, we don't support any form of IRQ/FIQ bypass.
	 * This goes with the spec allowing the value to be RAO/WI.
	 */
	if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
		vgic_v3->vgic_sre = (ICC_SRE_EL1_DIB |
				     ICC_SRE_EL1_DFB |
				     ICC_SRE_EL1_SRE);
		vcpu->arch.vgic_cpu.pendbaser = INITIAL_PENDBASER_VALUE;
	} else {
		vgic_v3->vgic_sre = 0;
	}

	vcpu->arch.vgic_cpu.num_id_bits = FIELD_GET(ICH_VTR_EL2_IDbits,
						    kvm_vgic_global_state.ich_vtr_el2);
	vcpu->arch.vgic_cpu.num_pri_bits = FIELD_GET(ICH_VTR_EL2_PRIbits,
						     kvm_vgic_global_state.ich_vtr_el2) + 1;
}

void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu)
{
	struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3;

	if (!vgic_is_v3(vcpu->kvm))
		return;

	/* Hide GICv3 sysreg if necessary */
	if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2 ||
	    !irqchip_in_kernel(vcpu->kvm))
		vgic_v3->vgic_hcr |= (ICH_HCR_EL2_TALL0 | ICH_HCR_EL2_TALL1 |
				      ICH_HCR_EL2_TC);
}

int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq)
{
	struct kvm_vcpu *vcpu;
	int byte_offset, bit_nr;
	gpa_t pendbase, ptr;
	bool status;
	u8 val;
	int ret;
	unsigned long flags;

retry:
	vcpu = irq->target_vcpu;
	if (!vcpu)
		return 0;

	pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser);

	byte_offset = irq->intid / BITS_PER_BYTE;
	bit_nr = irq->intid % BITS_PER_BYTE;
	ptr = pendbase + byte_offset;

	ret = kvm_read_guest_lock(kvm, ptr, &val, 1);
	if (ret)
		return ret;

	status = val & (1 << bit_nr);

	raw_spin_lock_irqsave(&irq->irq_lock, flags);
	if (irq->target_vcpu != vcpu) {
		raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
		goto retry;
	}
	irq->pending_latch = status;
	vgic_queue_irq_unlock(vcpu->kvm, irq, flags);

	if (status) {
		/* clear consumed data */
		val &= ~(1 << bit_nr);
		ret = vgic_write_guest_lock(kvm, ptr, &val, 1);
		if (ret)
			return ret;
	}
	return 0;
}

/*
 * The deactivation of the doorbell interrupt will trigger the
 * unmapping of the associated vPE.
 */
static void unmap_all_vpes(struct kvm *kvm)
{
	struct vgic_dist *dist = &kvm->arch.vgic;
	int i;

	for (i = 0; i < dist->its_vm.nr_vpes; i++)
		free_irq(dist->its_vm.vpes[i]->irq, kvm_get_vcpu(kvm, i));
}

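/*
 * Re-request the vPE doorbell interrupts that unmap_all_vpes() freed,
 * undoing the unmapping performed above.
 */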
static void map_all_vpes(struct kvm *kvm)
{
	struct vgic_dist *dist = &kvm->arch.vgic;
	int i;

	for (i = 0; i < dist->its_vm.nr_vpes; i++)
		WARN_ON(vgic_v4_request_vpe_irq(kvm_get_vcpu(kvm, i),
						dist->its_vm.vpes[i]->irq));
}

/*
 * vgic_v3_save_pending_tables - Save the pending tables into guest RAM
 * kvm lock and all vcpu lock must be held
 */
int vgic_v3_save_pending_tables(struct kvm *kvm)
{
	struct vgic_dist *dist = &kvm->arch.vgic;
	struct vgic_irq *irq;
	gpa_t last_ptr = ~(gpa_t)0;
	bool vlpi_avail = false;
	unsigned long index;
	int ret = 0;
	u8 val;

	if (unlikely(!vgic_initialized(kvm)))
		return -ENXIO;

	/*
	 * A preparation for getting any VLPI states.
	 * The above vgic initialized check also ensures that the allocation
	 * and enabling of the doorbells have already been done.
	 */
	if (kvm_vgic_global_state.has_gicv4_1) {
		unmap_all_vpes(kvm);
		vlpi_avail = true;
	}

	xa_for_each(&dist->lpi_xa, index, irq) {
		int byte_offset, bit_nr;
		struct kvm_vcpu *vcpu;
		gpa_t pendbase, ptr;
		bool is_pending;
		bool stored;

		vcpu = irq->target_vcpu;
		if (!vcpu)
			continue;

		pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser);

		byte_offset = irq->intid / BITS_PER_BYTE;
		bit_nr = irq->intid % BITS_PER_BYTE;
		ptr = pendbase + byte_offset;

		if (ptr != last_ptr) {
			ret = kvm_read_guest_lock(kvm, ptr, &val, 1);
			if (ret)
				goto out;
			last_ptr = ptr;
		}

		stored = val & (1U << bit_nr);

		is_pending = irq->pending_latch;

		if (irq->hw && vlpi_avail)
			vgic_v4_get_vlpi_state(irq, &is_pending);

		if (stored == is_pending)
			continue;

		if (is_pending)
			val |= 1 << bit_nr;
		else
			val &= ~(1 << bit_nr);

		ret = vgic_write_guest_lock(kvm, ptr, &val, 1);
		if (ret)
			goto out;
	}

out:
	if (vlpi_avail)
		map_all_vpes(kvm);

	return ret;
}

/**
 * vgic_v3_rdist_overlap - check if a region overlaps with any
 * existing redistributor region
 *
 * @kvm: kvm handle
 * @base: base of the region
 * @size: size of region
 *
 * Return: true if there is an overlap
 */
bool vgic_v3_rdist_overlap(struct kvm *kvm, gpa_t base, size_t size)
{
	struct vgic_dist *d = &kvm->arch.vgic;
	struct vgic_redist_region *rdreg;

	list_for_each_entry(rdreg, &d->rd_regions, list) {
		if ((base + size > rdreg->base) &&
		    (base < rdreg->base + vgic_v3_rd_region_size(kvm, rdreg)))
			return true;
	}
	return false;
}

/*
 * Check for overlapping regions and for regions crossing the end of memory
 * for base addresses which have already been set.
 */
bool vgic_v3_check_base(struct kvm *kvm)
{
	struct vgic_dist *d = &kvm->arch.vgic;
	struct vgic_redist_region *rdreg;

	if (!IS_VGIC_ADDR_UNDEF(d->vgic_dist_base) &&
	    d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE < d->vgic_dist_base)
		return false;

	list_for_each_entry(rdreg, &d->rd_regions, list) {
		size_t sz = vgic_v3_rd_region_size(kvm, rdreg);

		if (vgic_check_iorange(kvm, VGIC_ADDR_UNDEF,
				       rdreg->base, SZ_64K, sz))
			return false;
	}

	if (IS_VGIC_ADDR_UNDEF(d->vgic_dist_base))
		return true;

	return !vgic_v3_rdist_overlap(kvm, d->vgic_dist_base,
				      KVM_VGIC_V3_DIST_SIZE);
}

/**
 * vgic_v3_rdist_free_slot - Look up registered rdist regions and identify one
 * which has free space to put a new rdist region.
 *
 * @rd_regions: redistributor region list head
 *
 * A redistributor region maps n redistributors, n = region size / (2 x 64kB).
 * Stride between redistributors is 0 and regions are filled in index order.
 *
 * Return: the redist region handle, if any, that has space to map a new rdist
 * region.
 */
struct vgic_redist_region *vgic_v3_rdist_free_slot(struct list_head *rd_regions)
{
	struct vgic_redist_region *rdreg;

	list_for_each_entry(rdreg, rd_regions, list) {
		if (!vgic_v3_redist_region_full(rdreg))
			return rdreg;
	}
	return NULL;
}

struct vgic_redist_region *vgic_v3_rdist_region_from_index(struct kvm *kvm,
							   u32 index)
{
	struct list_head *rd_regions = &kvm->arch.vgic.rd_regions;
	struct vgic_redist_region *rdreg;

	list_for_each_entry(rdreg, rd_regions, list) {
		if (rdreg->index == index)
			return rdreg;
	}
	return NULL;
}

int vgic_v3_map_resources(struct kvm *kvm)
{
	struct vgic_dist *dist = &kvm->arch.vgic;
	struct kvm_vcpu *vcpu;
	unsigned long c;

	kvm_for_each_vcpu(c, vcpu, kvm) {
		struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;

		if (IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr)) {
			kvm_debug("vcpu %ld redistributor base not set\n", c);
			return -ENXIO;
		}
	}

	if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base)) {
		kvm_debug("Need to set vgic distributor addresses first\n");
		return -ENXIO;
	}

	if (!vgic_v3_check_base(kvm)) {
		kvm_debug("VGIC redist and dist frames overlap\n");
		return -EINVAL;
	}

	/*
	 * For a VGICv3 we require the userland to explicitly initialize
	 * the VGIC before we need to use it.
	 */
	if (!vgic_initialized(kvm))
		return -EBUSY;

	if (kvm_vgic_global_state.has_gicv4_1)
		vgic_v4_configure_vsgis(kvm);

	return 0;
}

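/*
 * vgic_v3_cpuif_trap is flipped by vgic_v3_enable_cpuif_traps() when any
 * ICH_HCR_EL2 trap bits are in use; vgic_v3_has_v2_compat is set at probe
 * time when the host GIC exposes a GICv2-compatible CPU interface.
 */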
DEFINE_STATIC_KEY_FALSE(vgic_v3_cpuif_trap);
DEFINE_STATIC_KEY_FALSE(vgic_v3_has_v2_compat);

static int __init early_group0_trap_cfg(char *buf)
{
	return kstrtobool(buf, &group0_trap);
}
early_param("kvm-arm.vgic_v3_group0_trap", early_group0_trap_cfg);

static int __init early_group1_trap_cfg(char *buf)
{
	return kstrtobool(buf, &group1_trap);
}
early_param("kvm-arm.vgic_v3_group1_trap", early_group1_trap_cfg);

static int __init early_common_trap_cfg(char *buf)
{
	return kstrtobool(buf, &common_trap);
}
early_param("kvm-arm.vgic_v3_common_trap", early_common_trap_cfg);

static int __init early_gicv4_enable(char *buf)
{
	return kstrtobool(buf, &gicv4_enable);
}
early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable);

static const struct midr_range broken_seis[] = {
	MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM),
	MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM),
	MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_PRO),
	MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_PRO),
	MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_MAX),
	MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX),
	MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD),
	MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE),
	MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_PRO),
	MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_PRO),
	MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_MAX),
	MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_MAX),
	{},
};

static bool vgic_v3_broken_seis(void)
{
	return (is_kernel_in_hyp_mode() &&
		is_midr_in_range_list(broken_seis) &&
		(read_sysreg_s(SYS_ICH_VTR_EL2) & ICH_VTR_EL2_SEIS));
}

void noinstr kvm_compute_ich_hcr_trap_bits(struct alt_instr *alt,
					   __le32 *origptr, __le32 *updptr,
					   int nr_inst)
{
	u32 insn, oinsn, rd;
	u64 hcr = 0;

	if (cpus_have_cap(ARM64_WORKAROUND_CAVIUM_30115)) {
		group0_trap = true;
		group1_trap = true;
	}

	if (vgic_v3_broken_seis()) {
		/* We know that these machines have ICH_HCR_EL2.TDIR */
		group0_trap = true;
		group1_trap = true;
		dir_trap = true;
	}

	if (!cpus_have_cap(ARM64_HAS_ICH_HCR_EL2_TDIR))
		common_trap = true;

	if (group0_trap)
		hcr |= ICH_HCR_EL2_TALL0;
	if (group1_trap)
		hcr |= ICH_HCR_EL2_TALL1;
	if (common_trap)
		hcr |= ICH_HCR_EL2_TC;
	if (dir_trap)
		hcr |= ICH_HCR_EL2_TDIR;

	/* Compute target register */
	oinsn = le32_to_cpu(*origptr);
	rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, oinsn);

	/* movz rd, #(val & 0xffff) */
	insn = aarch64_insn_gen_movewide(rd,
					 (u16)hcr,
					 0,
					 AARCH64_INSN_VARIANT_64BIT,
					 AARCH64_INSN_MOVEWIDE_ZERO);
	*updptr = cpu_to_le32(insn);
}

void vgic_v3_enable_cpuif_traps(void)
{
	u64 traps = vgic_ich_hcr_trap_bits();

	if (traps) {
		kvm_info("GICv3 sysreg trapping enabled ([%s%s%s%s], reduced performance)\n",
			 (traps & ICH_HCR_EL2_TALL0) ? "G0" : "",
			 (traps & ICH_HCR_EL2_TALL1) ? "G1" : "",
			 (traps & ICH_HCR_EL2_TC) ? "C" : "",
			 (traps & ICH_HCR_EL2_TDIR) ? "D" : "");
		static_branch_enable(&vgic_v3_cpuif_trap);
	}
}

/**
 * vgic_v3_probe - probe for a VGICv3 compatible interrupt controller
 * @info:	pointer to the GIC description
 *
 * Returns 0 if the VGICv3 has been probed successfully, returns an error code
 * otherwise
 */
int vgic_v3_probe(const struct gic_kvm_info *info)
{
	u64 ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_gic_config);
	bool has_v2;
	int ret;

	has_v2 = ich_vtr_el2 >> 63;
	ich_vtr_el2 = (u32)ich_vtr_el2;

	/*
	 * The ListRegs field is 5 bits, but there is an architectural
	 * maximum of 16 list registers. Just ignore bit 4...
	 */
	kvm_vgic_global_state.nr_lr = (ich_vtr_el2 & 0xf) + 1;
	kvm_vgic_global_state.can_emulate_gicv2 = false;
	kvm_vgic_global_state.ich_vtr_el2 = ich_vtr_el2;

	/* GICv4 support? */
	if (info->has_v4) {
		kvm_vgic_global_state.has_gicv4 = gicv4_enable;
		kvm_vgic_global_state.has_gicv4_1 = info->has_v4_1 && gicv4_enable;
		kvm_info("GICv4%s support %s\n",
			 kvm_vgic_global_state.has_gicv4_1 ? ".1" : "",
			 str_enabled_disabled(gicv4_enable));
	}

	kvm_vgic_global_state.vcpu_base = 0;

	if (!info->vcpu.start) {
		kvm_info("GICv3: no GICV resource entry\n");
	} else if (!has_v2) {
		pr_warn(FW_BUG "CPU interface incapable of MMIO access\n");
	} else if (!PAGE_ALIGNED(info->vcpu.start)) {
		pr_warn("GICV physical address 0x%llx not page aligned\n",
			(unsigned long long)info->vcpu.start);
	} else if (kvm_get_mode() != KVM_MODE_PROTECTED) {
		kvm_vgic_global_state.vcpu_base = info->vcpu.start;
		kvm_vgic_global_state.can_emulate_gicv2 = true;
		ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2);
		if (ret) {
			kvm_err("Cannot register GICv2 KVM device.\n");
			return ret;
		}
		kvm_info("vgic-v2@%llx\n", info->vcpu.start);
	}
	ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V3);
	if (ret) {
		kvm_err("Cannot register GICv3 KVM device.\n");
		kvm_unregister_device_ops(KVM_DEV_TYPE_ARM_VGIC_V2);
		return ret;
	}

	if (kvm_vgic_global_state.vcpu_base == 0)
		kvm_info("disabling GICv2 emulation\n");

	/*
	 * Flip the static branch if the HW supports v2, even if we're
	 * not using it (such as in protected mode).
	 */
	if (has_v2)
		static_branch_enable(&vgic_v3_has_v2_compat);

	if (vgic_v3_broken_seis()) {
		kvm_info("GICv3 with broken locally generated SEI\n");
		kvm_vgic_global_state.ich_vtr_el2 &= ~ICH_VTR_EL2_SEIS;
	}

	vgic_v3_enable_cpuif_traps();

	kvm_vgic_global_state.vctrl_base = NULL;
	kvm_vgic_global_state.type = VGIC_V3;
	kvm_vgic_global_state.max_gic_vcpus = VGIC_V3_MAX_CPUS;

	return 0;
}

void vgic_v3_load(struct kvm_vcpu *vcpu)
{
	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;

	/* If the vgic is nested, perform the full state loading */
	if (vgic_state_is_nested(vcpu)) {
		vgic_v3_load_nested(vcpu);
		return;
	}

	if (likely(!is_protected_kvm_enabled()))
		kvm_call_hyp(__vgic_v3_restore_vmcr_aprs, cpu_if);

	if (has_vhe())
		__vgic_v3_activate_traps(cpu_if);

	WARN_ON(vgic_v4_load(vcpu));
}

void vgic_v3_put(struct kvm_vcpu *vcpu)
{
	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;

	if (vgic_state_is_nested(vcpu)) {
		vgic_v3_put_nested(vcpu);
		return;
	}

	if (likely(!is_protected_kvm_enabled()))
		kvm_call_hyp(__vgic_v3_save_aprs, cpu_if);
	WARN_ON(vgic_v4_put(vcpu));

	if (has_vhe())
		__vgic_v3_deactivate_traps(cpu_if);
}