// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015, 2016 ARM Ltd.
 */

#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/list_sort.h>
#include <linux/nospec.h>

#include <asm/kvm_hyp.h>

#include "vgic.h"

#define CREATE_TRACE_POINTS
#include "trace.h"

struct vgic_global kvm_vgic_global_state __ro_after_init = {
	.gicv3_cpuif = STATIC_KEY_FALSE_INIT,
};

/*
 * Locking order is always:
 * kvm->lock (mutex)
 *   vcpu->mutex (mutex)
 *     kvm->arch.config_lock (mutex)
 *       its->cmd_lock (mutex)
 *         its->its_lock (mutex)
 *           vgic_dist->lpi_xa.xa_lock
 *             vgic_cpu->ap_list_lock	must be taken with IRQs disabled
 *               vgic_irq->irq_lock	must be taken with IRQs disabled
 *
 * As the ap_list_lock might be taken from the timer interrupt handler,
 * we have to disable IRQs before taking this lock and everything lower
 * than it.
 *
 * The config_lock has additional ordering requirements:
 * kvm->slots_lock
 *   kvm->srcu
 *     kvm->arch.config_lock
 *
 * If you need to take multiple locks, always take the upper lock first,
 * then the lower ones, e.g. first take the its_lock, then the irq_lock.
 * If you are already holding a lock and need to take a higher one, you
 * have to drop the lower ranking lock first and re-acquire it after having
 * taken the upper one.
 *
 * When taking more than one ap_list_lock at the same time, always take the
 * lowest numbered VCPU's ap_list_lock first, so:
 *   vcpuX->vcpu_id < vcpuY->vcpu_id:
 *     raw_spin_lock(vcpuX->arch.vgic_cpu.ap_list_lock);
 *     raw_spin_lock(vcpuY->arch.vgic_cpu.ap_list_lock);
 *
 * Since the VGIC must support injecting virtual interrupts from ISRs, we have
 * to use the raw_spin_lock_irqsave/raw_spin_unlock_irqrestore versions of
 * outer spinlocks for any lock that may be taken while injecting an interrupt.
 */

/*
 * Index the VM's xarray of mapped LPIs and return a reference to the IRQ
 * structure. The caller is expected to call vgic_put_irq() later once it's
 * finished with the IRQ.
 */
static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid)
{
	struct vgic_dist *dist = &kvm->arch.vgic;
	struct vgic_irq *irq = NULL;

	rcu_read_lock();

	irq = xa_load(&dist->lpi_xa, intid);
	if (!vgic_try_get_irq_ref(irq))
		irq = NULL;

	rcu_read_unlock();

	return irq;
}

/*
 * This looks up the virtual interrupt ID to get the corresponding
 * struct vgic_irq. It also increases the refcount, so any caller is expected
 * to call vgic_put_irq() once it's finished with this IRQ.
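 *
 * Note that only LPIs are actually reference-counted: SGIs, PPIs and SPIs are
 * statically allocated, so for those the get/put pair is effectively a no-op.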
 */
struct vgic_irq *vgic_get_irq(struct kvm *kvm, u32 intid)
{
	/* SPIs */
	if (intid >= VGIC_NR_PRIVATE_IRQS &&
	    intid < (kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)) {
		intid = array_index_nospec(intid, kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS);
		return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS];
	}

	/* LPIs */
	if (intid >= VGIC_MIN_LPI)
		return vgic_get_lpi(kvm, intid);

	return NULL;
}

struct vgic_irq *vgic_get_vcpu_irq(struct kvm_vcpu *vcpu, u32 intid)
{
	if (WARN_ON(!vcpu))
		return NULL;

	/* SGIs and PPIs */
	if (intid < VGIC_NR_PRIVATE_IRQS) {
		intid = array_index_nospec(intid, VGIC_NR_PRIVATE_IRQS);
		return &vcpu->arch.vgic_cpu.private_irqs[intid];
	}

	return vgic_get_irq(vcpu->kvm, intid);
}

static void vgic_release_lpi_locked(struct vgic_dist *dist, struct vgic_irq *irq)
{
	lockdep_assert_held(&dist->lpi_xa.xa_lock);
	__xa_erase(&dist->lpi_xa, irq->intid);
	kfree_rcu(irq, rcu);
}

static __must_check bool __vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
{
	if (irq->intid < VGIC_MIN_LPI)
		return false;

	return refcount_dec_and_test(&irq->refcount);
}

static __must_check bool vgic_put_irq_norelease(struct kvm *kvm, struct vgic_irq *irq)
{
	if (!__vgic_put_irq(kvm, irq))
		return false;

	irq->pending_release = true;
	return true;
}

void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
{
	struct vgic_dist *dist = &kvm->arch.vgic;

	if (irq->intid >= VGIC_MIN_LPI)
		might_lock(&dist->lpi_xa.xa_lock);

	if (!__vgic_put_irq(kvm, irq))
		return;

	xa_lock(&dist->lpi_xa);
	vgic_release_lpi_locked(dist, irq);
	xa_unlock(&dist->lpi_xa);
}

static void vgic_release_deleted_lpis(struct kvm *kvm)
{
	struct vgic_dist *dist = &kvm->arch.vgic;
	unsigned long intid;
	struct vgic_irq *irq;

	xa_lock(&dist->lpi_xa);

	xa_for_each(&dist->lpi_xa, intid, irq) {
		if (irq->pending_release)
			vgic_release_lpi_locked(dist, irq);
	}

	xa_unlock(&dist->lpi_xa);
}

void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	struct vgic_irq *irq, *tmp;
	bool deleted = false;
	unsigned long flags;

	raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);

	list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
		if (irq->intid >= VGIC_MIN_LPI) {
			raw_spin_lock(&irq->irq_lock);
			list_del(&irq->ap_list);
			irq->vcpu = NULL;
			raw_spin_unlock(&irq->irq_lock);
			deleted |= vgic_put_irq_norelease(vcpu->kvm, irq);
		}
	}

	raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);

	if (deleted)
		vgic_release_deleted_lpis(vcpu->kvm);
}

void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending)
{
	WARN_ON(irq_set_irqchip_state(irq->host_irq,
				      IRQCHIP_STATE_PENDING,
				      pending));
}

bool vgic_get_phys_line_level(struct vgic_irq *irq)
{
	bool line_level;

	BUG_ON(!irq->hw);

	if (irq->ops && irq->ops->get_input_level)
		return irq->ops->get_input_level(irq->intid);

	WARN_ON(irq_get_irqchip_state(irq->host_irq,
				      IRQCHIP_STATE_PENDING,
				      &line_level));
	return line_level;
}

/* Set/Clear the physical active state */
void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active)
{
	BUG_ON(!irq->hw);
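	/* Forward the requested state to the physical irqchip, warn on failure */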
	WARN_ON(irq_set_irqchip_state(irq->host_irq,
				      IRQCHIP_STATE_ACTIVE,
				      active));
}

/**
 * vgic_target_oracle - compute the target vcpu for an irq
 *
 * @irq:	The irq to route. Must be already locked.
 *
 * Based on the current state of the interrupt (enabled, pending,
 * active, vcpu and target_vcpu), compute the next vcpu this should be
 * given to. Return NULL if this shouldn't be injected at all.
 *
 * Requires the IRQ lock to be held.
 */
static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq)
{
	lockdep_assert_held(&irq->irq_lock);

	/* If the interrupt is active, it must stay on the current vcpu */
	if (irq->active)
		return irq->vcpu ? : irq->target_vcpu;

	/*
	 * If the IRQ is not active but enabled and pending, we should direct
	 * it to its configured target VCPU.
	 * If the distributor is disabled, pending interrupts shouldn't be
	 * forwarded.
	 */
	if (irq->enabled && irq_is_pending(irq)) {
		if (unlikely(irq->target_vcpu &&
			     !irq->target_vcpu->kvm->arch.vgic.enabled))
			return NULL;

		return irq->target_vcpu;
	}

	/* If neither active nor pending and enabled, then this IRQ should not
	 * be queued to any VCPU.
	 */
	return NULL;
}

/*
 * The order of items in the ap_lists defines how we'll pack things in LRs as
 * well, the first items in the list being the first things populated in the
 * LRs.
 *
 * A hard rule is that active interrupts can never be pushed out of the LRs
 * (and therefore take priority) since we cannot reliably trap on deactivation
 * of IRQs and therefore they have to be present in the LRs.
 *
 * Otherwise things should be sorted by the priority field and the GIC
 * hardware support will take care of preemption of priority groups etc.
 *
 * Return negative if "a" sorts before "b", 0 to preserve order, and positive
 * to sort "b" before "a".
 */
static int vgic_irq_cmp(void *priv, const struct list_head *a,
			const struct list_head *b)
{
	struct vgic_irq *irqa = container_of(a, struct vgic_irq, ap_list);
	struct vgic_irq *irqb = container_of(b, struct vgic_irq, ap_list);
	bool penda, pendb;
	int ret;

	/*
	 * list_sort may call this function with the same element when
	 * the list is fairly long.
	 */
	if (unlikely(irqa == irqb))
		return 0;

	raw_spin_lock(&irqa->irq_lock);
	raw_spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING);

	if (irqa->active || irqb->active) {
		ret = (int)irqb->active - (int)irqa->active;
		goto out;
	}

	penda = irqa->enabled && irq_is_pending(irqa);
	pendb = irqb->enabled && irq_is_pending(irqb);

	if (!penda || !pendb) {
		ret = (int)pendb - (int)penda;
		goto out;
	}

	/* Both pending and enabled, sort by priority */
	ret = irqa->priority - irqb->priority;
out:
	raw_spin_unlock(&irqb->irq_lock);
	raw_spin_unlock(&irqa->irq_lock);
	return ret;
}

/* Must be called with the ap_list_lock held */
static void vgic_sort_ap_list(struct kvm_vcpu *vcpu)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;

	lockdep_assert_held(&vgic_cpu->ap_list_lock);

	list_sort(NULL, &vgic_cpu->ap_list_head, vgic_irq_cmp);
}

/*
 * Only valid injection if changing level for level-triggered IRQs or for a
 * rising edge, and in-kernel connected IRQ lines can only be controlled by
 * their owner.
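 * A falling edge on an edge-triggered interrupt, or a level "change" that
 * leaves the line where it already was, is therefore silently ignored.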
 */
static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owner)
{
	if (irq->owner != owner)
		return false;

	switch (irq->config) {
	case VGIC_CONFIG_LEVEL:
		return irq->line_level != level;
	case VGIC_CONFIG_EDGE:
		return level;
	}

	return false;
}

/*
 * Check whether an IRQ needs to (and can) be queued to a VCPU's ap list.
 * Do the queuing if necessary, taking the right locks in the right order.
 * Returns true when the IRQ was queued, false otherwise.
 *
 * Needs to be entered with the IRQ lock already held, but will return
 * with all locks dropped.
 */
bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
			   unsigned long flags) __releases(&irq->irq_lock)
{
	struct kvm_vcpu *vcpu;

	lockdep_assert_held(&irq->irq_lock);

retry:
	vcpu = vgic_target_oracle(irq);
	if (irq->vcpu || !vcpu) {
		/*
		 * If this IRQ is already on a VCPU's ap_list, then it
		 * cannot be moved or modified and there is no more work for
		 * us to do.
		 *
		 * Otherwise, if the irq is not pending and enabled, it does
		 * not need to be inserted into an ap_list and there is also
		 * no more work for us to do.
		 */
		raw_spin_unlock_irqrestore(&irq->irq_lock, flags);

		/*
		 * We have to kick the VCPU here, because we could be
		 * queueing an edge-triggered interrupt for which we
		 * get no EOI maintenance interrupt. In that case,
		 * while the IRQ is already on the VCPU's AP list, the
		 * VCPU could have EOI'ed the original interrupt and
		 * won't see this one until it exits for some other
		 * reason.
		 */
		if (vcpu) {
			kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
			kvm_vcpu_kick(vcpu);
		}
		return false;
	}

	/*
	 * We must unlock the irq lock to take the ap_list_lock where
	 * we are going to insert this new pending interrupt.
	 */
	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);

	/* someone can do stuff here, which we re-check below */

	raw_spin_lock_irqsave(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
	raw_spin_lock(&irq->irq_lock);

	/*
	 * Did something change behind our backs?
	 *
	 * There are two cases:
	 * 1) The irq lost its pending state or was disabled behind our
	 *    backs and/or it was queued to another VCPU's ap_list.
	 * 2) Someone changed the affinity on this irq behind our
	 *    backs and we are now holding the wrong ap_list_lock.
	 *
	 * In both cases, drop the locks and retry.
	 */

	if (unlikely(irq->vcpu || vcpu != vgic_target_oracle(irq))) {
		raw_spin_unlock(&irq->irq_lock);
		raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock,
					   flags);

		raw_spin_lock_irqsave(&irq->irq_lock, flags);
		goto retry;
	}

	/*
	 * Grab a reference to the irq to reflect the fact that it is
	 * now in the ap_list. This is safe as the caller must already hold a
	 * reference on the irq.
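	 * The matching put is performed when the interrupt is removed from
	 * the ap_list again, e.g. in vgic_prune_ap_list().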
	 */
	vgic_get_irq_ref(irq);
	list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head);
	irq->vcpu = vcpu;

	raw_spin_unlock(&irq->irq_lock);
	raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags);

	kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
	kvm_vcpu_kick(vcpu);

	return true;
}

/**
 * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
 * @kvm:	The VM structure pointer
 * @vcpu:	The CPU for PPIs or NULL for global interrupts
 * @intid:	The INTID to inject a new state to.
 * @level:	Edge-triggered:  true:  to trigger the interrupt
 *			         false: to ignore the call
 *		Level-sensitive  true:  raise the input signal
 *				 false: lower the input signal
 * @owner:	The opaque pointer to the owner of the IRQ being raised to verify
 *		that the caller is allowed to inject this IRQ.  Userspace
 *		injections will have owner == NULL.
 *
 * The VGIC is not concerned with devices being active-LOW or active-HIGH for
 * level-sensitive interrupts.  You can think of the level parameter as 1
 * being HIGH and 0 being LOW and all devices being active-HIGH.
 */
int kvm_vgic_inject_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
			unsigned int intid, bool level, void *owner)
{
	struct vgic_irq *irq;
	unsigned long flags;
	int ret;

	ret = vgic_lazy_init(kvm);
	if (ret)
		return ret;

	if (!vcpu && intid < VGIC_NR_PRIVATE_IRQS)
		return -EINVAL;

	trace_vgic_update_irq_pending(vcpu ? vcpu->vcpu_idx : 0, intid, level);

	if (intid < VGIC_NR_PRIVATE_IRQS)
		irq = vgic_get_vcpu_irq(vcpu, intid);
	else
		irq = vgic_get_irq(kvm, intid);
	if (!irq)
		return -EINVAL;

	raw_spin_lock_irqsave(&irq->irq_lock, flags);

	if (!vgic_validate_injection(irq, level, owner)) {
		/* Nothing to see here, move along... */
		raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
		vgic_put_irq(kvm, irq);
		return 0;
	}

	if (irq->config == VGIC_CONFIG_LEVEL)
		irq->line_level = level;
	else
		irq->pending_latch = true;

	vgic_queue_irq_unlock(kvm, irq, flags);
	vgic_put_irq(kvm, irq);

	return 0;
}

/* @irq->irq_lock must be held */
static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
			    unsigned int host_irq,
			    struct irq_ops *ops)
{
	struct irq_desc *desc;
	struct irq_data *data;

	/*
	 * Find the physical IRQ number corresponding to @host_irq
	 */
	desc = irq_to_desc(host_irq);
	if (!desc) {
		kvm_err("%s: no interrupt descriptor\n", __func__);
		return -EINVAL;
	}
	data = irq_desc_get_irq_data(desc);
	while (data->parent_data)
		data = data->parent_data;

	irq->hw = true;
	irq->host_irq = host_irq;
	irq->hwintid = data->hwirq;
	irq->ops = ops;
	return 0;
}

/* @irq->irq_lock must be held */
static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq)
{
	irq->hw = false;
	irq->hwintid = 0;
	irq->ops = NULL;
}

int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
			  u32 vintid, struct irq_ops *ops)
{
	struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, vintid);
	unsigned long flags;
	int ret;

	BUG_ON(!irq);

	raw_spin_lock_irqsave(&irq->irq_lock, flags);
	ret = kvm_vgic_map_irq(vcpu, irq, host_irq, ops);
	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
	vgic_put_irq(vcpu->kvm, irq);

	return ret;
}

/**
 * kvm_vgic_reset_mapped_irq - Reset a mapped IRQ
 * @vcpu: The VCPU pointer
 * @vintid: The INTID of the interrupt
 *
 * Reset the active and pending states of a mapped interrupt. Kernel
 * subsystems injecting mapped interrupts should reset their interrupt lines
 * when we are doing a reset of the VM.
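 * Interrupts that are not HW-mapped are left untouched.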
 */
void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid)
{
	struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, vintid);
	unsigned long flags;

	if (!irq->hw)
		goto out;

	raw_spin_lock_irqsave(&irq->irq_lock, flags);
	irq->active = false;
	irq->pending_latch = false;
	irq->line_level = false;
	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
out:
	vgic_put_irq(vcpu->kvm, irq);
}

int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid)
{
	struct vgic_irq *irq;
	unsigned long flags;

	if (!vgic_initialized(vcpu->kvm))
		return -EAGAIN;

	irq = vgic_get_vcpu_irq(vcpu, vintid);
	BUG_ON(!irq);

	raw_spin_lock_irqsave(&irq->irq_lock, flags);
	kvm_vgic_unmap_irq(irq);
	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
	vgic_put_irq(vcpu->kvm, irq);

	return 0;
}

int kvm_vgic_get_map(struct kvm_vcpu *vcpu, unsigned int vintid)
{
	struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, vintid);
	unsigned long flags;
	int ret = -1;

	raw_spin_lock_irqsave(&irq->irq_lock, flags);
	if (irq->hw)
		ret = irq->hwintid;
	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);

	vgic_put_irq(vcpu->kvm, irq);
	return ret;
}

/**
 * kvm_vgic_set_owner - Set the owner of an interrupt for a VM
 *
 * @vcpu:	Pointer to the VCPU (used for PPIs)
 * @intid:	The virtual INTID identifying the interrupt (PPI or SPI)
 * @owner:	Opaque pointer to the owner
 *
 * Returns 0 if intid is not already used by another in-kernel device and the
 * owner is set, otherwise returns an error code.
 */
int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner)
{
	struct vgic_irq *irq;
	unsigned long flags;
	int ret = 0;

	if (!vgic_initialized(vcpu->kvm))
		return -EAGAIN;

	/* SGIs and LPIs cannot be wired up to any device */
	if (!irq_is_ppi(intid) && !vgic_valid_spi(vcpu->kvm, intid))
		return -EINVAL;

	irq = vgic_get_vcpu_irq(vcpu, intid);
	raw_spin_lock_irqsave(&irq->irq_lock, flags);
	if (irq->owner && irq->owner != owner)
		ret = -EEXIST;
	else
		irq->owner = owner;
	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);

	return ret;
}

/**
 * vgic_prune_ap_list - Remove non-relevant interrupts from the list
 *
 * @vcpu: The VCPU pointer
 *
 * Go over the list of "interesting" interrupts, and prune those that we
 * won't have to consider in the near future.
 */
static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	struct vgic_irq *irq, *tmp;
	bool deleted_lpis = false;

	DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());

retry:
	raw_spin_lock(&vgic_cpu->ap_list_lock);

	list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
		struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB;
		bool target_vcpu_needs_kick = false;

		raw_spin_lock(&irq->irq_lock);

		BUG_ON(vcpu != irq->vcpu);

		target_vcpu = vgic_target_oracle(irq);

		if (!target_vcpu) {
			/*
			 * We don't need to process this interrupt any
			 * further, move it off the list.
			 */
			list_del(&irq->ap_list);
			irq->vcpu = NULL;
			raw_spin_unlock(&irq->irq_lock);

			/*
			 * This vgic_put_irq call matches the
			 * vgic_get_irq_ref in vgic_queue_irq_unlock,
			 * where we added the LPI to the ap_list. As
			 * we remove the irq from the list, we also
			 * drop the refcount.
			 */
			deleted_lpis |= vgic_put_irq_norelease(vcpu->kvm, irq);
			continue;
		}

		if (target_vcpu == vcpu) {
			/* We're on the right CPU */
			raw_spin_unlock(&irq->irq_lock);
			continue;
		}

		/* This interrupt looks like it has to be migrated. */

		raw_spin_unlock(&irq->irq_lock);
		raw_spin_unlock(&vgic_cpu->ap_list_lock);

		/*
		 * Ensure locking order by always locking the smallest
		 * ID first.
		 */
		if (vcpu->vcpu_id < target_vcpu->vcpu_id) {
			vcpuA = vcpu;
			vcpuB = target_vcpu;
		} else {
			vcpuA = target_vcpu;
			vcpuB = vcpu;
		}

		raw_spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock);
		raw_spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock,
				     SINGLE_DEPTH_NESTING);
		raw_spin_lock(&irq->irq_lock);

		/*
		 * If the affinity has been preserved, move the
		 * interrupt around. Otherwise, it means things have
		 * changed while the interrupt was unlocked, and we
		 * need to replay this.
		 *
		 * In all cases, we cannot trust the list not to have
		 * changed, so we restart from the beginning.
		 */
		if (target_vcpu == vgic_target_oracle(irq)) {
			struct vgic_cpu *new_cpu = &target_vcpu->arch.vgic_cpu;

			list_del(&irq->ap_list);
			irq->vcpu = target_vcpu;
			list_add_tail(&irq->ap_list, &new_cpu->ap_list_head);
			target_vcpu_needs_kick = true;
		}

		raw_spin_unlock(&irq->irq_lock);
		raw_spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock);
		raw_spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock);

		if (target_vcpu_needs_kick) {
			kvm_make_request(KVM_REQ_IRQ_PENDING, target_vcpu);
			kvm_vcpu_kick(target_vcpu);
		}

		goto retry;
	}

	raw_spin_unlock(&vgic_cpu->ap_list_lock);

	if (unlikely(deleted_lpis))
		vgic_release_deleted_lpis(vcpu->kvm);
}

static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu)
{
	if (kvm_vgic_global_state.type == VGIC_V2)
		vgic_v2_fold_lr_state(vcpu);
	else
		vgic_v3_fold_lr_state(vcpu);
}

/* Requires the irq_lock to be held. */
static inline void vgic_populate_lr(struct kvm_vcpu *vcpu,
				    struct vgic_irq *irq, int lr)
{
	lockdep_assert_held(&irq->irq_lock);

	if (kvm_vgic_global_state.type == VGIC_V2)
		vgic_v2_populate_lr(vcpu, irq, lr);
	else
		vgic_v3_populate_lr(vcpu, irq, lr);
}

static inline void vgic_clear_lr(struct kvm_vcpu *vcpu, int lr)
{
	if (kvm_vgic_global_state.type == VGIC_V2)
		vgic_v2_clear_lr(vcpu, lr);
	else
		vgic_v3_clear_lr(vcpu, lr);
}

static inline void vgic_set_underflow(struct kvm_vcpu *vcpu)
{
	if (kvm_vgic_global_state.type == VGIC_V2)
		vgic_v2_set_underflow(vcpu);
	else
		vgic_v3_set_underflow(vcpu);
}

/* Requires the ap_list_lock to be held. */
static int compute_ap_list_depth(struct kvm_vcpu *vcpu,
				 bool *multi_sgi)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	struct vgic_irq *irq;
	int count = 0;

	*multi_sgi = false;

	lockdep_assert_held(&vgic_cpu->ap_list_lock);

	list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
		int w;

		raw_spin_lock(&irq->irq_lock);
		/* GICv2 SGIs can count for more than one... */
		w = vgic_irq_get_lr_count(irq);
		raw_spin_unlock(&irq->irq_lock);

		count += w;
		*multi_sgi |= (w > 1);
	}
	return count;
}

/*
 * Requires the VCPU's ap_list_lock to be held.
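 *
 * Fills the LRs from the (possibly re-sorted) ap_list. If there are more
 * pending interrupts than available LRs, underflow notification is enabled
 * so that the remaining interrupts can be injected once LRs free up.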
 */
static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	struct vgic_irq *irq;
	int count;
	bool multi_sgi;
	u8 prio = 0xff;
	int i = 0;

	lockdep_assert_held(&vgic_cpu->ap_list_lock);

	count = compute_ap_list_depth(vcpu, &multi_sgi);
	if (count > kvm_vgic_global_state.nr_lr || multi_sgi)
		vgic_sort_ap_list(vcpu);

	count = 0;

	list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
		raw_spin_lock(&irq->irq_lock);

		/*
		 * If we have multi-SGIs in the pipeline, we need to
		 * guarantee that they are all seen before any IRQ of
		 * lower priority. In that case, we need to filter out
		 * these interrupts by exiting early. This is easy as
		 * the AP list has been sorted already.
		 */
		if (multi_sgi && irq->priority > prio) {
			raw_spin_unlock(&irq->irq_lock);
			break;
		}

		if (likely(vgic_target_oracle(irq) == vcpu)) {
			vgic_populate_lr(vcpu, irq, count++);

			if (irq->source)
				prio = irq->priority;
		}

		raw_spin_unlock(&irq->irq_lock);

		if (count == kvm_vgic_global_state.nr_lr) {
			if (!list_is_last(&irq->ap_list,
					  &vgic_cpu->ap_list_head))
				vgic_set_underflow(vcpu);
			break;
		}
	}

	/* Nuke remaining LRs */
	for (i = count; i < kvm_vgic_global_state.nr_lr; i++)
		vgic_clear_lr(vcpu, i);

	if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
		vcpu->arch.vgic_cpu.vgic_v2.used_lrs = count;
	else
		vcpu->arch.vgic_cpu.vgic_v3.used_lrs = count;
}

static inline bool can_access_vgic_from_kernel(void)
{
	/*
	 * GICv2 can always be accessed from the kernel because it is
	 * memory-mapped, and VHE systems can access GICv3 EL2 system
	 * registers.
	 */
	return !static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) || has_vhe();
}

static inline void vgic_save_state(struct kvm_vcpu *vcpu)
{
	if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
		vgic_v2_save_state(vcpu);
	else
		__vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3);
}

/* Sync back the hardware VGIC state into our emulation after a guest's run. */
void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
{
	int used_lrs;

	/* If nesting, emulate the HW effect from L0 to L1 */
	if (vgic_state_is_nested(vcpu)) {
		vgic_v3_sync_nested(vcpu);
		return;
	}

	if (vcpu_has_nv(vcpu))
		vgic_v3_nested_update_mi(vcpu);

	/* An empty ap_list_head implies used_lrs == 0 */
	if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
		return;

	if (can_access_vgic_from_kernel())
		vgic_save_state(vcpu);

	if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
		used_lrs = vcpu->arch.vgic_cpu.vgic_v2.used_lrs;
	else
		used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs;

	if (used_lrs)
		vgic_fold_lr_state(vcpu);
	vgic_prune_ap_list(vcpu);
}

static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
{
	if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
		vgic_v2_restore_state(vcpu);
	else
		__vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3);
}

/* Flush our emulation state into the GIC hardware before entering the guest. */
void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
{
	/*
	 * If in a nested state, we must return early. Two possibilities:
	 *
	 * - If we have any pending IRQ for the guest and the guest
	 *   expects IRQs to be handled in its virtual EL2 mode (the
	 *   virtual IMO bit is set) and it is not already running in
	 *   virtual EL2 mode, then we have to emulate an IRQ
	 *   exception to virtual EL2.
	 *
	 *   We do that by placing a request to ourselves which will
	 *   abort the entry procedure and inject the exception at the
	 *   beginning of the run loop.
	 *
	 * - Otherwise, do exactly *NOTHING*. The guest state is
	 *   already loaded, and we can carry on with running it.
	 *
	 * If we have NV, but are not in a nested state, compute the
	 * maintenance interrupt state, as it may fire.
	 */
	if (vgic_state_is_nested(vcpu)) {
		if (kvm_vgic_vcpu_pending_irq(vcpu))
			kvm_make_request(KVM_REQ_GUEST_HYP_IRQ_PENDING, vcpu);

		return;
	}

	if (vcpu_has_nv(vcpu))
		vgic_v3_nested_update_mi(vcpu);

	/*
	 * If there are no virtual interrupts active or pending for this
	 * VCPU, then there is no work to do and we can bail out without
	 * taking any lock. There is a potential race with someone injecting
	 * interrupts to the VCPU, but it is a benign race as the VCPU will
	 * either observe the new interrupt before or after doing this check,
	 * and introducing an additional synchronization mechanism doesn't
	 * change this.
	 *
	 * Note that we still need to go through the whole thing if anything
	 * can be directly injected (GICv4).
	 */
	if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head) &&
	    !vgic_supports_direct_irqs(vcpu->kvm))
		return;

	DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());

	if (!list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) {
		raw_spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock);
		vgic_flush_lr_state(vcpu);
		raw_spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
	}

	if (can_access_vgic_from_kernel())
		vgic_restore_state(vcpu);

	if (vgic_supports_direct_irqs(vcpu->kvm))
		vgic_v4_commit(vcpu);
}

void kvm_vgic_load(struct kvm_vcpu *vcpu)
{
	if (unlikely(!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm))) {
		if (has_vhe() && static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
			__vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
		return;
	}

	if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
		vgic_v2_load(vcpu);
	else
		vgic_v3_load(vcpu);
}

void kvm_vgic_put(struct kvm_vcpu *vcpu)
{
	if (unlikely(!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm))) {
		if (has_vhe() && static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
			__vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
		return;
	}

	if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
		vgic_v2_put(vcpu);
	else
		vgic_v3_put(vcpu);
}

int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	struct vgic_irq *irq;
	bool pending = false;
	unsigned long flags;
	struct vgic_vmcr vmcr;

	if (!vcpu->kvm->arch.vgic.enabled)
		return false;

	if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last)
		return true;

	vgic_get_vmcr(vcpu, &vmcr);

	raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);

	list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
		raw_spin_lock(&irq->irq_lock);
		pending = irq_is_pending(irq) && irq->enabled &&
			  !irq->active &&
			  irq->priority < vmcr.pmr;
		raw_spin_unlock(&irq->irq_lock);

		if (pending)
			break;
	}

	raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);

	return pending;
}

void vgic_kick_vcpus(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	unsigned long c;

	/*
	 * We've injected an interrupt, time to find out who deserves
	 * a good kick...
	 */
	kvm_for_each_vcpu(c, vcpu, kvm) {
		if (kvm_vgic_vcpu_pending_irq(vcpu)) {
			kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
			kvm_vcpu_kick(vcpu);
		}
	}
}

bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid)
{
	struct vgic_irq *irq;
	bool map_is_active;
	unsigned long flags;

	if (!vgic_initialized(vcpu->kvm))
		return false;

	irq = vgic_get_vcpu_irq(vcpu, vintid);
	raw_spin_lock_irqsave(&irq->irq_lock, flags);
	map_is_active = irq->hw && irq->active;
	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
	vgic_put_irq(vcpu->kvm, irq);

	return map_is_active;
}

/*
 * Level-triggered mapped IRQs are special because we only observe rising
 * edges as input to the VGIC.
 *
 * If the guest never acked the interrupt we have to sample the physical
 * line and set the line level, because the device state could have changed
 * or we simply need to process the still pending interrupt later.
 *
 * We could also have entered the guest with the interrupt active+pending.
 * On the next exit, we need to re-evaluate the pending state, as it could
 * otherwise result in a spurious interrupt by injecting a now potentially
 * stale pending state.
 *
 * If this causes us to lower the level, we have to also clear the physical
 * active state, since we will otherwise never be told when the interrupt
 * becomes asserted again.
 *
 * Another case is when the interrupt requires a helping hand on
 * deactivation (no HW deactivation, for example).
 */
void vgic_irq_handle_resampling(struct vgic_irq *irq,
				bool lr_deactivated, bool lr_pending)
{
	if (vgic_irq_is_mapped_level(irq)) {
		bool resample = false;

		if (unlikely(vgic_irq_needs_resampling(irq))) {
			resample = !(irq->active || irq->pending_latch);
		} else if (lr_pending || (lr_deactivated && irq->line_level)) {
			irq->line_level = vgic_get_phys_line_level(irq);
			resample = !irq->line_level;
		}

		if (resample)
			vgic_irq_set_phys_active(irq, false);
	}
}