// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015, 2016 ARM Ltd.
 */

#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/list_sort.h>
#include <linux/nospec.h>

#include <asm/kvm_hyp.h>

#include "vgic.h"

#define CREATE_TRACE_POINTS
#include "trace.h"

struct vgic_global kvm_vgic_global_state __ro_after_init = {
        .gicv3_cpuif = STATIC_KEY_FALSE_INIT,
};

/*
 * Locking order is always:
 * kvm->lock (mutex)
 *   vcpu->mutex (mutex)
 *     kvm->arch.config_lock (mutex)
 *       its->cmd_lock (mutex)
 *         its->its_lock (mutex)
 *           vgic_cpu->ap_list_lock         must be taken with IRQs disabled
 *             kvm->lpi_list_lock           must be taken with IRQs disabled
 *               vgic_dist->lpi_xa.xa_lock  must be taken with IRQs disabled
 *                 vgic_irq->irq_lock       must be taken with IRQs disabled
 *
 * As the ap_list_lock might be taken from the timer interrupt handler,
 * we have to disable IRQs before taking this lock and everything lower
 * than it.
 *
 * If you need to take multiple locks, always take the upper lock first,
 * then the lower ones, e.g. first take the its_lock, then the irq_lock.
 * If you are already holding a lock and need to take a higher one, you
 * have to drop the lower ranking lock first and re-acquire it after having
 * taken the upper one.
 *
 * When taking more than one ap_list_lock at the same time, always take the
 * lowest numbered VCPU's ap_list_lock first, so:
 *   vcpuX->vcpu_id < vcpuY->vcpu_id:
 *     raw_spin_lock(vcpuX->arch.vgic_cpu.ap_list_lock);
 *     raw_spin_lock(vcpuY->arch.vgic_cpu.ap_list_lock);
 *
 * Since the VGIC must support injecting virtual interrupts from ISRs, we have
 * to use the raw_spin_lock_irqsave/raw_spin_unlock_irqrestore versions of outer
 * spinlocks for any lock that may be taken while injecting an interrupt.
 */

/*
 * Index the VM's xarray of mapped LPIs and return a reference to the IRQ
 * structure. The caller is expected to call vgic_put_irq() later once it's
 * finished with the IRQ.
 */
static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid)
{
        struct vgic_dist *dist = &kvm->arch.vgic;
        struct vgic_irq *irq = NULL;

        rcu_read_lock();

        irq = xa_load(&dist->lpi_xa, intid);
        if (!vgic_try_get_irq_kref(irq))
                irq = NULL;

        rcu_read_unlock();

        return irq;
}

/*
 * This looks up the virtual interrupt ID to get the corresponding
 * struct vgic_irq. It also increases the refcount, so any caller is expected
 * to call vgic_put_irq() once it's finished with this IRQ.
 */
struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
                              u32 intid)
{
        /* SGIs and PPIs */
        if (intid <= VGIC_MAX_PRIVATE) {
                intid = array_index_nospec(intid, VGIC_MAX_PRIVATE + 1);
                return &vcpu->arch.vgic_cpu.private_irqs[intid];
        }

        /* SPIs */
        if (intid < (kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)) {
                intid = array_index_nospec(intid, kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS);
                return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS];
        }

        /* LPIs */
        if (intid >= VGIC_MIN_LPI)
                return vgic_get_lpi(kvm, intid);

        return NULL;
}

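/*
 * A rough usage sketch for the lookup above (illustrative only; names such
 * as "intid" and "flags" are placeholders): every successful vgic_get_irq()
 * must eventually be balanced by a vgic_put_irq(), typically like
 *
 *      struct vgic_irq *irq = vgic_get_irq(kvm, vcpu, intid);
 *
 *      if (irq) {
 *              raw_spin_lock_irqsave(&irq->irq_lock, flags);
 *              ...inspect or update the interrupt state...
 *              raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
 *              vgic_put_irq(kvm, irq);
 *      }
 *
 * For SGIs, PPIs and SPIs the put is effectively a no-op; only LPIs are
 * refcounted and freed on the last put (see vgic_put_irq() below).
 */
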
/*
 * We can't do anything in here, because we lack the kvm pointer to
 * lock and remove the item from the lpi_list. So we keep this function
 * empty and use the return value of kref_put() to trigger the freeing.
 */
static void vgic_irq_release(struct kref *ref)
{
}

void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
{
        struct vgic_dist *dist = &kvm->arch.vgic;
        unsigned long flags;

        if (irq->intid < VGIC_MIN_LPI)
                return;

        if (!kref_put(&irq->refcount, vgic_irq_release))
                return;

        xa_lock_irqsave(&dist->lpi_xa, flags);
        __xa_erase(&dist->lpi_xa, irq->intid);
        xa_unlock_irqrestore(&dist->lpi_xa, flags);

        atomic_dec(&dist->lpi_count);
        kfree_rcu(irq, rcu);
}

void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu)
{
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
        struct vgic_irq *irq, *tmp;
        unsigned long flags;

        raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);

        list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
                if (irq->intid >= VGIC_MIN_LPI) {
                        raw_spin_lock(&irq->irq_lock);
                        list_del(&irq->ap_list);
                        irq->vcpu = NULL;
                        raw_spin_unlock(&irq->irq_lock);
                        vgic_put_irq(vcpu->kvm, irq);
                }
        }

        raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
}

void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending)
{
        WARN_ON(irq_set_irqchip_state(irq->host_irq,
                                      IRQCHIP_STATE_PENDING,
                                      pending));
}

bool vgic_get_phys_line_level(struct vgic_irq *irq)
{
        bool line_level;

        BUG_ON(!irq->hw);

        if (irq->ops && irq->ops->get_input_level)
                return irq->ops->get_input_level(irq->intid);

        WARN_ON(irq_get_irqchip_state(irq->host_irq,
                                      IRQCHIP_STATE_PENDING,
                                      &line_level));
        return line_level;
}

/* Set/Clear the physical active state */
void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active)
{
        BUG_ON(!irq->hw);
        WARN_ON(irq_set_irqchip_state(irq->host_irq,
                                      IRQCHIP_STATE_ACTIVE,
                                      active));
}

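/*
 * The three helpers above only make sense for mapped (irq->hw) interrupts.
 * As a rough illustration of how they combine for a level-triggered mapped
 * IRQ (this is essentially what vgic_irq_handle_resampling() at the end of
 * this file does):
 *
 *      irq->line_level = vgic_get_phys_line_level(irq);
 *      if (!irq->line_level)
 *              vgic_irq_set_phys_active(irq, false);
 *
 * i.e. once the line is seen low again, the physical active state is
 * cleared so that we get told when the interrupt becomes asserted again.
 */
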
/**
 * vgic_target_oracle - compute the target vcpu for an irq
 *
 * @irq: The irq to route. Must be already locked.
 *
 * Based on the current state of the interrupt (enabled, pending,
 * active, vcpu and target_vcpu), compute the next vcpu this should be
 * given to. Return NULL if this shouldn't be injected at all.
 *
 * Requires the IRQ lock to be held.
 */
static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq)
{
        lockdep_assert_held(&irq->irq_lock);

        /* If the interrupt is active, it must stay on the current vcpu */
        if (irq->active)
                return irq->vcpu ? : irq->target_vcpu;

        /*
         * If the IRQ is not active but enabled and pending, we should direct
         * it to its configured target VCPU.
         * If the distributor is disabled, pending interrupts shouldn't be
         * forwarded.
         */
        if (irq->enabled && irq_is_pending(irq)) {
                if (unlikely(irq->target_vcpu &&
                             !irq->target_vcpu->kvm->arch.vgic.enabled))
                        return NULL;

                return irq->target_vcpu;
        }

        /* If neither active nor pending and enabled, then this IRQ should not
         * be queued to any VCPU.
         */
        return NULL;
}

/*
 * The order of items in the ap_lists defines how we'll pack things in LRs as
 * well, the first items in the list being the first things populated in the
 * LRs.
 *
 * A hard rule is that active interrupts can never be pushed out of the LRs
 * (and therefore take priority) since we cannot reliably trap on deactivation
 * of IRQs and therefore they have to be present in the LRs.
 *
 * Otherwise things should be sorted by the priority field and the GIC
 * hardware support will take care of preemption of priority groups etc.
 *
 * Return negative if "a" sorts before "b", 0 to preserve order, and positive
 * to sort "b" before "a".
 */
static int vgic_irq_cmp(void *priv, const struct list_head *a,
                        const struct list_head *b)
{
        struct vgic_irq *irqa = container_of(a, struct vgic_irq, ap_list);
        struct vgic_irq *irqb = container_of(b, struct vgic_irq, ap_list);
        bool penda, pendb;
        int ret;

        /*
         * list_sort may call this function with the same element when
         * the list is fairly long.
         */
        if (unlikely(irqa == irqb))
                return 0;

        raw_spin_lock(&irqa->irq_lock);
        raw_spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING);

        if (irqa->active || irqb->active) {
                ret = (int)irqb->active - (int)irqa->active;
                goto out;
        }

        penda = irqa->enabled && irq_is_pending(irqa);
        pendb = irqb->enabled && irq_is_pending(irqb);

        if (!penda || !pendb) {
                ret = (int)pendb - (int)penda;
                goto out;
        }

        /* Both pending and enabled, sort by priority */
        ret = irqa->priority - irqb->priority;
out:
        raw_spin_unlock(&irqb->irq_lock);
        raw_spin_unlock(&irqa->irq_lock);
        return ret;
}

/* Must be called with the ap_list_lock held */
static void vgic_sort_ap_list(struct kvm_vcpu *vcpu)
{
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;

        lockdep_assert_held(&vgic_cpu->ap_list_lock);

        list_sort(NULL, &vgic_cpu->ap_list_head, vgic_irq_cmp);
}

/*
 * Only valid injection if changing level for level-triggered IRQs or for a
 * rising edge, and in-kernel connected IRQ lines can only be controlled by
 * their owner.
 */
static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owner)
{
        if (irq->owner != owner)
                return false;

        switch (irq->config) {
        case VGIC_CONFIG_LEVEL:
                return irq->line_level != level;
        case VGIC_CONFIG_EDGE:
                return level;
        }

        return false;
}

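/*
 * To illustrate the filtering above with kvm_vgic_inject_irq() (defined
 * further down; "spi" is a placeholder INTID): for a level-triggered
 * interrupt only changes of the input level are accepted, while for an
 * edge-triggered one every rising edge latches a new pending state:
 *
 *      kvm_vgic_inject_irq(kvm, NULL, spi, true, NULL);   line goes high
 *      kvm_vgic_inject_irq(kvm, NULL, spi, true, NULL);   ignored, already high
 *      kvm_vgic_inject_irq(kvm, NULL, spi, false, NULL);  line goes low
 *
 * With an edge-triggered configuration the second call would latch another
 * pending state and the third call would be the one that is ignored.
 */
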
/*
 * Check whether an IRQ needs to (and can) be queued to a VCPU's ap list.
 * Do the queuing if necessary, taking the right locks in the right order.
 * Returns true when the IRQ was queued, false otherwise.
 *
 * Needs to be entered with the IRQ lock already held, but will return
 * with all locks dropped.
 */
bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
                           unsigned long flags)
{
        struct kvm_vcpu *vcpu;

        lockdep_assert_held(&irq->irq_lock);

retry:
        vcpu = vgic_target_oracle(irq);
        if (irq->vcpu || !vcpu) {
                /*
                 * If this IRQ is already on a VCPU's ap_list, then it
                 * cannot be moved or modified and there is no more work for
                 * us to do.
                 *
                 * Otherwise, if the irq is not pending and enabled, it does
                 * not need to be inserted into an ap_list and there is also
                 * no more work for us to do.
                 */
                raw_spin_unlock_irqrestore(&irq->irq_lock, flags);

                /*
                 * We have to kick the VCPU here, because we could be
                 * queueing an edge-triggered interrupt for which we
                 * get no EOI maintenance interrupt. In that case,
                 * while the IRQ is already on the VCPU's AP list, the
                 * VCPU could have EOI'ed the original interrupt and
                 * won't see this one until it exits for some other
                 * reason.
                 */
                if (vcpu) {
                        kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
                        kvm_vcpu_kick(vcpu);
                }
                return false;
        }

        /*
         * We must unlock the irq lock to take the ap_list_lock where
         * we are going to insert this new pending interrupt.
         */
        raw_spin_unlock_irqrestore(&irq->irq_lock, flags);

        /* someone can do stuff here, which we re-check below */

        raw_spin_lock_irqsave(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
        raw_spin_lock(&irq->irq_lock);

        /*
         * Did something change behind our backs?
         *
         * There are two cases:
         * 1) The irq lost its pending state or was disabled behind our
         *    backs and/or it was queued to another VCPU's ap_list.
         * 2) Someone changed the affinity on this irq behind our
         *    backs and we are now holding the wrong ap_list_lock.
         *
         * In both cases, drop the locks and retry.
         */

        if (unlikely(irq->vcpu || vcpu != vgic_target_oracle(irq))) {
                raw_spin_unlock(&irq->irq_lock);
                raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock,
                                           flags);

                raw_spin_lock_irqsave(&irq->irq_lock, flags);
                goto retry;
        }

        /*
         * Grab a reference to the irq to reflect the fact that it is
         * now in the ap_list. This is safe as the caller must already hold a
         * reference on the irq.
         */
        vgic_get_irq_kref(irq);
        list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head);
        irq->vcpu = vcpu;

        raw_spin_unlock(&irq->irq_lock);
        raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags);

        kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
        kvm_vcpu_kick(vcpu);

        return true;
}

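/*
 * The expected calling convention for vgic_queue_irq_unlock() is therefore:
 * take the irq_lock, update the pending/level state, then hand over the
 * flags so that the function can drop every lock on all return paths.
 * kvm_vgic_inject_irq() below is the canonical example:
 *
 *      raw_spin_lock_irqsave(&irq->irq_lock, flags);
 *      irq->pending_latch = true;              (or update line_level)
 *      vgic_queue_irq_unlock(kvm, irq, flags); (returns with no locks held)
 *
 * Calling it without holding the irq_lock, or assuming the irq_lock is still
 * held after it returns, breaks the locking scheme described at the top of
 * this file.
 */
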
/**
 * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
 * @kvm:    The VM structure pointer
 * @vcpu:   The CPU for PPIs or NULL for global interrupts
 * @intid:  The INTID to inject a new state to.
 * @level:  Edge-triggered:  true:  to trigger the interrupt
 *                           false: to ignore the call
 *          Level-sensitive  true:  raise the input signal
 *                           false: lower the input signal
 * @owner:  The opaque pointer to the owner of the IRQ being raised to verify
 *          that the caller is allowed to inject this IRQ. Userspace
 *          injections will have owner == NULL.
 *
 * The VGIC is not concerned with devices being active-LOW or active-HIGH for
 * level-sensitive interrupts. You can think of the level parameter as 1
 * being HIGH and 0 being LOW and all devices being active-HIGH.
 */
int kvm_vgic_inject_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
                        unsigned int intid, bool level, void *owner)
{
        struct vgic_irq *irq;
        unsigned long flags;
        int ret;

        ret = vgic_lazy_init(kvm);
        if (ret)
                return ret;

        if (!vcpu && intid < VGIC_NR_PRIVATE_IRQS)
                return -EINVAL;

        trace_vgic_update_irq_pending(vcpu ? vcpu->vcpu_idx : 0, intid, level);

        irq = vgic_get_irq(kvm, vcpu, intid);
        if (!irq)
                return -EINVAL;

        raw_spin_lock_irqsave(&irq->irq_lock, flags);

        if (!vgic_validate_injection(irq, level, owner)) {
                /* Nothing to see here, move along... */
                raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
                vgic_put_irq(kvm, irq);
                return 0;
        }

        if (irq->config == VGIC_CONFIG_LEVEL)
                irq->line_level = level;
        else
                irq->pending_latch = true;

        vgic_queue_irq_unlock(kvm, irq, flags);
        vgic_put_irq(kvm, irq);

        return 0;
}

/* @irq->irq_lock must be held */
static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
                            unsigned int host_irq,
                            struct irq_ops *ops)
{
        struct irq_desc *desc;
        struct irq_data *data;

        /*
         * Find the physical IRQ number corresponding to @host_irq
         */
        desc = irq_to_desc(host_irq);
        if (!desc) {
                kvm_err("%s: no interrupt descriptor\n", __func__);
                return -EINVAL;
        }
        data = irq_desc_get_irq_data(desc);
        while (data->parent_data)
                data = data->parent_data;

        irq->hw = true;
        irq->host_irq = host_irq;
        irq->hwintid = data->hwirq;
        irq->ops = ops;
        return 0;
}

/* @irq->irq_lock must be held */
static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq)
{
        irq->hw = false;
        irq->hwintid = 0;
        irq->ops = NULL;
}

int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
                          u32 vintid, struct irq_ops *ops)
{
        struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
        unsigned long flags;
        int ret;

        BUG_ON(!irq);

        raw_spin_lock_irqsave(&irq->irq_lock, flags);
        ret = kvm_vgic_map_irq(vcpu, irq, host_irq, ops);
        raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
        vgic_put_irq(vcpu->kvm, irq);

        return ret;
}

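/*
 * Rough usage sketch for forwarding a physical interrupt to the guest (the
 * names "host_irq", "vintid" and "my_irq_ops" are placeholders, not taken
 * from this file): an in-kernel user that owns a host interrupt maps it to
 * a virtual INTID, optionally providing a get_input_level() callback so the
 * VGIC can sample the line without going through the irqchip:
 *
 *      static struct irq_ops my_irq_ops = {
 *              .get_input_level = my_get_input_level,
 *      };
 *
 *      ret = kvm_vgic_map_phys_irq(vcpu, host_irq, vintid, &my_irq_ops);
 *      ...
 *      kvm_vgic_unmap_phys_irq(vcpu, vintid);
 *
 * While mapped, vgic_get_phys_line_level() and the physical active/pending
 * helpers above operate on the interrupt identified by host_irq.
 */
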
/**
 * kvm_vgic_reset_mapped_irq - Reset a mapped IRQ
 * @vcpu: The VCPU pointer
 * @vintid: The INTID of the interrupt
 *
 * Reset the active and pending states of a mapped interrupt. Kernel
 * subsystems injecting mapped interrupts should reset their interrupt lines
 * when we are doing a reset of the VM.
 */
void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid)
{
        struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
        unsigned long flags;

        if (!irq->hw)
                goto out;

        raw_spin_lock_irqsave(&irq->irq_lock, flags);
        irq->active = false;
        irq->pending_latch = false;
        irq->line_level = false;
        raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
out:
        vgic_put_irq(vcpu->kvm, irq);
}

int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid)
{
        struct vgic_irq *irq;
        unsigned long flags;

        if (!vgic_initialized(vcpu->kvm))
                return -EAGAIN;

        irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
        BUG_ON(!irq);

        raw_spin_lock_irqsave(&irq->irq_lock, flags);
        kvm_vgic_unmap_irq(irq);
        raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
        vgic_put_irq(vcpu->kvm, irq);

        return 0;
}

int kvm_vgic_get_map(struct kvm_vcpu *vcpu, unsigned int vintid)
{
        struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
        unsigned long flags;
        int ret = -1;

        raw_spin_lock_irqsave(&irq->irq_lock, flags);
        if (irq->hw)
                ret = irq->hwintid;
        raw_spin_unlock_irqrestore(&irq->irq_lock, flags);

        vgic_put_irq(vcpu->kvm, irq);
        return ret;
}

/**
 * kvm_vgic_set_owner - Set the owner of an interrupt for a VM
 *
 * @vcpu:   Pointer to the VCPU (used for PPIs)
 * @intid:  The virtual INTID identifying the interrupt (PPI or SPI)
 * @owner:  Opaque pointer to the owner
 *
 * Returns 0 if intid is not already used by another in-kernel device and the
 * owner is set, otherwise returns an error code.
 */
int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner)
{
        struct vgic_irq *irq;
        unsigned long flags;
        int ret = 0;

        if (!vgic_initialized(vcpu->kvm))
                return -EAGAIN;

        /* SGIs and LPIs cannot be wired up to any device */
        if (!irq_is_ppi(intid) && !vgic_valid_spi(vcpu->kvm, intid))
                return -EINVAL;

        irq = vgic_get_irq(vcpu->kvm, vcpu, intid);
        raw_spin_lock_irqsave(&irq->irq_lock, flags);
        if (irq->owner && irq->owner != owner)
                ret = -EEXIST;
        else
                irq->owner = owner;
        raw_spin_unlock_irqrestore(&irq->irq_lock, flags);

        return ret;
}

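/*
 * Rough sketch of the ownership model above (the "pmu" cookie and
 * "ppi_intid" are placeholders): an in-kernel device claims an interrupt
 * once, and must then pass the same opaque pointer on every injection,
 * otherwise vgic_validate_injection() filters the update out:
 *
 *      ret = kvm_vgic_set_owner(vcpu, ppi_intid, pmu);
 *      if (!ret)
 *              kvm_vgic_inject_irq(vcpu->kvm, vcpu, ppi_intid, true, pmu);
 *
 * Userspace injections use owner == NULL, so they are silently ignored for
 * interrupts that have been claimed by an in-kernel device.
 */
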
/**
 * vgic_prune_ap_list - Remove non-relevant interrupts from the list
 *
 * @vcpu: The VCPU pointer
 *
 * Go over the list of "interesting" interrupts, and prune those that we
 * won't have to consider in the near future.
 */
static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
{
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
        struct vgic_irq *irq, *tmp;

        DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());

retry:
        raw_spin_lock(&vgic_cpu->ap_list_lock);

        list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
                struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB;
                bool target_vcpu_needs_kick = false;

                raw_spin_lock(&irq->irq_lock);

                BUG_ON(vcpu != irq->vcpu);

                target_vcpu = vgic_target_oracle(irq);

                if (!target_vcpu) {
                        /*
                         * We don't need to process this interrupt any
                         * further, move it off the list.
                         */
                        list_del(&irq->ap_list);
                        irq->vcpu = NULL;
                        raw_spin_unlock(&irq->irq_lock);

                        /*
                         * This vgic_put_irq call matches the
                         * vgic_get_irq_kref in vgic_queue_irq_unlock,
                         * where we added the LPI to the ap_list. As
                         * we remove the irq from the list, we also
                         * drop the refcount.
                         */
                        vgic_put_irq(vcpu->kvm, irq);
                        continue;
                }

                if (target_vcpu == vcpu) {
                        /* We're on the right CPU */
                        raw_spin_unlock(&irq->irq_lock);
                        continue;
                }

                /* This interrupt looks like it has to be migrated. */

                raw_spin_unlock(&irq->irq_lock);
                raw_spin_unlock(&vgic_cpu->ap_list_lock);

                /*
                 * Ensure locking order by always locking the smallest
                 * ID first.
                 */
                if (vcpu->vcpu_id < target_vcpu->vcpu_id) {
                        vcpuA = vcpu;
                        vcpuB = target_vcpu;
                } else {
                        vcpuA = target_vcpu;
                        vcpuB = vcpu;
                }

                raw_spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock);
                raw_spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock,
                                     SINGLE_DEPTH_NESTING);
                raw_spin_lock(&irq->irq_lock);

                /*
                 * If the affinity has been preserved, move the
                 * interrupt around. Otherwise, it means things have
                 * changed while the interrupt was unlocked, and we
                 * need to replay this.
                 *
                 * In all cases, we cannot trust the list not to have
                 * changed, so we restart from the beginning.
                 */
                if (target_vcpu == vgic_target_oracle(irq)) {
                        struct vgic_cpu *new_cpu = &target_vcpu->arch.vgic_cpu;

                        list_del(&irq->ap_list);
                        irq->vcpu = target_vcpu;
                        list_add_tail(&irq->ap_list, &new_cpu->ap_list_head);
                        target_vcpu_needs_kick = true;
                }

                raw_spin_unlock(&irq->irq_lock);
                raw_spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock);
                raw_spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock);

                if (target_vcpu_needs_kick) {
                        kvm_make_request(KVM_REQ_IRQ_PENDING, target_vcpu);
                        kvm_vcpu_kick(target_vcpu);
                }

                goto retry;
        }

        raw_spin_unlock(&vgic_cpu->ap_list_lock);
}

static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu)
{
        if (kvm_vgic_global_state.type == VGIC_V2)
                vgic_v2_fold_lr_state(vcpu);
        else
                vgic_v3_fold_lr_state(vcpu);
}

/* Requires the irq_lock to be held. */
static inline void vgic_populate_lr(struct kvm_vcpu *vcpu,
                                    struct vgic_irq *irq, int lr)
{
        lockdep_assert_held(&irq->irq_lock);

        if (kvm_vgic_global_state.type == VGIC_V2)
                vgic_v2_populate_lr(vcpu, irq, lr);
        else
                vgic_v3_populate_lr(vcpu, irq, lr);
}

static inline void vgic_clear_lr(struct kvm_vcpu *vcpu, int lr)
{
        if (kvm_vgic_global_state.type == VGIC_V2)
                vgic_v2_clear_lr(vcpu, lr);
        else
                vgic_v3_clear_lr(vcpu, lr);
}

static inline void vgic_set_underflow(struct kvm_vcpu *vcpu)
{
        if (kvm_vgic_global_state.type == VGIC_V2)
                vgic_v2_set_underflow(vcpu);
        else
                vgic_v3_set_underflow(vcpu);
}

/* Requires the ap_list_lock to be held. */
static int compute_ap_list_depth(struct kvm_vcpu *vcpu,
                                 bool *multi_sgi)
{
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
        struct vgic_irq *irq;
        int count = 0;

        *multi_sgi = false;

        lockdep_assert_held(&vgic_cpu->ap_list_lock);

        list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
                int w;

                raw_spin_lock(&irq->irq_lock);
                /* GICv2 SGIs can count for more than one... */
                w = vgic_irq_get_lr_count(irq);
                raw_spin_unlock(&irq->irq_lock);

                count += w;
                *multi_sgi |= (w > 1);
        }
        return count;
}

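/*
 * Worked example for the depth computation above (numbers are made up for
 * illustration, assuming a GICv2 SGI pending from two source CPUs weighs
 * two LRs): with 4 LRs and an ap_list holding that SGI plus three SPIs,
 * count is 5 and multi_sgi is true. vgic_flush_lr_state() below then sorts
 * the list, stops filling once the LRs run out and signals underflow so the
 * remaining interrupts get another chance on the next guest entry.
 */
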
/* Requires the VCPU's ap_list_lock to be held. */
static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
{
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
        struct vgic_irq *irq;
        int count;
        bool multi_sgi;
        u8 prio = 0xff;
        int i = 0;

        lockdep_assert_held(&vgic_cpu->ap_list_lock);

        count = compute_ap_list_depth(vcpu, &multi_sgi);
        if (count > kvm_vgic_global_state.nr_lr || multi_sgi)
                vgic_sort_ap_list(vcpu);

        count = 0;

        list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
                raw_spin_lock(&irq->irq_lock);

                /*
                 * If we have multi-SGIs in the pipeline, we need to
                 * guarantee that they are all seen before any IRQ of
                 * lower priority. In that case, we need to filter out
                 * these interrupts by exiting early. This is easy as
                 * the AP list has been sorted already.
                 */
                if (multi_sgi && irq->priority > prio) {
                        raw_spin_unlock(&irq->irq_lock);
                        break;
                }

                if (likely(vgic_target_oracle(irq) == vcpu)) {
                        vgic_populate_lr(vcpu, irq, count++);

                        if (irq->source)
                                prio = irq->priority;
                }

                raw_spin_unlock(&irq->irq_lock);

                if (count == kvm_vgic_global_state.nr_lr) {
                        if (!list_is_last(&irq->ap_list,
                                          &vgic_cpu->ap_list_head))
                                vgic_set_underflow(vcpu);
                        break;
                }
        }

        /* Nuke remaining LRs */
        for (i = count; i < kvm_vgic_global_state.nr_lr; i++)
                vgic_clear_lr(vcpu, i);

        if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
                vcpu->arch.vgic_cpu.vgic_v2.used_lrs = count;
        else
                vcpu->arch.vgic_cpu.vgic_v3.used_lrs = count;
}

static inline bool can_access_vgic_from_kernel(void)
{
        /*
         * GICv2 can always be accessed from the kernel because it is
         * memory-mapped, and VHE systems can access GICv3 EL2 system
         * registers.
         */
        return !static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) || has_vhe();
}

static inline void vgic_save_state(struct kvm_vcpu *vcpu)
{
        if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
                vgic_v2_save_state(vcpu);
        else
                __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3);
}

/* Sync back the hardware VGIC state into our emulation after a guest's run. */
void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
{
        int used_lrs;

        /* An empty ap_list_head implies used_lrs == 0 */
        if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
                return;

        if (can_access_vgic_from_kernel())
                vgic_save_state(vcpu);

        if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
                used_lrs = vcpu->arch.vgic_cpu.vgic_v2.used_lrs;
        else
                used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs;

        if (used_lrs)
                vgic_fold_lr_state(vcpu);
        vgic_prune_ap_list(vcpu);
}

static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
{
        if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
                vgic_v2_restore_state(vcpu);
        else
                __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3);
}

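/*
 * kvm_vgic_sync_hwstate() above and kvm_vgic_flush_hwstate() below are the
 * two per-run entry points. A rough sketch of how the vcpu run loop is
 * expected to call them (with interrupts disabled, as the
 * DEBUG_SPINLOCK_BUG_ON checks assume):
 *
 *      local_irq_disable();
 *      kvm_vgic_flush_hwstate(vcpu);   write the ap_list into the LRs
 *      ... enter the guest ...
 *      kvm_vgic_sync_hwstate(vcpu);    fold the LRs back, prune the ap_list
 *      local_irq_enable();
 */
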
/* Flush our emulation state into the GIC hardware before entering the guest. */
void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
{
        /*
         * If there are no virtual interrupts active or pending for this
         * VCPU, then there is no work to do and we can bail out without
         * taking any lock. There is a potential race with someone injecting
         * interrupts to the VCPU, but it is a benign race as the VCPU will
         * either observe the new interrupt before or after doing this check,
         * and introducing an additional synchronization mechanism doesn't
         * change this.
         *
         * Note that we still need to go through the whole thing if anything
         * can be directly injected (GICv4).
         */
        if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head) &&
            !vgic_supports_direct_msis(vcpu->kvm))
                return;

        DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());

        if (!list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) {
                raw_spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock);
                vgic_flush_lr_state(vcpu);
                raw_spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
        }

        if (can_access_vgic_from_kernel())
                vgic_restore_state(vcpu);

        if (vgic_supports_direct_msis(vcpu->kvm))
                vgic_v4_commit(vcpu);
}

void kvm_vgic_load(struct kvm_vcpu *vcpu)
{
        if (unlikely(!vgic_initialized(vcpu->kvm)))
                return;

        if (kvm_vgic_global_state.type == VGIC_V2)
                vgic_v2_load(vcpu);
        else
                vgic_v3_load(vcpu);
}

void kvm_vgic_put(struct kvm_vcpu *vcpu)
{
        if (unlikely(!vgic_initialized(vcpu->kvm)))
                return;

        if (kvm_vgic_global_state.type == VGIC_V2)
                vgic_v2_put(vcpu);
        else
                vgic_v3_put(vcpu);
}

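/*
 * kvm_vgic_load() and kvm_vgic_put() above bracket the period during which
 * a vcpu is resident on a physical CPU; the generic vcpu load/put paths are
 * expected to call them roughly like this (sketch only):
 *
 *      kvm_vgic_load(vcpu);    vcpu scheduled in: install its GIC CPU state
 *      ... one or more guest runs ...
 *      kvm_vgic_put(vcpu);     vcpu scheduled out: save the state back
 *
 * This is separate from the per-run flush/sync pair further up, which runs
 * around every single guest entry/exit.
 */
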
void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu)
{
        if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
                return;

        if (kvm_vgic_global_state.type == VGIC_V2)
                vgic_v2_vmcr_sync(vcpu);
        else
                vgic_v3_vmcr_sync(vcpu);
}

int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
{
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
        struct vgic_irq *irq;
        bool pending = false;
        unsigned long flags;
        struct vgic_vmcr vmcr;

        if (!vcpu->kvm->arch.vgic.enabled)
                return false;

        if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last)
                return true;

        vgic_get_vmcr(vcpu, &vmcr);

        raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);

        list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
                raw_spin_lock(&irq->irq_lock);
                pending = irq_is_pending(irq) && irq->enabled &&
                          !irq->active &&
                          irq->priority < vmcr.pmr;
                raw_spin_unlock(&irq->irq_lock);

                if (pending)
                        break;
        }

        raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);

        return pending;
}

void vgic_kick_vcpus(struct kvm *kvm)
{
        struct kvm_vcpu *vcpu;
        unsigned long c;

        /*
         * We've injected an interrupt, time to find out who deserves
         * a good kick...
         */
        kvm_for_each_vcpu(c, vcpu, kvm) {
                if (kvm_vgic_vcpu_pending_irq(vcpu)) {
                        kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
                        kvm_vcpu_kick(vcpu);
                }
        }
}

bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid)
{
        struct vgic_irq *irq;
        bool map_is_active;
        unsigned long flags;

        if (!vgic_initialized(vcpu->kvm))
                return false;

        irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
        raw_spin_lock_irqsave(&irq->irq_lock, flags);
        map_is_active = irq->hw && irq->active;
        raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
        vgic_put_irq(vcpu->kvm, irq);

        return map_is_active;
}

/*
 * Level-triggered mapped IRQs are special because we only observe rising
 * edges as input to the VGIC.
 *
 * If the guest never acked the interrupt we have to sample the physical
 * line and set the line level, because the device state could have changed
 * or we simply need to process the still pending interrupt later.
 *
 * We could also have entered the guest with the interrupt active+pending.
 * On the next exit, we need to re-evaluate the pending state, as it could
 * otherwise result in a spurious interrupt by injecting a now potentially
 * stale pending state.
 *
 * If this causes us to lower the level, we have to also clear the physical
 * active state, since we will otherwise never be told when the interrupt
 * becomes asserted again.
 *
 * Another case is when the interrupt requires a helping hand on
 * deactivation (no HW deactivation, for example).
 */
void vgic_irq_handle_resampling(struct vgic_irq *irq,
                                bool lr_deactivated, bool lr_pending)
{
        if (vgic_irq_is_mapped_level(irq)) {
                bool resample = false;

                if (unlikely(vgic_irq_needs_resampling(irq))) {
                        resample = !(irq->active || irq->pending_latch);
                } else if (lr_pending || (lr_deactivated && irq->line_level)) {
                        irq->line_level = vgic_get_phys_line_level(irq);
                        resample = !irq->line_level;
                }

                if (resample)
                        vgic_irq_set_phys_active(irq, false);
        }
}