xref: /linux/arch/arm64/kvm/vgic/vgic.c (revision 9e4e86a604dfd06402933467578c4b79f5412b2c)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2015, 2016 ARM Ltd.
4  */
5 
6 #include <linux/interrupt.h>
7 #include <linux/irq.h>
8 #include <linux/kvm.h>
9 #include <linux/kvm_host.h>
10 #include <linux/list_sort.h>
11 #include <linux/nospec.h>
12 
13 #include <asm/kvm_hyp.h>
14 
15 #include "vgic.h"
16 
17 #define CREATE_TRACE_POINTS
18 #include "trace.h"
19 
20 struct vgic_global kvm_vgic_global_state __ro_after_init = {
21 	.gicv3_cpuif = STATIC_KEY_FALSE_INIT,
22 };
23 
24 /*
25  * Locking order is always:
26  * kvm->lock (mutex)
27  *   vcpu->mutex (mutex)
28  *     kvm->arch.config_lock (mutex)
29  *       its->cmd_lock (mutex)
30  *         its->its_lock (mutex)
31  *           vgic_dist->lpi_xa.xa_lock		must be taken with IRQs disabled
32  *             vgic_cpu->ap_list_lock		must be taken with IRQs disabled
33  *               vgic_irq->irq_lock		must be taken with IRQs disabled
34  *
35  * As the ap_list_lock might be taken from the timer interrupt handler,
36  * we have to disable IRQs before taking this lock and everything lower
37  * than it.
38  *
39  * The config_lock has additional ordering requirements:
40  * kvm->slots_lock
41  *   kvm->srcu
42  *     kvm->arch.config_lock
43  *
44  * If you need to take multiple locks, always take the upper lock first,
45  * then the lower ones, e.g. first take the its_lock, then the irq_lock.
46  * If you are already holding a lock and need to take a higher one, you
47  * have to drop the lower ranking lock first and re-acquire it after having
48  * taken the upper one.
49  *
50  * When taking more than one ap_list_lock at the same time, always take the
51  * lowest numbered VCPU's ap_list_lock first, so:
52  *   vcpuX->vcpu_id < vcpuY->vcpu_id:
53  *     raw_spin_lock(vcpuX->arch.vgic_cpu.ap_list_lock);
54  *     raw_spin_lock(vcpuY->arch.vgic_cpu.ap_list_lock);
55  *
56  * Since the VGIC must support injecting virtual interrupts from ISRs, we have
57  * to use the raw_spin_lock_irqsave/raw_spin_unlock_irqrestore versions of outer
58  * spinlocks for any lock that may be taken while injecting an interrupt.
59  */
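/*
 * A minimal sketch of the drop-and-reacquire rule above, assuming a caller
 * that holds an irq_lock and then needs the higher-ranking ap_list_lock
 * (this mirrors what vgic_queue_irq_unlock() does further down):
 *
 *	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
 *	raw_spin_lock_irqsave(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
 *	raw_spin_lock(&irq->irq_lock);
 *	... then re-validate any state sampled before the locks were dropped.
 */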
60 
61 /*
62  * Index the VM's xarray of mapped LPIs and return a reference to the IRQ
63  * structure. The caller is expected to call vgic_put_irq() later once it's
64  * finished with the IRQ.
65  */
66 static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid)
67 {
68 	struct vgic_dist *dist = &kvm->arch.vgic;
69 	struct vgic_irq *irq = NULL;
70 
71 	rcu_read_lock();
72 
73 	irq = xa_load(&dist->lpi_xa, intid);
74 	if (!vgic_try_get_irq_ref(irq))
75 		irq = NULL;
76 
77 	rcu_read_unlock();
78 
79 	return irq;
80 }
81 
82 /*
83  * This looks up the virtual interrupt ID to get the corresponding
84  * struct vgic_irq. It also increases the refcount, so any caller is expected
85  * to call vgic_put_irq() once it's finished with this IRQ.
86  */
87 struct vgic_irq *vgic_get_irq(struct kvm *kvm, u32 intid)
88 {
89 	/* Non-private IRQs are not yet implemented for GICv5 */
90 	if (vgic_is_v5(kvm))
91 		return NULL;
92 
93 	/* SPIs */
94 	if (intid >= VGIC_NR_PRIVATE_IRQS &&
95 	    intid < (kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)) {
96 		intid = array_index_nospec(intid, kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS);
97 		return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS];
98 	}
99 
100 	/* LPIs */
101 	if (irq_is_lpi(kvm, intid))
102 		return vgic_get_lpi(kvm, intid);
103 
104 	return NULL;
105 }
106 
107 struct vgic_irq *vgic_get_vcpu_irq(struct kvm_vcpu *vcpu, u32 intid)
108 {
109 	if (WARN_ON(!vcpu))
110 		return NULL;
111 
112 	if (vgic_is_v5(vcpu->kvm)) {
113 		u32 int_num, hwirq_id;
114 
115 		if (!__irq_is_ppi(KVM_DEV_TYPE_ARM_VGIC_V5, intid))
116 			return NULL;
117 
118 		hwirq_id = FIELD_GET(GICV5_HWIRQ_ID, intid);
119 		int_num = array_index_nospec(hwirq_id, VGIC_V5_NR_PRIVATE_IRQS);
120 
121 		return &vcpu->arch.vgic_cpu.private_irqs[int_num];
122 	}
123 
124 	/* SGIs and PPIs */
125 	if (intid < VGIC_NR_PRIVATE_IRQS) {
126 		intid = array_index_nospec(intid, VGIC_NR_PRIVATE_IRQS);
127 		return &vcpu->arch.vgic_cpu.private_irqs[intid];
128 	}
129 
130 	return vgic_get_irq(vcpu->kvm, intid);
131 }
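/*
 * Illustrative get/put pairing, assuming a caller that only needs to peek at
 * the interrupt state (the INTID value 27 is just an example PPI):
 *
 *	struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, 27);
 *
 *	if (irq) {
 *		scoped_guard(raw_spinlock_irqsave, &irq->irq_lock)
 *			pending = irq_is_pending(irq);
 *		vgic_put_irq(vcpu->kvm, irq);
 *	}
 */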
132 
133 static void vgic_release_lpi_locked(struct vgic_dist *dist, struct vgic_irq *irq)
134 {
135 	lockdep_assert_held(&dist->lpi_xa.xa_lock);
136 	__xa_erase(&dist->lpi_xa, irq->intid);
137 	kfree_rcu(irq, rcu);
138 }
139 
140 static __must_check bool __vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
141 {
142 	if (!irq_is_lpi(kvm, irq->intid))
143 		return false;
144 
145 	return refcount_dec_and_test(&irq->refcount);
146 }
147 
148 static __must_check bool vgic_put_irq_norelease(struct kvm *kvm, struct vgic_irq *irq)
149 {
150 	if (!__vgic_put_irq(kvm, irq))
151 		return false;
152 
153 	irq->pending_release = true;
154 	return true;
155 }
156 
157 void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
158 {
159 	struct vgic_dist *dist = &kvm->arch.vgic;
160 	unsigned long flags;
161 
162 	/*
163 	 * Normally the lock is only taken when the refcount drops to 0.
164 	 * Acquire/release it early on lockdep kernels to make locking issues
165 	 * in rare release paths a bit more obvious.
166 	 */
167 	if (IS_ENABLED(CONFIG_LOCKDEP) && irq_is_lpi(kvm, irq->intid)) {
168 		guard(spinlock_irqsave)(&dist->lpi_xa.xa_lock);
169 	}
170 
171 	if (!__vgic_put_irq(kvm, irq))
172 		return;
173 
174 	xa_lock_irqsave(&dist->lpi_xa, flags);
175 	vgic_release_lpi_locked(dist, irq);
176 	xa_unlock_irqrestore(&dist->lpi_xa, flags);
177 }
178 
179 static void vgic_release_deleted_lpis(struct kvm *kvm)
180 {
181 	struct vgic_dist *dist = &kvm->arch.vgic;
182 	unsigned long flags, intid;
183 	struct vgic_irq *irq;
184 
185 	xa_lock_irqsave(&dist->lpi_xa, flags);
186 
187 	xa_for_each(&dist->lpi_xa, intid, irq) {
188 		if (irq->pending_release)
189 			vgic_release_lpi_locked(dist, irq);
190 	}
191 
192 	xa_unlock_irqrestore(&dist->lpi_xa, flags);
193 }
194 
195 void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu)
196 {
197 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
198 	struct vgic_irq *irq, *tmp;
199 	bool deleted = false;
200 	unsigned long flags;
201 
202 	raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);
203 
204 	list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
205 		if (irq_is_lpi(vcpu->kvm, irq->intid)) {
206 			raw_spin_lock(&irq->irq_lock);
207 			list_del(&irq->ap_list);
208 			irq->vcpu = NULL;
209 			raw_spin_unlock(&irq->irq_lock);
210 			deleted |= vgic_put_irq_norelease(vcpu->kvm, irq);
211 		}
212 	}
213 
214 	raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
215 
216 	if (deleted)
217 		vgic_release_deleted_lpis(vcpu->kvm);
218 }
219 
220 void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending)
221 {
222 	WARN_ON(irq_set_irqchip_state(irq->host_irq,
223 				      IRQCHIP_STATE_PENDING,
224 				      pending));
225 }
226 
227 bool vgic_get_phys_line_level(struct vgic_irq *irq)
228 {
229 	bool line_level;
230 
231 	BUG_ON(!irq->hw);
232 
233 	if (irq->ops && irq->ops->get_input_level)
234 		return irq->ops->get_input_level(irq->intid);
235 
236 	WARN_ON(irq_get_irqchip_state(irq->host_irq,
237 				      IRQCHIP_STATE_PENDING,
238 				      &line_level));
239 	return line_level;
240 }
241 
242 /* Set/Clear the physical active state */
243 void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active)
244 {
245 
246 	BUG_ON(!irq->hw);
247 	WARN_ON(irq_set_irqchip_state(irq->host_irq,
248 				      IRQCHIP_STATE_ACTIVE,
249 				      active));
250 }
251 
252 /**
253  * vgic_target_oracle - compute the target vcpu for an irq
254  *
255  * @irq:	The irq to route. Must be already locked.
256  *
257  * Based on the current state of the interrupt (enabled, pending,
258  * active, vcpu and target_vcpu), compute the next vcpu this should be
259  * given to. Return NULL if this shouldn't be injected at all.
260  *
261  * Requires the IRQ lock to be held.
262  */
263 struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq)
264 {
265 	lockdep_assert_held(&irq->irq_lock);
266 
267 	/* If the interrupt is active, it must stay on the current vcpu */
268 	if (irq->active)
269 		return irq->vcpu ? : irq->target_vcpu;
270 
271 	/*
272 	 * If the IRQ is not active but enabled and pending, we should direct
273 	 * it to its configured target VCPU.
274 	 * If the distributor is disabled, pending interrupts shouldn't be
275 	 * forwarded.
276 	 */
277 	if (irq->enabled && irq_is_pending(irq)) {
278 		if (unlikely(irq->target_vcpu &&
279 			     !irq->target_vcpu->kvm->arch.vgic.enabled))
280 			return NULL;
281 
282 		return irq->target_vcpu;
283 	}
284 
285 	/* If neither active nor pending and enabled, then this IRQ should not
286 	 * be queued to any VCPU.
287 	 */
288 	return NULL;
289 }
290 
291 struct vgic_sort_info {
292 	struct kvm_vcpu *vcpu;
293 	struct vgic_vmcr vmcr;
294 };
295 
296 /*
297  * The order of items in the ap_lists defines how we'll pack things in LRs as
298  * well, the first items in the list being the first things populated in the
299  * LRs.
300  *
301  * Pending, non-active interrupts must be placed at the head of the list.
302  * Otherwise things should be sorted by the priority field and the GIC
303  * hardware support will take care of preemption of priority groups etc.
304  * Interrupts that are not deliverable should be at the end of the list.
305  *
306  * Return negative if "a" sorts before "b", 0 to preserve order, and positive
307  * to sort "b" before "a".
308  */
309 static int vgic_irq_cmp(void *priv, const struct list_head *a,
310 			const struct list_head *b)
311 {
312 	struct vgic_irq *irqa = container_of(a, struct vgic_irq, ap_list);
313 	struct vgic_irq *irqb = container_of(b, struct vgic_irq, ap_list);
314 	struct vgic_sort_info *info = priv;
315 	struct kvm_vcpu *vcpu = info->vcpu;
316 	bool penda, pendb;
317 	int ret;
318 
319 	/*
320 	 * list_sort may call this function with the same element when
321 	 * the list is fairly long.
322 	 */
323 	if (unlikely(irqa == irqb))
324 		return 0;
325 
326 	raw_spin_lock(&irqa->irq_lock);
327 	raw_spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING);
328 
329 	/* Undeliverable interrupts should be last */
330 	ret = (int)(vgic_target_oracle(irqb) == vcpu) - (int)(vgic_target_oracle(irqa) == vcpu);
331 	if (ret)
332 		goto out;
333 
334 	/* Same thing for interrupts targeting a disabled group */
335 	ret =  (int)(irqb->group ? info->vmcr.grpen1 : info->vmcr.grpen0);
336 	ret -= (int)(irqa->group ? info->vmcr.grpen1 : info->vmcr.grpen0);
337 	if (ret)
338 		goto out;
339 
340 	penda = irqa->enabled && irq_is_pending(irqa) && !irqa->active;
341 	pendb = irqb->enabled && irq_is_pending(irqb) && !irqb->active;
342 
343 	ret = (int)pendb - (int)penda;
344 	if (ret)
345 		goto out;
346 
347 	/* Both pending and enabled, sort by priority (lower number first) */
348 	ret = (int)irqa->priority - (int)irqb->priority;
349 	if (ret)
350 		goto out;
351 
352 	/* Finally, HW bit active interrupts have priority over non-HW ones */
353 	ret = (int)irqb->hw - (int)irqa->hw;
354 
355 out:
356 	raw_spin_unlock(&irqb->irq_lock);
357 	raw_spin_unlock(&irqa->irq_lock);
358 	return ret;
359 }
360 
361 /* Must be called with the ap_list_lock held */
362 static void vgic_sort_ap_list(struct kvm_vcpu *vcpu)
363 {
364 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
365 	struct vgic_sort_info info = { .vcpu = vcpu, };
366 
367 	lockdep_assert_held(&vgic_cpu->ap_list_lock);
368 
369 	vgic_get_vmcr(vcpu, &info.vmcr);
370 	list_sort(&info, &vgic_cpu->ap_list_head, vgic_irq_cmp);
371 }
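/*
 * Worked example of the resulting order (priorities are illustrative): a
 * pending IRQ at priority 0x20 sorts before a pending IRQ at priority 0xa0,
 * both sort before an active-only IRQ, and anything vgic_target_oracle()
 * deems undeliverable ends up at the tail of the ap_list.
 */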
372 
373 /*
374  * Only valid injection if changing level for level-triggered IRQs or for a
375  * rising edge, and in-kernel connected IRQ lines can only be controlled by
376  * their owner.
377  */
378 static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owner)
379 {
380 	if (irq->owner != owner)
381 		return false;
382 
383 	switch (irq->config) {
384 	case VGIC_CONFIG_LEVEL:
385 		return irq->line_level != level;
386 	case VGIC_CONFIG_EDGE:
387 		return level;
388 	}
389 
390 	return false;
391 }
392 
393 static bool vgic_model_needs_bcst_kick(struct kvm *kvm)
394 {
395 	/*
396 	 * A GICv3 (or GICv3-like) system exposing a GICv3 to the guest
397 	 * needs a broadcast kick to set TDIR globally.
398 	 *
399 	 * For systems that do not have TDIR (ARM's own v8.0 CPUs), the
400 	 * shadow TDIR bit is always set, and so is the register's TC bit,
401 	 * so no need to kick the CPUs.
402 	 */
403 	return (cpus_have_final_cap(ARM64_HAS_ICH_HCR_EL2_TDIR) &&
404 		kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3);
405 }
406 
407 /*
408  * Check whether an IRQ needs to (and can) be queued to a VCPU's ap list.
409  * Do the queuing if necessary, taking the right locks in the right order.
410  * Returns true when the IRQ was queued, false otherwise.
411  *
412  * Needs to be entered with the IRQ lock already held, but will return
413  * with all locks dropped.
414  */
415 bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
416 			   unsigned long flags) __releases(&irq->irq_lock)
417 {
418 	struct kvm_vcpu *vcpu;
419 	bool bcast;
420 
421 	lockdep_assert_held(&irq->irq_lock);
422 
423 	if (irq->ops && irq->ops->queue_irq_unlock)
424 		return irq->ops->queue_irq_unlock(kvm, irq, flags);
425 
426 retry:
427 	vcpu = vgic_target_oracle(irq);
428 	if (irq->vcpu || !vcpu) {
429 		/*
430 		 * If this IRQ is already on a VCPU's ap_list, then it
431 		 * cannot be moved or modified and there is no more work for
432 		 * us to do.
433 		 *
434 		 * Otherwise, if the irq is not pending and enabled, it does
435 		 * not need to be inserted into an ap_list and there is also
436 		 * no more work for us to do.
437 		 */
438 		raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
439 
440 		/*
441 		 * We have to kick the VCPU here, because we could be
442 		 * queueing an edge-triggered interrupt for which we
443 		 * get no EOI maintenance interrupt. In that case,
444 		 * while the IRQ is already on the VCPU's AP list, the
445 		 * VCPU could have EOI'ed the original interrupt and
446 		 * won't see this one until it exits for some other
447 		 * reason.
448 		 */
449 		if (vcpu) {
450 			kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
451 			kvm_vcpu_kick(vcpu);
452 		}
453 		return false;
454 	}
455 
456 	/*
457 	 * We must unlock the irq lock to take the ap_list_lock where
458 	 * we are going to insert this new pending interrupt.
459 	 */
460 	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
461 
462 	/* someone can do stuff here, which we re-check below */
463 
464 	raw_spin_lock_irqsave(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
465 	raw_spin_lock(&irq->irq_lock);
466 
467 	/*
468 	 * Did something change behind our backs?
469 	 *
470 	 * There are two cases:
471 	 * 1) The irq lost its pending state or was disabled behind our
472 	 *    backs and/or it was queued to another VCPU's ap_list.
473 	 * 2) Someone changed the affinity on this irq behind our
474 	 *    backs and we are now holding the wrong ap_list_lock.
475 	 *
476 	 * In both cases, drop the locks and retry.
477 	 */
478 
479 	if (unlikely(irq->vcpu || vcpu != vgic_target_oracle(irq))) {
480 		raw_spin_unlock(&irq->irq_lock);
481 		raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock,
482 					   flags);
483 
484 		raw_spin_lock_irqsave(&irq->irq_lock, flags);
485 		goto retry;
486 	}
487 
488 	/*
489 	 * Grab a reference to the irq to reflect the fact that it is
490 	 * now in the ap_list. This is safe as the caller must already hold a
491 	 * reference on the irq.
492 	 */
493 	vgic_get_irq_ref(irq);
494 	list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head);
495 	irq->vcpu = vcpu;
496 
497 	/* A new SPI may result in deactivation trapping on all vcpus */
498 	bcast = (vgic_model_needs_bcst_kick(vcpu->kvm) &&
499 		 vgic_valid_spi(vcpu->kvm, irq->intid) &&
500 		 atomic_fetch_inc(&vcpu->kvm->arch.vgic.active_spis) == 0);
501 
502 	raw_spin_unlock(&irq->irq_lock);
503 	raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
504 
505 	if (!bcast) {
506 		kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
507 		kvm_vcpu_kick(vcpu);
508 	} else {
509 		kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_IRQ_PENDING);
510 	}
511 
512 	return true;
513 }
514 
515 /**
516  * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
517  * @kvm:     The VM structure pointer
518  * @vcpu:    The CPU for PPIs or NULL for global interrupts
519  * @intid:   The INTID to inject a new state to.
520  * @level:   Edge-triggered:  true:  to trigger the interrupt
521  *			      false: to ignore the call
522  *	     Level-sensitive  true:  raise the input signal
523  *			      false: lower the input signal
524  * @owner:   The opaque pointer to the owner of the IRQ being raised to verify
525  *           that the caller is allowed to inject this IRQ.  Userspace
526  *           injections will have owner == NULL.
527  *
528  * The VGIC is not concerned with devices being active-LOW or active-HIGH for
529  * level-sensitive interrupts.  You can think of the level parameter as 1
530  * being HIGH and 0 being LOW and all devices being active-HIGH.
531  */
532 int kvm_vgic_inject_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
533 			unsigned int intid, bool level, void *owner)
534 {
535 	struct vgic_irq *irq;
536 	unsigned long flags;
537 	int ret;
538 
539 	ret = vgic_lazy_init(kvm);
540 	if (ret)
541 		return ret;
542 
543 	if (!vcpu && irq_is_private(kvm, intid))
544 		return -EINVAL;
545 
546 	trace_vgic_update_irq_pending(vcpu ? vcpu->vcpu_idx : 0, intid, level);
547 
548 	if (irq_is_private(kvm, intid))
549 		irq = vgic_get_vcpu_irq(vcpu, intid);
550 	else
551 		irq = vgic_get_irq(kvm, intid);
552 	if (!irq)
553 		return -EINVAL;
554 
555 	raw_spin_lock_irqsave(&irq->irq_lock, flags);
556 
557 	if (!vgic_validate_injection(irq, level, owner)) {
558 		/* Nothing to see here, move along... */
559 		raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
560 		vgic_put_irq(kvm, irq);
561 		return 0;
562 	}
563 
564 	if (irq->config == VGIC_CONFIG_LEVEL)
565 		irq->line_level = level;
566 	else
567 		irq->pending_latch = true;
568 
569 	vgic_queue_irq_unlock(kvm, irq, flags);
570 	vgic_put_irq(kvm, irq);
571 
572 	return 0;
573 }
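/*
 * A hypothetical in-kernel user of the API above (a sketch only; "my_dev"
 * and "spi_intid" are placeholders, not symbols defined here). A device
 * model raising and later lowering a level-sensitive SPI would do:
 *
 *	kvm_vgic_inject_irq(kvm, NULL, spi_intid, true, my_dev);
 *	...
 *	kvm_vgic_inject_irq(kvm, NULL, spi_intid, false, my_dev);
 *
 * Userspace injections pass owner == NULL and are checked against the
 * registered owner in vgic_validate_injection().
 */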
574 
575 void kvm_vgic_set_irq_ops(struct kvm_vcpu *vcpu, u32 vintid,
576 			  struct irq_ops *ops)
577 {
578 	struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, vintid);
579 
580 	BUG_ON(!irq);
581 
582 	scoped_guard(raw_spinlock_irqsave, &irq->irq_lock)
583 		irq->ops = ops;
584 
585 	vgic_put_irq(vcpu->kvm, irq);
586 }
587 
588 void kvm_vgic_clear_irq_ops(struct kvm_vcpu *vcpu, u32 vintid)
589 {
590 	kvm_vgic_set_irq_ops(vcpu, vintid, NULL);
591 }
592 
593 /* @irq->irq_lock must be held */
594 static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
595 			    unsigned int host_irq)
596 {
597 	struct irq_desc *desc;
598 	struct irq_data *data;
599 
600 	/*
601 	 * Find the physical IRQ number corresponding to @host_irq
602 	 */
603 	desc = irq_to_desc(host_irq);
604 	if (!desc) {
605 		kvm_err("%s: no interrupt descriptor\n", __func__);
606 		return -EINVAL;
607 	}
608 	data = irq_desc_get_irq_data(desc);
609 	while (data->parent_data)
610 		data = data->parent_data;
611 
612 	irq->hw = true;
613 	irq->host_irq = host_irq;
614 	irq->hwintid = data->hwirq;
615 
616 	if (irq->ops && irq->ops->set_direct_injection)
617 		irq->ops->set_direct_injection(vcpu, irq, true);
618 
619 	return 0;
620 }
621 
622 /* @irq->irq_lock must be held */
623 static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq)
624 {
625 	if (irq->ops && irq->ops->set_direct_injection)
626 		irq->ops->set_direct_injection(irq->target_vcpu, irq, false);
627 
628 	irq->hw = false;
629 	irq->hwintid = 0;
630 }
631 
632 int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
633 			  u32 vintid)
634 {
635 	struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, vintid);
636 	unsigned long flags;
637 	int ret;
638 
639 	BUG_ON(!irq);
640 
641 	raw_spin_lock_irqsave(&irq->irq_lock, flags);
642 	ret = kvm_vgic_map_irq(vcpu, irq, host_irq);
643 	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
644 	vgic_put_irq(vcpu->kvm, irq);
645 
646 	return ret;
647 }
648 
649 /**
650  * kvm_vgic_reset_mapped_irq - Reset a mapped IRQ
651  * @vcpu: The VCPU pointer
652  * @vintid: The INTID of the interrupt
653  *
654  * Reset the active and pending states of a mapped interrupt.  Kernel
655  * subsystems injecting mapped interrupts should reset their interrupt lines
656  * when we are doing a reset of the VM.
657  */
658 void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid)
659 {
660 	struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, vintid);
661 	unsigned long flags;
662 
663 	if (!irq->hw)
664 		goto out;
665 
666 	raw_spin_lock_irqsave(&irq->irq_lock, flags);
667 	irq->active = false;
668 	irq->pending_latch = false;
669 	irq->line_level = false;
670 	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
671 out:
672 	vgic_put_irq(vcpu->kvm, irq);
673 }
674 
675 int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid)
676 {
677 	struct vgic_irq *irq;
678 	unsigned long flags;
679 
680 	if (!vgic_initialized(vcpu->kvm))
681 		return -EAGAIN;
682 
683 	irq = vgic_get_vcpu_irq(vcpu, vintid);
684 	BUG_ON(!irq);
685 
686 	raw_spin_lock_irqsave(&irq->irq_lock, flags);
687 	kvm_vgic_unmap_irq(irq);
688 	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
689 	vgic_put_irq(vcpu->kvm, irq);
690 
691 	return 0;
692 }
693 
694 int kvm_vgic_get_map(struct kvm_vcpu *vcpu, unsigned int vintid)
695 {
696 	struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, vintid);
697 	unsigned long flags;
698 	int ret = -1;
699 
700 	raw_spin_lock_irqsave(&irq->irq_lock, flags);
701 	if (irq->hw)
702 		ret = irq->hwintid;
703 	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
704 
705 	vgic_put_irq(vcpu->kvm, irq);
706 	return ret;
707 }
708 
709 /**
710  * kvm_vgic_set_owner - Set the owner of an interrupt for a VM
711  *
712  * @vcpu:   Pointer to the VCPU (used for PPIs)
713  * @intid:  The virtual INTID identifying the interrupt (PPI or SPI)
714  * @owner:  Opaque pointer to the owner
715  *
716  * Returns 0 if intid is not already used by another in-kernel device and the
717  * owner is set, otherwise returns an error code.
718  */
719 int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner)
720 {
721 	struct vgic_irq *irq;
722 	unsigned long flags;
723 	int ret = 0;
724 
725 	if (!vgic_initialized(vcpu->kvm))
726 		return -EAGAIN;
727 
728 	/* SGIs and LPIs cannot be wired up to any device */
729 	if (!irq_is_ppi(vcpu->kvm, intid) && !vgic_valid_spi(vcpu->kvm, intid))
730 		return -EINVAL;
731 
732 	irq = vgic_get_vcpu_irq(vcpu, intid);
733 	raw_spin_lock_irqsave(&irq->irq_lock, flags);
734 	if (irq->owner && irq->owner != owner)
735 		ret = -EEXIST;
736 	else
737 		irq->owner = owner;
738 	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
739 
740 	return ret;
741 }
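/*
 * Sketch of the expected flow for an in-kernel device claiming a private
 * interrupt (the PPI number and owner token are illustrative only):
 *
 *	ret = kvm_vgic_set_owner(vcpu, ppi_intid, my_owner);
 *	if (!ret)
 *		ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu, ppi_intid, true,
 *					  my_owner);
 *
 * A later injection with a different owner is rejected by
 * vgic_validate_injection().
 */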
742 
743 /**
744  * vgic_prune_ap_list - Remove non-relevant interrupts from the list
745  *
746  * @vcpu: The VCPU pointer
747  *
748  * Go over the list of "interesting" interrupts, and prune those that we
749  * won't have to consider in the near future.
750  */
751 static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
752 {
753 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
754 	struct vgic_irq *irq, *tmp;
755 	bool deleted_lpis = false;
756 
757 	DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
758 
759 retry:
760 	raw_spin_lock(&vgic_cpu->ap_list_lock);
761 
762 	list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
763 		struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB;
764 		bool target_vcpu_needs_kick = false;
765 
766 		raw_spin_lock(&irq->irq_lock);
767 
768 		BUG_ON(vcpu != irq->vcpu);
769 
770 		target_vcpu = vgic_target_oracle(irq);
771 
772 		if (!target_vcpu) {
773 			/*
774 			 * We don't need to process this interrupt any
775 			 * further, move it off the list.
776 			 */
777 			list_del(&irq->ap_list);
778 			irq->vcpu = NULL;
779 			raw_spin_unlock(&irq->irq_lock);
780 
781 			/*
782 			 * This vgic_put_irq_norelease() call matches the
783 			 * vgic_get_irq_ref() in vgic_queue_irq_unlock(),
784 			 * where we added the LPI to the ap_list. As we
785 			 * remove the irq from the list, we also drop the
786 			 * refcount.
787 			 */
788 			deleted_lpis |= vgic_put_irq_norelease(vcpu->kvm, irq);
789 			continue;
790 		}
791 
792 		if (target_vcpu == vcpu) {
793 			/* We're on the right CPU */
794 			raw_spin_unlock(&irq->irq_lock);
795 			continue;
796 		}
797 
798 		/* This interrupt looks like it has to be migrated. */
799 
800 		raw_spin_unlock(&irq->irq_lock);
801 		raw_spin_unlock(&vgic_cpu->ap_list_lock);
802 
803 		/*
804 		 * Ensure locking order by always locking the smallest
805 		 * ID first.
806 		 */
807 		if (vcpu->vcpu_id < target_vcpu->vcpu_id) {
808 			vcpuA = vcpu;
809 			vcpuB = target_vcpu;
810 		} else {
811 			vcpuA = target_vcpu;
812 			vcpuB = vcpu;
813 		}
814 
815 		raw_spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock);
816 		raw_spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock,
817 				      SINGLE_DEPTH_NESTING);
818 		raw_spin_lock(&irq->irq_lock);
819 
820 		/*
821 		 * If the affinity has been preserved, move the
822 		 * interrupt around. Otherwise, it means things have
823 		 * changed while the interrupt was unlocked, and we
824 		 * need to replay this.
825 		 *
826 		 * In all cases, we cannot trust the list not to have
827 		 * changed, so we restart from the beginning.
828 		 */
829 		if (target_vcpu == vgic_target_oracle(irq)) {
830 			struct vgic_cpu *new_cpu = &target_vcpu->arch.vgic_cpu;
831 
832 			list_del(&irq->ap_list);
833 			irq->vcpu = target_vcpu;
834 			list_add_tail(&irq->ap_list, &new_cpu->ap_list_head);
835 			target_vcpu_needs_kick = true;
836 		}
837 
838 		raw_spin_unlock(&irq->irq_lock);
839 		raw_spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock);
840 		raw_spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock);
841 
842 		if (target_vcpu_needs_kick) {
843 			kvm_make_request(KVM_REQ_IRQ_PENDING, target_vcpu);
844 			kvm_vcpu_kick(target_vcpu);
845 		}
846 
847 		goto retry;
848 	}
849 
850 	raw_spin_unlock(&vgic_cpu->ap_list_lock);
851 
852 	if (unlikely(deleted_lpis))
853 		vgic_release_deleted_lpis(vcpu->kvm);
854 }
855 
856 static void vgic_fold_state(struct kvm_vcpu *vcpu)
857 {
858 	if (vgic_is_v5(vcpu->kvm)) {
859 		vgic_v5_fold_ppi_state(vcpu);
860 		return;
861 	}
862 
863 	if (!*host_data_ptr(last_lr_irq))
864 		return;
865 
866 	if (kvm_vgic_global_state.type == VGIC_V2)
867 		vgic_v2_fold_lr_state(vcpu);
868 	else
869 		vgic_v3_fold_lr_state(vcpu);
870 }
871 
872 /* Requires the irq_lock to be held. */
873 static inline void vgic_populate_lr(struct kvm_vcpu *vcpu,
874 				    struct vgic_irq *irq, int lr)
875 {
876 	lockdep_assert_held(&irq->irq_lock);
877 
878 	if (kvm_vgic_global_state.type == VGIC_V2)
879 		vgic_v2_populate_lr(vcpu, irq, lr);
880 	else
881 		vgic_v3_populate_lr(vcpu, irq, lr);
882 }
883 
884 static inline void vgic_clear_lr(struct kvm_vcpu *vcpu, int lr)
885 {
886 	if (kvm_vgic_global_state.type == VGIC_V2)
887 		vgic_v2_clear_lr(vcpu, lr);
888 	else
889 		vgic_v3_clear_lr(vcpu, lr);
890 }
891 
892 static void summarize_ap_list(struct kvm_vcpu *vcpu,
893 			      struct ap_list_summary *als)
894 {
895 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
896 	struct vgic_irq *irq;
897 
898 	lockdep_assert_held(&vgic_cpu->ap_list_lock);
899 
900 	*als = (typeof(*als)){};
901 
902 	list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
903 		guard(raw_spinlock)(&irq->irq_lock);
904 
905 		if (unlikely(vgic_target_oracle(irq) != vcpu))
906 			continue;
907 
908 		if (!irq->active)
909 			als->nr_pend++;
910 		else
911 			als->nr_act++;
912 
913 		if (irq->intid < VGIC_NR_SGIS)
914 			als->nr_sgi++;
915 	}
916 }
917 
918 /*
919  * Dealing with LR overflow is close to black magic -- dress accordingly.
920  *
921  * We have to present an almost infinite number of interrupts through a very
922  * limited number of registers. Therefore crucial decisions must be made to
923  * ensure we feed the most relevant interrupts into the LRs, and yet have
924  * some facilities to let the guest interact with those that are not there.
925  *
926  * All considerations below are in the context of interrupts targeting a
927  * single vcpu with non-idle state (either pending, active, or both),
928  * colloquially called the ap_list:
929  *
930  * - Pending interrupts must have priority over active interrupts. This also
931  *   excludes pending+active interrupts. This ensures that a guest can
932  *   perform priority drops on any number of interrupts, and yet be
933  *   presented the next pending one.
934  *
935  * - Deactivation of interrupts outside of the LRs must be tracked, using
936  *   the EOIcount-driven maintenance interrupt and, when that is not
937  *   enough, by trapping the DIR register.
938  *
939  * - For EOImode=0, a non-zero EOIcount means walking the ap_list past the
940  *   point that made it into the LRs, and deactivating interrupts that would
941  *   have made it onto the LRs if we had the space.
942  *
943  * - The MI-generation bits must be used to try and force an exit when the
944  *   guest has done enough changes to the LRs that we want to reevaluate the
945  *   situation:
946  *
947  *	- if the total number of pending interrupts exceeds the number of
948  *	  LRs, NPIE must be set in order to exit once no pending interrupts
949  *	  are present in the LRs, allowing us to populate the next batch.
950  *
951  *	- if there are active interrupts outside of the LRs, then LRENPIE
952  *	  must be set so that we exit on deactivation of one of these, and
953  *	  work out which one is to be deactivated.  Note that this is not
954  *	  enough to deal with EOImode=1, see below.
955  *
956  *	- if the overall number of interrupts exceeds the number of LRs,
957  *	  then UIE must be set to allow refilling of the LRs once the
958  *	  majority of them has been processed.
959  *
960  *	- as usual, MI triggers are only an optimisation, since we cannot
961  *        rely on the MI being delivered in a timely manner...
962  *
963  * - EOImode=1 creates some additional problems:
964  *
965  *      - deactivation can happen in any order, and we cannot rely on
966  *	  EOImode=0's coupling of priority-drop and deactivation which
967  *	  imposes strict reverse Ack order. This means that DIR must
968  *	  trap if we have active interrupts outside of the LRs.
969  *
970  *      - deactivation of SPIs can occur on any CPU, while the SPI is only
971  *	  present in the ap_list of the CPU that actually ack-ed it. In that
972  *	  case, EOIcount doesn't provide enough information, and we must
973  *	  resort to trapping DIR even if we don't overflow the LRs. Bonus
974  *	  point for not trapping DIR when no SPIs are pending or active in
975  *	  the whole VM.
976  *
977  *	- LPIs do not suffer the same problem as SPIs on deactivation, as we
978  *	  have to essentially discard the active state, see below.
979  *
980  * - Virtual LPIs have an active state (surprise!), which gets removed on
981  *   priority drop (EOI). However, EOIcount doesn't get bumped when the LPI
982  *   is not present in the LR (surprise again!). Special care must therefore
983  *   be taken to remove the active state from any activated LPI when exiting
984  *   from the guest. This is in a way no different from what happens on the
985  *   physical side. We still rely on the running priority to have been
986  *   removed from the APRs, irrespective of the LPI being present in the LRs
987  *   or not.
988  *
989  * - Virtual SGIs directly injected via GICv4.1 must not affect EOIcount, as
990  *   they are not managed in SW and don't have a true active state. So only
991  *   set vSGIEOICount when no SGIs are in the ap_list.
992  *
993  * - GICv2 SGIs with multiple sources are injected one source at a time, as
994  *   if they were made pending sequentially. This may mean that we don't
995  *   always present the HPPI if other interrupts with lower priority are
996  *   pending in the LRs. Big deal.
997  */
998 static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
999 {
1000 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1001 	struct ap_list_summary als;
1002 	struct vgic_irq *irq;
1003 	int count = 0;
1004 
1005 	lockdep_assert_held(&vgic_cpu->ap_list_lock);
1006 
1007 	summarize_ap_list(vcpu, &als);
1008 
1009 	if (irqs_outside_lrs(&als))
1010 		vgic_sort_ap_list(vcpu);
1011 
1012 	*host_data_ptr(last_lr_irq) = NULL;
1013 
1014 	list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
1015 		scoped_guard(raw_spinlock,  &irq->irq_lock) {
1016 			if (likely(vgic_target_oracle(irq) == vcpu)) {
1017 				vgic_populate_lr(vcpu, irq, count++);
1018 				*host_data_ptr(last_lr_irq) = irq;
1019 			}
1020 		}
1021 
1022 		if (count == kvm_vgic_global_state.nr_lr)
1023 			break;
1024 	}
1025 
1026 	/* Nuke remaining LRs */
1027 	for (int i = count ; i < kvm_vgic_global_state.nr_lr; i++)
1028 		vgic_clear_lr(vcpu, i);
1029 
1030 	if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
1031 		vcpu->arch.vgic_cpu.vgic_v2.used_lrs = count;
1032 		vgic_v2_configure_hcr(vcpu, &als);
1033 	} else {
1034 		vcpu->arch.vgic_cpu.vgic_v3.used_lrs = count;
1035 		vgic_v3_configure_hcr(vcpu, &als);
1036 	}
1037 }
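/*
 * Worked example of the flushing above (numbers are illustrative): with 4
 * LRs and 6 deliverable interrupts on the ap_list, the list is sorted, the
 * first 4 entries are written into LR0..LR3, any remaining LRs are cleared
 * (none here), and the summary handed to vgic_v{2,3}_configure_hcr() lets
 * the NPIE/UIE/LRENPIE machinery described above request an exit so the
 * last 2 interrupts can be fed in later.
 */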
1038 
1039 static inline bool can_access_vgic_from_kernel(void)
1040 {
1041 	/*
1042 	 * GICv2 can always be accessed from the kernel because it is
1043 	 * memory-mapped, and VHE systems can access GICv3 EL2 system
1044 	 * registers.
1045 	 */
1046 	return !static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) || has_vhe();
1047 }
1048 
1049 static inline void vgic_save_state(struct kvm_vcpu *vcpu)
1050 {
1051 	/* No switch statement here. See comment in vgic_restore_state() */
1052 	if (vgic_is_v5(vcpu->kvm))
1053 		vgic_v5_save_state(vcpu);
1054 	else if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
1055 		vgic_v2_save_state(vcpu);
1056 	else
1057 		__vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3);
1058 }
1059 
1060 /* Sync back the hardware VGIC state into our emulation after a guest's run. */
1061 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
1062 {
1063 	if (vgic_is_v3(vcpu->kvm)) {
1064 		/* If nesting, emulate the HW effect from L0 to L1 */
1065 		if (vgic_state_is_nested(vcpu)) {
1066 			vgic_v3_sync_nested(vcpu);
1067 			return;
1068 		}
1069 
1070 		if (vcpu_has_nv(vcpu))
1071 			vgic_v3_nested_update_mi(vcpu);
1072 	}
1073 
1074 	if (can_access_vgic_from_kernel())
1075 		vgic_save_state(vcpu);
1076 
1077 	vgic_fold_state(vcpu);
1078 
1079 	if (!vgic_is_v5(vcpu->kvm))
1080 		vgic_prune_ap_list(vcpu);
1081 }
1082 
1083 /* Sync interrupts that were deactivated through a DIR trap */
1084 void kvm_vgic_process_async_update(struct kvm_vcpu *vcpu)
1085 {
1086 	unsigned long flags;
1087 
1088 	/* Make sure we're in the same context as LR handling */
1089 	local_irq_save(flags);
1090 	vgic_prune_ap_list(vcpu);
1091 	local_irq_restore(flags);
1092 }
1093 
1094 static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
1095 {
1096 	/*
1097 	 * As nice as it would be to restructure this code into a switch
1098 	 * statement as can be found elsewhere, the logic quickly gets ugly.
1099 	 *
1100 	 * __vgic_v3_restore_state() is doing a lot of heavy lifting here. It is
1101 	 * required for GICv3-on-GICv3, GICv2-on-GICv3, GICv3-on-GICv5, and the
1102 	 * no-in-kernel-irqchip case on GICv3 hardware. Hence, adding a switch
1103 	 * here results in much more complex code.
1104 	 */
1105 	if (vgic_is_v5(vcpu->kvm))
1106 		vgic_v5_restore_state(vcpu);
1107 	else if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
1108 		vgic_v2_restore_state(vcpu);
1109 	else
1110 		__vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3);
1111 }
1112 
1113 static void vgic_flush_state(struct kvm_vcpu *vcpu)
1114 {
1115 	if (vgic_is_v5(vcpu->kvm)) {
1116 		vgic_v5_flush_ppi_state(vcpu);
1117 		return;
1118 	}
1119 
1120 	scoped_guard(raw_spinlock, &vcpu->arch.vgic_cpu.ap_list_lock)
1121 		vgic_flush_lr_state(vcpu);
1122 }
1123 
1124 /* Flush our emulation state into the GIC hardware before entering the guest. */
1125 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
1126 {
1127 	/*
1128 	 * If in a nested state, we must return early. Two possibilities:
1129 	 *
1130 	 * - If we have any pending IRQ for the guest and the guest
1131 	 *   expects IRQs to be handled in its virtual EL2 mode (the
1132 	 *   virtual IMO bit is set) and it is not already running in
1133 	 *   virtual EL2 mode, then we have to emulate an IRQ
1134 	 *   exception to virtual EL2.
1135 	 *
1136 	 *   We do that by placing a request to ourselves which will
1137 	 *   abort the entry procedure and inject the exception at the
1138 	 *   beginning of the run loop.
1139 	 *
1140 	 * - Otherwise, do exactly *NOTHING* apart from enabling the virtual
1141 	 *   CPU interface. The guest state is already loaded, and we can
1142 	 *   carry on with running it.
1143 	 *
1144 	 * If we have NV, but are not in a nested state, compute the
1145 	 * maintenance interrupt state, as it may fire.
1146 	 */
1147 	if (vgic_state_is_nested(vcpu)) {
1148 		if (kvm_vgic_vcpu_pending_irq(vcpu))
1149 			kvm_make_request(KVM_REQ_GUEST_HYP_IRQ_PENDING, vcpu);
1150 
1151 		vgic_v3_flush_nested(vcpu);
1152 		return;
1153 	}
1154 
1155 	if (vcpu_has_nv(vcpu))
1156 		vgic_v3_nested_update_mi(vcpu);
1157 
1158 	DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
1159 
1160 	vgic_flush_state(vcpu);
1161 
1162 	if (can_access_vgic_from_kernel())
1163 		vgic_restore_state(vcpu);
1164 
1165 	if (vgic_supports_direct_irqs(vcpu->kvm) && kvm_vgic_global_state.has_gicv4)
1166 		vgic_v4_commit(vcpu);
1167 }
1168 
1169 void kvm_vgic_load(struct kvm_vcpu *vcpu)
1170 {
1171 	const struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1172 
1173 	if (unlikely(!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm))) {
1174 		if (has_vhe() && static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
1175 			__vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
1176 		return;
1177 	}
1178 
1179 	switch (dist->vgic_model) {
1180 	case KVM_DEV_TYPE_ARM_VGIC_V5:
1181 		vgic_v5_load(vcpu);
1182 		break;
1183 	case KVM_DEV_TYPE_ARM_VGIC_V3:
1184 		vgic_v3_load(vcpu);
1185 		break;
1186 	case KVM_DEV_TYPE_ARM_VGIC_V2:
1187 		if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
1188 			vgic_v3_load(vcpu);
1189 		else
1190 			vgic_v2_load(vcpu);
1191 		break;
1192 	default:
1193 		BUG();
1194 	}
1195 }
1196 
1197 void kvm_vgic_put(struct kvm_vcpu *vcpu)
1198 {
1199 	const struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1200 
1201 	if (unlikely(!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm))) {
1202 		if (has_vhe() && static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
1203 			__vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
1204 		return;
1205 	}
1206 
1207 	switch (dist->vgic_model) {
1208 	case KVM_DEV_TYPE_ARM_VGIC_V5:
1209 		vgic_v5_put(vcpu);
1210 		break;
1211 	case KVM_DEV_TYPE_ARM_VGIC_V3:
1212 		vgic_v3_put(vcpu);
1213 		break;
1214 	case KVM_DEV_TYPE_ARM_VGIC_V2:
1215 		if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
1216 			vgic_v3_put(vcpu);
1217 		else
1218 			vgic_v2_put(vcpu);
1219 		break;
1220 	default:
1221 		BUG();
1222 	}
1223 }
1224 
1225 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
1226 {
1227 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1228 	struct vgic_irq *irq;
1229 	bool pending = false;
1230 	unsigned long flags;
1231 	struct vgic_vmcr vmcr;
1232 
1233 	if (vgic_is_v5(vcpu->kvm))
1234 		return vgic_v5_has_pending_ppi(vcpu);
1235 
1236 	if (!vcpu->kvm->arch.vgic.enabled)
1237 		return false;
1238 
1239 	if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last)
1240 		return true;
1241 
1242 	vgic_get_vmcr(vcpu, &vmcr);
1243 
1244 	raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);
1245 
1246 	list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
1247 		raw_spin_lock(&irq->irq_lock);
1248 		pending = irq_is_pending(irq) && irq->enabled &&
1249 			  !irq->active &&
1250 			  irq->priority < vmcr.pmr;
1251 		raw_spin_unlock(&irq->irq_lock);
1252 
1253 		if (pending)
1254 			break;
1255 	}
1256 
1257 	raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
1258 
1259 	return pending;
1260 }
1261 
1262 void vgic_kick_vcpus(struct kvm *kvm)
1263 {
1264 	struct kvm_vcpu *vcpu;
1265 	unsigned long c;
1266 
1267 	/*
1268 	 * We've injected an interrupt, time to find out who deserves
1269 	 * a good kick...
1270 	 */
1271 	kvm_for_each_vcpu(c, vcpu, kvm) {
1272 		if (kvm_vgic_vcpu_pending_irq(vcpu)) {
1273 			kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
1274 			kvm_vcpu_kick(vcpu);
1275 		}
1276 	}
1277 }
1278 
1279 bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid)
1280 {
1281 	struct vgic_irq *irq;
1282 	bool map_is_active;
1283 	unsigned long flags;
1284 
1285 	if (!vgic_initialized(vcpu->kvm))
1286 		return false;
1287 
1288 	irq = vgic_get_vcpu_irq(vcpu, vintid);
1289 	raw_spin_lock_irqsave(&irq->irq_lock, flags);
1290 	map_is_active = irq->hw && irq->active;
1291 	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
1292 	vgic_put_irq(vcpu->kvm, irq);
1293 
1294 	return map_is_active;
1295 }
1296 
1297 /*
1298  * Level-triggered mapped IRQs are special because we only observe rising
1299  * edges as input to the VGIC.
1300  *
1301  * If the guest never acked the interrupt we have to sample the physical
1302  * line and set the line level, because the device state could have changed
1303  * or we simply need to process the still pending interrupt later.
1304  *
1305  * We could also have entered the guest with the interrupt active+pending.
1306  * On the next exit, we need to re-evaluate the pending state, as it could
1307  * otherwise result in a spurious interrupt by injecting a now potentially
1308  * stale pending state.
1309  *
1310  * If this causes us to lower the level, we have to also clear the physical
1311  * active state, since we will otherwise never be told when the interrupt
1312  * becomes asserted again.
1313  *
1314  * Another case is when the interrupt requires a helping hand on
1315  * deactivation (no HW deactivation, for example).
1316  */
1317 void vgic_irq_handle_resampling(struct vgic_irq *irq,
1318 				bool lr_deactivated, bool lr_pending)
1319 {
1320 	if (vgic_irq_is_mapped_level(irq)) {
1321 		bool resample = false;
1322 
1323 		if (unlikely(vgic_irq_needs_resampling(irq))) {
1324 			resample = !(irq->active || irq->pending_latch);
1325 		} else if (lr_pending || (lr_deactivated && irq->line_level)) {
1326 			irq->line_level = vgic_get_phys_line_level(irq);
1327 			resample = !irq->line_level;
1328 		}
1329 
1330 		if (resample)
1331 			vgic_irq_set_phys_active(irq, false);
1332 	}
1333 }
1334