// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015, 2016 ARM Ltd.
 */

#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/list_sort.h>
#include <linux/nospec.h>

#include <asm/kvm_hyp.h>

#include "vgic.h"

#define CREATE_TRACE_POINTS
#include "trace.h"

struct vgic_global kvm_vgic_global_state __ro_after_init = {
	.gicv3_cpuif = STATIC_KEY_FALSE_INIT,
};

/*
 * Locking order is always:
 * kvm->lock (mutex)
 *   vcpu->mutex (mutex)
 *     kvm->arch.config_lock (mutex)
 *       its->cmd_lock (mutex)
 *         its->its_lock (mutex)
 *           vgic_dist->lpi_xa.xa_lock must be taken with IRQs disabled
 *             vgic_cpu->ap_list_lock must be taken with IRQs disabled
 *               vgic_irq->irq_lock must be taken with IRQs disabled
 *
 * As the ap_list_lock might be taken from the timer interrupt handler,
 * we have to disable IRQs before taking this lock and everything lower
 * than it.
 *
 * The config_lock has additional ordering requirements:
 * kvm->slots_lock
 *   kvm->srcu
 *     kvm->arch.config_lock
 *
 * If you need to take multiple locks, always take the upper lock first,
 * then the lower ones, e.g. first take the its_lock, then the irq_lock.
 * If you are already holding a lock and need to take a higher one, you
 * have to drop the lower ranking lock first and re-acquire it after having
 * taken the upper one.
 *
 * When taking more than one ap_list_lock at the same time, always take the
 * lowest numbered VCPU's ap_list_lock first, so:
 *   vcpuX->vcpu_id < vcpuY->vcpu_id:
 *     raw_spin_lock(vcpuX->arch.vgic_cpu.ap_list_lock);
 *     raw_spin_lock(vcpuY->arch.vgic_cpu.ap_list_lock);
 *
 * Since the VGIC must support injecting virtual interrupts from ISRs, we have
 * to use the raw_spin_lock_irqsave/raw_spin_unlock_irqrestore versions of outer
 * spinlocks for any lock that may be taken while injecting an interrupt.
 */
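/*
 * Illustrative sketch of the ordering above (documentation only, not
 * functional code; the "its" and "irq" pointers are hypothetical): take the
 * its_lock first, then an irq_lock with interrupts disabled, and release in
 * the opposite order.
 *
 *	mutex_lock(&its->its_lock);
 *	raw_spin_lock_irqsave(&irq->irq_lock, flags);
 *	... update the interrupt state ...
 *	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
 *	mutex_unlock(&its->its_lock);
 */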

/*
 * Index the VM's xarray of mapped LPIs and return a reference to the IRQ
 * structure. The caller is expected to call vgic_put_irq() later once it's
 * finished with the IRQ.
 */
static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid)
{
	struct vgic_dist *dist = &kvm->arch.vgic;
	struct vgic_irq *irq = NULL;

	rcu_read_lock();

	irq = xa_load(&dist->lpi_xa, intid);
	if (!vgic_try_get_irq_ref(irq))
		irq = NULL;

	rcu_read_unlock();

	return irq;
}

/*
 * This looks up the virtual interrupt ID to get the corresponding
 * struct vgic_irq. It also increases the refcount, so any caller is expected
 * to call vgic_put_irq() once it's finished with this IRQ.
 */
struct vgic_irq *vgic_get_irq(struct kvm *kvm, u32 intid)
{
	/* Non-private IRQs are not yet implemented for GICv5 */
	if (vgic_is_v5(kvm))
		return NULL;

	/* SPIs */
	if (intid >= VGIC_NR_PRIVATE_IRQS &&
	    intid < (kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)) {
		intid = array_index_nospec(intid, kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS);
		return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS];
	}

	/* LPIs */
	if (irq_is_lpi(kvm, intid))
		return vgic_get_lpi(kvm, intid);

	return NULL;
}
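/*
 * Typical usage pattern for the lookup above (illustrative sketch only,
 * mirroring what kvm_vgic_inject_irq() does further down in this file):
 *
 *	struct vgic_irq *irq = vgic_get_irq(kvm, intid);
 *
 *	if (!irq)
 *		return -EINVAL;
 *	raw_spin_lock_irqsave(&irq->irq_lock, flags);
 *	... update the interrupt state ...
 *	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
 *	vgic_put_irq(kvm, irq);
 */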

struct vgic_irq *vgic_get_vcpu_irq(struct kvm_vcpu *vcpu, u32 intid)
{
	if (WARN_ON(!vcpu))
		return NULL;

	if (vgic_is_v5(vcpu->kvm)) {
		u32 int_num, hwirq_id;

		if (!__irq_is_ppi(KVM_DEV_TYPE_ARM_VGIC_V5, intid))
			return NULL;

		hwirq_id = FIELD_GET(GICV5_HWIRQ_ID, intid);
		int_num = array_index_nospec(hwirq_id, VGIC_V5_NR_PRIVATE_IRQS);

		return &vcpu->arch.vgic_cpu.private_irqs[int_num];
	}

	/* SGIs and PPIs */
	if (intid < VGIC_NR_PRIVATE_IRQS) {
		intid = array_index_nospec(intid, VGIC_NR_PRIVATE_IRQS);
		return &vcpu->arch.vgic_cpu.private_irqs[intid];
	}

	return vgic_get_irq(vcpu->kvm, intid);
}

static void vgic_release_lpi_locked(struct vgic_dist *dist, struct vgic_irq *irq)
{
	lockdep_assert_held(&dist->lpi_xa.xa_lock);
	__xa_erase(&dist->lpi_xa, irq->intid);
	kfree_rcu(irq, rcu);
}

static __must_check bool __vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
{
	if (!irq_is_lpi(kvm, irq->intid))
		return false;

	return refcount_dec_and_test(&irq->refcount);
}

static __must_check bool vgic_put_irq_norelease(struct kvm *kvm, struct vgic_irq *irq)
{
	if (!__vgic_put_irq(kvm, irq))
		return false;

	irq->pending_release = true;
	return true;
}

void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
{
	struct vgic_dist *dist = &kvm->arch.vgic;
	unsigned long flags;

	/*
	 * Normally the lock is only taken when the refcount drops to 0.
	 * Acquire/release it early on lockdep kernels to make locking issues
	 * in rare release paths a bit more obvious.
	 */
	if (IS_ENABLED(CONFIG_LOCKDEP) && irq_is_lpi(kvm, irq->intid)) {
		guard(spinlock_irqsave)(&dist->lpi_xa.xa_lock);
	}

	if (!__vgic_put_irq(kvm, irq))
		return;

	xa_lock_irqsave(&dist->lpi_xa, flags);
	vgic_release_lpi_locked(dist, irq);
	xa_unlock_irqrestore(&dist->lpi_xa, flags);
}

static void vgic_release_deleted_lpis(struct kvm *kvm)
{
	struct vgic_dist *dist = &kvm->arch.vgic;
	unsigned long flags, intid;
	struct vgic_irq *irq;

	xa_lock_irqsave(&dist->lpi_xa, flags);

	xa_for_each(&dist->lpi_xa, intid, irq) {
		if (irq->pending_release)
			vgic_release_lpi_locked(dist, irq);
	}

	xa_unlock_irqrestore(&dist->lpi_xa, flags);
}

void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	struct vgic_irq *irq, *tmp;
	bool deleted = false;
	unsigned long flags;

	raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);

	list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
		if (irq_is_lpi(vcpu->kvm, irq->intid)) {
			raw_spin_lock(&irq->irq_lock);
			list_del(&irq->ap_list);
			irq->vcpu = NULL;
			raw_spin_unlock(&irq->irq_lock);
			deleted |= vgic_put_irq_norelease(vcpu->kvm, irq);
		}
	}

	raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);

	if (deleted)
		vgic_release_deleted_lpis(vcpu->kvm);
}

void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending)
{
	WARN_ON(irq_set_irqchip_state(irq->host_irq,
				      IRQCHIP_STATE_PENDING,
				      pending));
}

bool vgic_get_phys_line_level(struct vgic_irq *irq)
{
	bool line_level;

	BUG_ON(!irq->hw);

	if (irq->ops && irq->ops->get_input_level)
		return irq->ops->get_input_level(irq->intid);

	WARN_ON(irq_get_irqchip_state(irq->host_irq,
				      IRQCHIP_STATE_PENDING,
				      &line_level));
	return line_level;
}

/* Set/Clear the physical active state */
void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active)
{
	BUG_ON(!irq->hw);
	WARN_ON(irq_set_irqchip_state(irq->host_irq,
				      IRQCHIP_STATE_ACTIVE,
				      active));
}

/**
 * vgic_target_oracle - compute the target vcpu for an irq
 *
 * @irq:	The irq to route. Must be already locked.
 *
 * Based on the current state of the interrupt (enabled, pending,
 * active, vcpu and target_vcpu), compute the next vcpu this should be
 * given to. Return NULL if this shouldn't be injected at all.
 *
 * Requires the IRQ lock to be held.
 */
struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq)
{
	lockdep_assert_held(&irq->irq_lock);

	/* If the interrupt is active, it must stay on the current vcpu */
	if (irq->active)
		return irq->vcpu ? : irq->target_vcpu;

	/*
	 * If the IRQ is not active but enabled and pending, we should direct
	 * it to its configured target VCPU.
	 * If the distributor is disabled, pending interrupts shouldn't be
	 * forwarded.
	 */
	if (irq->enabled && irq_is_pending(irq)) {
		if (unlikely(irq->target_vcpu &&
			     !irq->target_vcpu->kvm->arch.vgic.enabled))
			return NULL;

		return irq->target_vcpu;
	}

	/* If neither active nor pending and enabled, then this IRQ should not
	 * be queued to any VCPU.
	 */
	return NULL;
}

struct vgic_sort_info {
	struct kvm_vcpu *vcpu;
	struct vgic_vmcr vmcr;
};

/*
 * The order of items in the ap_lists defines how we'll pack things in LRs as
 * well, the first items in the list being the first things populated in the
 * LRs.
 *
 * Pending, non-active interrupts must be placed at the head of the list.
 * Otherwise things should be sorted by the priority field and the GIC
 * hardware support will take care of preemption of priority groups etc.
 * Interrupts that are not deliverable should be at the end of the list.
 *
 * Return negative if "a" sorts before "b", 0 to preserve order, and positive
 * to sort "b" before "a".
 */
static int vgic_irq_cmp(void *priv, const struct list_head *a,
			const struct list_head *b)
{
	struct vgic_irq *irqa = container_of(a, struct vgic_irq, ap_list);
	struct vgic_irq *irqb = container_of(b, struct vgic_irq, ap_list);
	struct vgic_sort_info *info = priv;
	struct kvm_vcpu *vcpu = info->vcpu;
	bool penda, pendb;
	int ret;

	/*
	 * list_sort may call this function with the same element when
	 * the list is fairly long.
	 */
	if (unlikely(irqa == irqb))
		return 0;

	raw_spin_lock(&irqa->irq_lock);
	raw_spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING);

	/* Undeliverable interrupts should be last */
	ret = (int)(vgic_target_oracle(irqb) == vcpu) - (int)(vgic_target_oracle(irqa) == vcpu);
	if (ret)
		goto out;

	/* Same thing for interrupts targeting a disabled group */
	ret = (int)(irqb->group ? info->vmcr.grpen1 : info->vmcr.grpen0);
	ret -= (int)(irqa->group ? info->vmcr.grpen1 : info->vmcr.grpen0);
	if (ret)
		goto out;

	penda = irqa->enabled && irq_is_pending(irqa) && !irqa->active;
	pendb = irqb->enabled && irq_is_pending(irqb) && !irqb->active;

	ret = (int)pendb - (int)penda;
	if (ret)
		goto out;

	/* Both pending and enabled, sort by priority (lower number first) */
	ret = (int)irqa->priority - (int)irqb->priority;
	if (ret)
		goto out;

	/* Finally, HW bit active interrupts have priority over non-HW ones */
	ret = (int)irqb->hw - (int)irqa->hw;

out:
	raw_spin_unlock(&irqb->irq_lock);
	raw_spin_unlock(&irqa->irq_lock);
	return ret;
}
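/*
 * Worked example of the ordering rules implemented above (the interrupts
 * are purely hypothetical): given a pending+enabled priority-0x20 IRQ, a
 * pending+enabled priority-0x80 IRQ, an active-only IRQ and an IRQ whose
 * oracle no longer targets this VCPU, list_sort() leaves them in exactly
 * that order: deliverable pending interrupts first (lowest priority value
 * leading), then the active one, and the undeliverable one last.
 */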

/* Must be called with the ap_list_lock held */
static void vgic_sort_ap_list(struct kvm_vcpu *vcpu)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	struct vgic_sort_info info = { .vcpu = vcpu, };

	lockdep_assert_held(&vgic_cpu->ap_list_lock);

	vgic_get_vmcr(vcpu, &info.vmcr);
	list_sort(&info, &vgic_cpu->ap_list_head, vgic_irq_cmp);
}

/*
 * Only valid injection if changing level for level-triggered IRQs or for a
 * rising edge, and in-kernel connected IRQ lines can only be controlled by
 * their owner.
 */
static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owner)
{
	if (irq->owner != owner)
		return false;

	switch (irq->config) {
	case VGIC_CONFIG_LEVEL:
		return irq->line_level != level;
	case VGIC_CONFIG_EDGE:
		return level;
	}

	return false;
}

static bool vgic_model_needs_bcst_kick(struct kvm *kvm)
{
	/*
	 * A GICv3 (or GICv3-like) system exposing a GICv3 to the guest
	 * needs a broadcast kick to set TDIR globally.
	 *
	 * For systems that do not have TDIR (ARM's own v8.0 CPUs), the
	 * shadow TDIR bit is always set, and so is the register's TC bit,
	 * so no need to kick the CPUs.
	 */
	return (cpus_have_final_cap(ARM64_HAS_ICH_HCR_EL2_TDIR) &&
		kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3);
}

/*
 * Check whether an IRQ needs to (and can) be queued to a VCPU's ap list.
 * Do the queuing if necessary, taking the right locks in the right order.
 * Returns true when the IRQ was queued, false otherwise.
 *
 * Needs to be entered with the IRQ lock already held, but will return
 * with all locks dropped.
 */
bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
			   unsigned long flags) __releases(&irq->irq_lock)
{
	struct kvm_vcpu *vcpu;
	bool bcast;

	lockdep_assert_held(&irq->irq_lock);

	if (irq->ops && irq->ops->queue_irq_unlock)
		return irq->ops->queue_irq_unlock(kvm, irq, flags);

retry:
	vcpu = vgic_target_oracle(irq);
	if (irq->vcpu || !vcpu) {
		/*
		 * If this IRQ is already on a VCPU's ap_list, then it
		 * cannot be moved or modified and there is no more work for
		 * us to do.
		 *
		 * Otherwise, if the irq is not pending and enabled, it does
		 * not need to be inserted into an ap_list and there is also
		 * no more work for us to do.
		 */
		raw_spin_unlock_irqrestore(&irq->irq_lock, flags);

		/*
		 * We have to kick the VCPU here, because we could be
		 * queueing an edge-triggered interrupt for which we
		 * get no EOI maintenance interrupt. In that case,
		 * while the IRQ is already on the VCPU's AP list, the
		 * VCPU could have EOI'ed the original interrupt and
		 * won't see this one until it exits for some other
		 * reason.
		 */
		if (vcpu) {
			kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
			kvm_vcpu_kick(vcpu);
		}
		return false;
	}

	/*
	 * We must unlock the irq lock to take the ap_list_lock where
	 * we are going to insert this new pending interrupt.
	 */
	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);

	/* someone can do stuff here, which we re-check below */

	raw_spin_lock_irqsave(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
	raw_spin_lock(&irq->irq_lock);

	/*
	 * Did something change behind our backs?
	 *
	 * There are two cases:
	 * 1) The irq lost its pending state or was disabled behind our
	 *    backs and/or it was queued to another VCPU's ap_list.
	 * 2) Someone changed the affinity on this irq behind our
	 *    backs and we are now holding the wrong ap_list_lock.
	 *
	 * In both cases, drop the locks and retry.
	 */

	if (unlikely(irq->vcpu || vcpu != vgic_target_oracle(irq))) {
		raw_spin_unlock(&irq->irq_lock);
		raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock,
					   flags);

		raw_spin_lock_irqsave(&irq->irq_lock, flags);
		goto retry;
	}

	/*
	 * Grab a reference to the irq to reflect the fact that it is
	 * now in the ap_list. This is safe as the caller must already hold a
	 * reference on the irq.
	 */
	vgic_get_irq_ref(irq);
	list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head);
	irq->vcpu = vcpu;

	/* A new SPI may result in deactivation trapping on all vcpus */
	bcast = (vgic_model_needs_bcst_kick(vcpu->kvm) &&
		 vgic_valid_spi(vcpu->kvm, irq->intid) &&
		 atomic_fetch_inc(&vcpu->kvm->arch.vgic.active_spis) == 0);

	raw_spin_unlock(&irq->irq_lock);
	raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags);

	if (!bcast) {
		kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
		kvm_vcpu_kick(vcpu);
	} else {
		kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_IRQ_PENDING);
	}

	return true;
}

/**
 * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
 * @kvm:	The VM structure pointer
 * @vcpu:	The CPU for PPIs or NULL for global interrupts
 * @intid:	The INTID to inject a new state to.
 * @level:	Edge-triggered:  true:  to trigger the interrupt
 *			         false: to ignore the call
 *		Level-sensitive  true:  raise the input signal
 *				 false: lower the input signal
 * @owner:	The opaque pointer to the owner of the IRQ being raised to verify
 *		that the caller is allowed to inject this IRQ. Userspace
 *		injections will have owner == NULL.
 *
 * The VGIC is not concerned with devices being active-LOW or active-HIGH for
 * level-sensitive interrupts. You can think of the level parameter as 1
 * being HIGH and 0 being LOW and all devices being active-HIGH.
 */
int kvm_vgic_inject_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
			unsigned int intid, bool level, void *owner)
{
	struct vgic_irq *irq;
	unsigned long flags;
	int ret;

	ret = vgic_lazy_init(kvm);
	if (ret)
		return ret;

	if (!vcpu && irq_is_private(kvm, intid))
		return -EINVAL;

	trace_vgic_update_irq_pending(vcpu ? vcpu->vcpu_idx : 0, intid, level);

	if (irq_is_private(kvm, intid))
		irq = vgic_get_vcpu_irq(vcpu, intid);
	else
		irq = vgic_get_irq(kvm, intid);
	if (!irq)
		return -EINVAL;

	raw_spin_lock_irqsave(&irq->irq_lock, flags);

	if (!vgic_validate_injection(irq, level, owner)) {
		/* Nothing to see here, move along... */
		raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
		vgic_put_irq(kvm, irq);
		return 0;
	}

	if (irq->config == VGIC_CONFIG_LEVEL)
		irq->line_level = level;
	else
		irq->pending_latch = true;

	vgic_queue_irq_unlock(kvm, irq, flags);
	vgic_put_irq(kvm, irq);

	return 0;
}
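/*
 * Illustrative call patterns for kvm_vgic_inject_irq() (sketch only; the
 * SPI number is hypothetical, and owner == NULL denotes a userspace-style
 * injection as documented above):
 *
 *	An edge-triggered SPI is latched pending by a single level=true call:
 *		kvm_vgic_inject_irq(kvm, NULL, 40, true, NULL);
 *
 *	A level-sensitive SPI is raised, then lowered once the emulated
 *	device de-asserts its line:
 *		kvm_vgic_inject_irq(kvm, NULL, 40, true, NULL);
 *		kvm_vgic_inject_irq(kvm, NULL, 40, false, NULL);
 */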

void kvm_vgic_set_irq_ops(struct kvm_vcpu *vcpu, u32 vintid,
			  struct irq_ops *ops)
{
	struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, vintid);

	BUG_ON(!irq);

	scoped_guard(raw_spinlock_irqsave, &irq->irq_lock)
		irq->ops = ops;

	vgic_put_irq(vcpu->kvm, irq);
}

void kvm_vgic_clear_irq_ops(struct kvm_vcpu *vcpu, u32 vintid)
{
	kvm_vgic_set_irq_ops(vcpu, vintid, NULL);
}

/* @irq->irq_lock must be held */
static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
			    unsigned int host_irq)
{
	struct irq_desc *desc;
	struct irq_data *data;

	/*
	 * Find the physical IRQ number corresponding to @host_irq
	 */
	desc = irq_to_desc(host_irq);
	if (!desc) {
		kvm_err("%s: no interrupt descriptor\n", __func__);
		return -EINVAL;
	}
	data = irq_desc_get_irq_data(desc);
	while (data->parent_data)
		data = data->parent_data;

	irq->hw = true;
	irq->host_irq = host_irq;
	irq->hwintid = data->hwirq;

	if (irq->ops && irq->ops->set_direct_injection)
		irq->ops->set_direct_injection(vcpu, irq, true);

	return 0;
}

/* @irq->irq_lock must be held */
static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq)
{
	if (irq->ops && irq->ops->set_direct_injection)
		irq->ops->set_direct_injection(irq->target_vcpu, irq, false);

	irq->hw = false;
	irq->hwintid = 0;
}

int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
			  u32 vintid)
{
	struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, vintid);
	unsigned long flags;
	int ret;

	BUG_ON(!irq);

	raw_spin_lock_irqsave(&irq->irq_lock, flags);
	ret = kvm_vgic_map_irq(vcpu, irq, host_irq);
	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
	vgic_put_irq(vcpu->kvm, irq);

	return ret;
}

/**
 * kvm_vgic_reset_mapped_irq - Reset a mapped IRQ
 * @vcpu:	The VCPU pointer
 * @vintid:	The INTID of the interrupt
 *
 * Reset the active and pending states of a mapped interrupt. Kernel
 * subsystems injecting mapped interrupts should reset their interrupt lines
 * when we are doing a reset of the VM.
 */
void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid)
{
	struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, vintid);
	unsigned long flags;

	if (!irq->hw)
		goto out;

	raw_spin_lock_irqsave(&irq->irq_lock, flags);
	irq->active = false;
	irq->pending_latch = false;
	irq->line_level = false;
	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
out:
	vgic_put_irq(vcpu->kvm, irq);
}

int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid)
{
	struct vgic_irq *irq;
	unsigned long flags;

	if (!vgic_initialized(vcpu->kvm))
		return -EAGAIN;

	irq = vgic_get_vcpu_irq(vcpu, vintid);
	BUG_ON(!irq);

	raw_spin_lock_irqsave(&irq->irq_lock, flags);
	kvm_vgic_unmap_irq(irq);
	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
	vgic_put_irq(vcpu->kvm, irq);

	return 0;
}

int kvm_vgic_get_map(struct kvm_vcpu *vcpu, unsigned int vintid)
{
	struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, vintid);
	unsigned long flags;
	int ret = -1;

	raw_spin_lock_irqsave(&irq->irq_lock, flags);
	if (irq->hw)
		ret = irq->hwintid;
	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);

	vgic_put_irq(vcpu->kvm, irq);
	return ret;
}

/**
 * kvm_vgic_set_owner - Set the owner of an interrupt for a VM
 *
 * @vcpu:	Pointer to the VCPU (used for PPIs)
 * @intid:	The virtual INTID identifying the interrupt (PPI or SPI)
 * @owner:	Opaque pointer to the owner
 *
 * Returns 0 if intid is not already used by another in-kernel device and the
 * owner is set, otherwise returns an error code.
 */
int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner)
{
	struct vgic_irq *irq;
	unsigned long flags;
	int ret = 0;

	if (!vgic_initialized(vcpu->kvm))
		return -EAGAIN;

	/* SGIs and LPIs cannot be wired up to any device */
	if (!irq_is_ppi(vcpu->kvm, intid) && !vgic_valid_spi(vcpu->kvm, intid))
		return -EINVAL;

	irq = vgic_get_vcpu_irq(vcpu, intid);
	raw_spin_lock_irqsave(&irq->irq_lock, flags);
	if (irq->owner && irq->owner != owner)
		ret = -EEXIST;
	else
		irq->owner = owner;
	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);

	return ret;
}

/**
 * vgic_prune_ap_list - Remove non-relevant interrupts from the list
 *
 * @vcpu: The VCPU pointer
 *
 * Go over the list of "interesting" interrupts, and prune those that we
 * won't have to consider in the near future.
 */
static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	struct vgic_irq *irq, *tmp;
	bool deleted_lpis = false;

	DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());

retry:
	raw_spin_lock(&vgic_cpu->ap_list_lock);

	list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
		struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB;
		bool target_vcpu_needs_kick = false;

		raw_spin_lock(&irq->irq_lock);

		BUG_ON(vcpu != irq->vcpu);

		target_vcpu = vgic_target_oracle(irq);

		if (!target_vcpu) {
			/*
			 * We don't need to process this interrupt any
			 * further, move it off the list.
			 */
			list_del(&irq->ap_list);
			irq->vcpu = NULL;
			raw_spin_unlock(&irq->irq_lock);

			/*
			 * This vgic_put_irq_norelease() call matches the
			 * vgic_get_irq_ref() in vgic_queue_irq_unlock(),
			 * where we added the LPI to the ap_list. As we
			 * remove the irq from the list, we also drop the
			 * refcount.
			 */
			deleted_lpis |= vgic_put_irq_norelease(vcpu->kvm, irq);
			continue;
		}

		if (target_vcpu == vcpu) {
			/* We're on the right CPU */
			raw_spin_unlock(&irq->irq_lock);
			continue;
		}

		/* This interrupt looks like it has to be migrated. */

		raw_spin_unlock(&irq->irq_lock);
		raw_spin_unlock(&vgic_cpu->ap_list_lock);

		/*
		 * Ensure locking order by always locking the smallest
		 * ID first.
		 */
		if (vcpu->vcpu_id < target_vcpu->vcpu_id) {
			vcpuA = vcpu;
			vcpuB = target_vcpu;
		} else {
			vcpuA = target_vcpu;
			vcpuB = vcpu;
		}

		raw_spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock);
		raw_spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock,
				     SINGLE_DEPTH_NESTING);
		raw_spin_lock(&irq->irq_lock);

		/*
		 * If the affinity has been preserved, move the
		 * interrupt around. Otherwise, it means things have
		 * changed while the interrupt was unlocked, and we
		 * need to replay this.
		 *
		 * In all cases, we cannot trust the list not to have
		 * changed, so we restart from the beginning.
		 */
		if (target_vcpu == vgic_target_oracle(irq)) {
			struct vgic_cpu *new_cpu = &target_vcpu->arch.vgic_cpu;

			list_del(&irq->ap_list);
			irq->vcpu = target_vcpu;
			list_add_tail(&irq->ap_list, &new_cpu->ap_list_head);
			target_vcpu_needs_kick = true;
		}

		raw_spin_unlock(&irq->irq_lock);
		raw_spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock);
		raw_spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock);

		if (target_vcpu_needs_kick) {
			kvm_make_request(KVM_REQ_IRQ_PENDING, target_vcpu);
			kvm_vcpu_kick(target_vcpu);
		}

		goto retry;
	}

	raw_spin_unlock(&vgic_cpu->ap_list_lock);

	if (unlikely(deleted_lpis))
		vgic_release_deleted_lpis(vcpu->kvm);
}

static void vgic_fold_state(struct kvm_vcpu *vcpu)
{
	if (vgic_is_v5(vcpu->kvm)) {
		vgic_v5_fold_ppi_state(vcpu);
		return;
	}

	if (!*host_data_ptr(last_lr_irq))
		return;

	if (kvm_vgic_global_state.type == VGIC_V2)
		vgic_v2_fold_lr_state(vcpu);
	else
		vgic_v3_fold_lr_state(vcpu);
}

/* Requires the irq_lock to be held. */
static inline void vgic_populate_lr(struct kvm_vcpu *vcpu,
				    struct vgic_irq *irq, int lr)
{
	lockdep_assert_held(&irq->irq_lock);

	if (kvm_vgic_global_state.type == VGIC_V2)
		vgic_v2_populate_lr(vcpu, irq, lr);
	else
		vgic_v3_populate_lr(vcpu, irq, lr);
}

static inline void vgic_clear_lr(struct kvm_vcpu *vcpu, int lr)
{
	if (kvm_vgic_global_state.type == VGIC_V2)
		vgic_v2_clear_lr(vcpu, lr);
	else
		vgic_v3_clear_lr(vcpu, lr);
}

static void summarize_ap_list(struct kvm_vcpu *vcpu,
			      struct ap_list_summary *als)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	struct vgic_irq *irq;

	lockdep_assert_held(&vgic_cpu->ap_list_lock);

	*als = (typeof(*als)){};

	list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
		guard(raw_spinlock)(&irq->irq_lock);

		if (unlikely(vgic_target_oracle(irq) != vcpu))
			continue;

		if (!irq->active)
			als->nr_pend++;
		else
			als->nr_act++;

		if (irq->intid < VGIC_NR_SGIS)
			als->nr_sgi++;
	}
}

/*
 * Dealing with LR overflow is close to black magic -- dress accordingly.
 *
 * We have to present an almost infinite number of interrupts through a very
 * limited number of registers. Therefore crucial decisions must be made to
 * ensure we feed the most relevant interrupts into the LRs, and yet have
 * some facilities to let the guest interact with those that are not there.
 *
 * All considerations below are in the context of interrupts targeting a
 * single vcpu with non-idle state (either pending, active, or both),
 * colloquially called the ap_list:
 *
 * - Pending interrupts must have priority over active interrupts. This also
 *   excludes pending+active interrupts. This ensures that a guest can
 *   perform priority drops on any number of interrupts, and yet be
 *   presented the next pending one.
 *
 * - Deactivation of interrupts outside of the LRs must be tracked, using
 *   either the EOIcount-driven maintenance interrupt or, in some cases, by
 *   trapping the DIR register.
 *
 * - For EOImode=0, a non-zero EOIcount means walking the ap_list past the
 *   point that made it into the LRs, and deactivating interrupts that would
 *   have made it onto the LRs if we had the space.
 *
 * - The MI-generation bits must be used to try and force an exit when the
 *   guest has done enough changes to the LRs that we want to reevaluate the
 *   situation:
 *
 *   - if the total number of pending interrupts exceeds the number of
 *     LRs, NPIE must be set in order to exit once no pending interrupts
 *     are present in the LRs, allowing us to populate the next batch.
 *
 *   - if there are active interrupts outside of the LRs, then LRENPIE
 *     must be set so that we exit on deactivation of one of these, and
 *     work out which one is to be deactivated. Note that this is not
 *     enough to deal with EOImode=1, see below.
 *
 *   - if the overall number of interrupts exceeds the number of LRs,
 *     then UIE must be set to allow refilling of the LRs once the
 *     majority of them has been processed.
 *
 *   - as usual, MI triggers are only an optimisation, since we cannot
 *     rely on the MI being delivered in a timely manner...
 *
 * - EOImode=1 creates some additional problems:
 *
 *   - deactivation can happen in any order, and we cannot rely on
 *     EOImode=0's coupling of priority-drop and deactivation which
 *     imposes strict reverse Ack order. This means that DIR must
 *     trap if we have active interrupts outside of the LRs.
 *
 *   - deactivation of SPIs can occur on any CPU, while the SPI is only
 *     present in the ap_list of the CPU that actually ack-ed it. In that
 *     case, EOIcount doesn't provide enough information, and we must
 *     resort to trapping DIR even if we don't overflow the LRs. Bonus
 *     point for not trapping DIR when no SPIs are pending or active in
 *     the whole VM.
 *
 *   - LPIs do not suffer the same problem as SPIs on deactivation, as we
 *     have to essentially discard the active state, see below.
 *
 * - Virtual LPIs have an active state (surprise!), which gets removed on
 *   priority drop (EOI). However, EOIcount doesn't get bumped when the LPI
 *   is not present in the LR (surprise again!). Special care must therefore
 *   be taken to remove the active state from any activated LPI when exiting
 *   from the guest. This is in a way no different from what happens on the
 *   physical side. We still rely on the running priority to have been
 *   removed from the APRs, irrespective of the LPI being present in the LRs
 *   or not.
 *
 * - Virtual SGIs directly injected via GICv4.1 must not affect EOIcount, as
 *   they are not managed in SW and don't have a true active state. So only
 *   set vSGIEOICount when no SGIs are in the ap_list.
 *
 * - GICv2 SGIs with multiple sources are injected one source at a time, as
 *   if they were made pending sequentially. This may mean that we don't
 *   always present the HPPI if other interrupts with lower priority are
 *   pending in the LRs. Big deal.
 */
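/*
 * Worked example of the rules above (the numbers are purely illustrative):
 * with 4 LRs and an ap_list holding 6 pending and 2 active interrupts, only
 * the 4 highest-priority pending interrupts fit in the LRs. More pending
 * interrupts exist than LRs, so NPIE is set; active interrupts remain
 * outside the LRs, so LRENPIE is set (and DIR traps under EOImode=1); and
 * the total of 8 exceeds the 4 LRs, so UIE is set as well.
 */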
static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	struct ap_list_summary als;
	struct vgic_irq *irq;
	int count = 0;

	lockdep_assert_held(&vgic_cpu->ap_list_lock);

	summarize_ap_list(vcpu, &als);

	if (irqs_outside_lrs(&als))
		vgic_sort_ap_list(vcpu);

	*host_data_ptr(last_lr_irq) = NULL;

	list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
		scoped_guard(raw_spinlock, &irq->irq_lock) {
			if (likely(vgic_target_oracle(irq) == vcpu)) {
				vgic_populate_lr(vcpu, irq, count++);
				*host_data_ptr(last_lr_irq) = irq;
			}
		}

		if (count == kvm_vgic_global_state.nr_lr)
			break;
	}

	/* Nuke remaining LRs */
	for (int i = count; i < kvm_vgic_global_state.nr_lr; i++)
		vgic_clear_lr(vcpu, i);

	if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
		vcpu->arch.vgic_cpu.vgic_v2.used_lrs = count;
		vgic_v2_configure_hcr(vcpu, &als);
	} else {
		vcpu->arch.vgic_cpu.vgic_v3.used_lrs = count;
		vgic_v3_configure_hcr(vcpu, &als);
	}
}

static inline bool can_access_vgic_from_kernel(void)
{
	/*
	 * GICv2 can always be accessed from the kernel because it is
	 * memory-mapped, and VHE systems can access GICv3 EL2 system
	 * registers.
	 */
	return !static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) || has_vhe();
}

static inline void vgic_save_state(struct kvm_vcpu *vcpu)
{
	/* No switch statement here. See comment in vgic_restore_state() */
	if (vgic_is_v5(vcpu->kvm))
		vgic_v5_save_state(vcpu);
	else if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
		vgic_v2_save_state(vcpu);
	else
		__vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3);
}

/* Sync back the hardware VGIC state into our emulation after a guest's run. */
void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
{
	if (vgic_is_v3(vcpu->kvm)) {
		/* If nesting, emulate the HW effect from L0 to L1 */
		if (vgic_state_is_nested(vcpu)) {
			vgic_v3_sync_nested(vcpu);
			return;
		}

		if (vcpu_has_nv(vcpu))
			vgic_v3_nested_update_mi(vcpu);
	}

	if (can_access_vgic_from_kernel())
		vgic_save_state(vcpu);

	vgic_fold_state(vcpu);

	if (!vgic_is_v5(vcpu->kvm))
		vgic_prune_ap_list(vcpu);
}

/* Sync interrupts that were deactivated through a DIR trap */
void kvm_vgic_process_async_update(struct kvm_vcpu *vcpu)
{
	unsigned long flags;

	/* Make sure we're in the same context as LR handling */
	local_irq_save(flags);
	vgic_prune_ap_list(vcpu);
	local_irq_restore(flags);
}

static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
{
	/*
	 * As nice as it would be to restructure this code into a switch
	 * statement as can be found elsewhere, the logic quickly gets ugly.
	 *
	 * __vgic_v3_restore_state() is doing a lot of heavy lifting here. It is
	 * required for GICv3-on-GICv3, GICv2-on-GICv3, GICv3-on-GICv5, and the
	 * no-in-kernel-irqchip case on GICv3 hardware. Hence, adding a switch
	 * here results in much more complex code.
	 */
	if (vgic_is_v5(vcpu->kvm))
		vgic_v5_restore_state(vcpu);
	else if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
		vgic_v2_restore_state(vcpu);
	else
		__vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3);
}

static void vgic_flush_state(struct kvm_vcpu *vcpu)
{
	if (vgic_is_v5(vcpu->kvm)) {
		vgic_v5_flush_ppi_state(vcpu);
		return;
	}

	scoped_guard(raw_spinlock, &vcpu->arch.vgic_cpu.ap_list_lock)
		vgic_flush_lr_state(vcpu);
}

/* Flush our emulation state into the GIC hardware before entering the guest. */
void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
{
	/*
	 * If in a nested state, we must return early. Two possibilities:
	 *
	 * - If we have any pending IRQ for the guest and the guest
	 *   expects IRQs to be handled in its virtual EL2 mode (the
	 *   virtual IMO bit is set) and it is not already running in
	 *   virtual EL2 mode, then we have to emulate an IRQ
	 *   exception to virtual EL2.
	 *
	 *   We do that by placing a request to ourselves which will
	 *   abort the entry procedure and inject the exception at the
	 *   beginning of the run loop.
	 *
	 * - Otherwise, do exactly *NOTHING* apart from enabling the virtual
	 *   CPU interface. The guest state is already loaded, and we can
	 *   carry on with running it.
	 *
	 * If we have NV, but are not in a nested state, compute the
	 * maintenance interrupt state, as it may fire.
	 */
	if (vgic_state_is_nested(vcpu)) {
		if (kvm_vgic_vcpu_pending_irq(vcpu))
			kvm_make_request(KVM_REQ_GUEST_HYP_IRQ_PENDING, vcpu);

		vgic_v3_flush_nested(vcpu);
		return;
	}

	if (vcpu_has_nv(vcpu))
		vgic_v3_nested_update_mi(vcpu);

	DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());

	vgic_flush_state(vcpu);

	if (can_access_vgic_from_kernel())
		vgic_restore_state(vcpu);

	if (vgic_supports_direct_irqs(vcpu->kvm) && kvm_vgic_global_state.has_gicv4)
		vgic_v4_commit(vcpu);
}
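/*
 * Sketch of where the two entry points above sit in the vcpu run loop
 * (simplified; the actual loop lives in the arch code, not in this file):
 *
 *	local_irq_disable();
 *	kvm_vgic_flush_hwstate(vcpu);
 *	... run the guest ...
 *	kvm_vgic_sync_hwstate(vcpu);
 *	local_irq_enable();
 */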

void kvm_vgic_load(struct kvm_vcpu *vcpu)
{
	const struct vgic_dist *dist = &vcpu->kvm->arch.vgic;

	if (unlikely(!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm))) {
		if (has_vhe() && static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
			__vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
		return;
	}

	switch (dist->vgic_model) {
	case KVM_DEV_TYPE_ARM_VGIC_V5:
		vgic_v5_load(vcpu);
		break;
	case KVM_DEV_TYPE_ARM_VGIC_V3:
		vgic_v3_load(vcpu);
		break;
	case KVM_DEV_TYPE_ARM_VGIC_V2:
		if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
			vgic_v3_load(vcpu);
		else
			vgic_v2_load(vcpu);
		break;
	default:
		BUG();
	}
}

void kvm_vgic_put(struct kvm_vcpu *vcpu)
{
	const struct vgic_dist *dist = &vcpu->kvm->arch.vgic;

	if (unlikely(!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm))) {
		if (has_vhe() && static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
			__vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3);
		return;
	}

	switch (dist->vgic_model) {
	case KVM_DEV_TYPE_ARM_VGIC_V5:
		vgic_v5_put(vcpu);
		break;
	case KVM_DEV_TYPE_ARM_VGIC_V3:
		vgic_v3_put(vcpu);
		break;
	case KVM_DEV_TYPE_ARM_VGIC_V2:
		if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
			vgic_v3_put(vcpu);
		else
			vgic_v2_put(vcpu);
		break;
	default:
		BUG();
	}
}

int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	struct vgic_irq *irq;
	bool pending = false;
	unsigned long flags;
	struct vgic_vmcr vmcr;

	if (vgic_is_v5(vcpu->kvm))
		return vgic_v5_has_pending_ppi(vcpu);

	if (!vcpu->kvm->arch.vgic.enabled)
		return false;

	if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last)
		return true;

	vgic_get_vmcr(vcpu, &vmcr);

	raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);

	list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
		raw_spin_lock(&irq->irq_lock);
		pending = irq_is_pending(irq) && irq->enabled &&
			  !irq->active &&
			  irq->priority < vmcr.pmr;
		raw_spin_unlock(&irq->irq_lock);

		if (pending)
			break;
	}

	raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);

	return pending;
}

void vgic_kick_vcpus(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	unsigned long c;

	/*
	 * We've injected an interrupt, time to find out who deserves
	 * a good kick...
	 */
	kvm_for_each_vcpu(c, vcpu, kvm) {
		if (kvm_vgic_vcpu_pending_irq(vcpu)) {
			kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
			kvm_vcpu_kick(vcpu);
		}
	}
}

bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid)
{
	struct vgic_irq *irq;
	bool map_is_active;
	unsigned long flags;

	if (!vgic_initialized(vcpu->kvm))
		return false;

	irq = vgic_get_vcpu_irq(vcpu, vintid);
	raw_spin_lock_irqsave(&irq->irq_lock, flags);
	map_is_active = irq->hw && irq->active;
	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
	vgic_put_irq(vcpu->kvm, irq);

	return map_is_active;
}

/*
 * Level-triggered mapped IRQs are special because we only observe rising
 * edges as input to the VGIC.
 *
 * If the guest never acked the interrupt we have to sample the physical
 * line and set the line level, because the device state could have changed
 * or we simply need to process the still pending interrupt later.
 *
 * We could also have entered the guest with the interrupt active+pending.
 * On the next exit, we need to re-evaluate the pending state, as it could
 * otherwise result in a spurious interrupt by injecting a now potentially
 * stale pending state.
 *
 * If this causes us to lower the level, we have to also clear the physical
 * active state, since we will otherwise never be told when the interrupt
 * becomes asserted again.
 *
 * Another case is when the interrupt requires a helping hand on
 * deactivation (no HW deactivation, for example).
 */
void vgic_irq_handle_resampling(struct vgic_irq *irq,
				bool lr_deactivated, bool lr_pending)
{
	if (vgic_irq_is_mapped_level(irq)) {
		bool resample = false;

		if (unlikely(vgic_irq_needs_resampling(irq))) {
			resample = !(irq->active || irq->pending_latch);
		} else if (lr_pending || (lr_deactivated && irq->line_level)) {
			irq->line_level = vgic_get_phys_line_level(irq);
			resample = !irq->line_level;
		}

		if (resample)
			vgic_irq_set_phys_active(irq, false);
	}
}