// SPDX-License-Identifier: GPL-2.0-only

#include <linux/irqchip/arm-gic-v3.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/kstrtox.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/string_choices.h>
#include <kvm/arm_vgic.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_asm.h>

#include "vgic-mmio.h"
#include "vgic.h"

static bool group0_trap;
static bool group1_trap;
static bool common_trap;
static bool dir_trap;
static bool gicv4_enable;

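/*
 * Build the shadow ICH_HCR_EL2 value for this vcpu from the ap_list
 * summary: enable the virtual CPU interface, request the relevant
 * maintenance interrupts when interrupts didn't all fit in the LRs,
 * watch for group enable changes, and decide whether DIR must trap.
 */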
void vgic_v3_configure_hcr(struct kvm_vcpu *vcpu,
			   struct ap_list_summary *als)
{
	struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;

	if (!irqchip_in_kernel(vcpu->kvm))
		return;

	cpuif->vgic_hcr = ICH_HCR_EL2_En;

	if (irqs_pending_outside_lrs(als))
		cpuif->vgic_hcr |= ICH_HCR_EL2_NPIE;
	if (irqs_active_outside_lrs(als))
		cpuif->vgic_hcr |= ICH_HCR_EL2_LRENPIE;
	if (irqs_outside_lrs(als))
		cpuif->vgic_hcr |= ICH_HCR_EL2_UIE;

	if (!als->nr_sgi)
		cpuif->vgic_hcr |= ICH_HCR_EL2_vSGIEOICount;

	cpuif->vgic_hcr |= (cpuif->vgic_vmcr & ICH_VMCR_EL2_VENG0_MASK) ?
			   ICH_HCR_EL2_VGrp0DIE : ICH_HCR_EL2_VGrp0EIE;
	cpuif->vgic_hcr |= (cpuif->vgic_vmcr & ICH_VMCR_EL2_VENG1_MASK) ?
			   ICH_HCR_EL2_VGrp1DIE : ICH_HCR_EL2_VGrp1EIE;

	/*
	 * Dealing with EOImode=1 is a massive source of headache. Not
	 * only do we need to track that we have active interrupts
	 * outside of the LRs and force DIR to be trapped, we also
	 * need to deal with SPIs that can be deactivated on another
	 * CPU.
	 *
	 * On systems that do not implement TDIR, force the bit in the
	 * shadow state anyway to avoid IPI-ing on these poor sods.
	 *
	 * Note that we set the trap irrespective of EOIMode, as that
	 * can change behind our back without any warning...
	 */
	if (!cpus_have_final_cap(ARM64_HAS_ICH_HCR_EL2_TDIR) ||
	    irqs_active_outside_lrs(als) ||
	    atomic_read(&vcpu->kvm->arch.vgic.active_spis))
		cpuif->vgic_hcr |= ICH_HCR_EL2_TDIR;
}

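/*
 * An LR signals an EOI maintenance interrupt when it has been emptied
 * (no pending or active state left), has EOI notification requested,
 * and is not a HW-mapped interrupt.
 */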
static bool lr_signals_eoi_mi(u64 lr_val)
{
	return !(lr_val & ICH_LR_STATE) && (lr_val & ICH_LR_EOI) &&
	       !(lr_val & ICH_LR_HW);
}

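/*
 * Fold a single LR value back into the software state of the matching
 * vgic_irq: active/pending bits, the source CPU for GICv2 SGIs, and
 * resampling of HW-mapped interrupts.
 */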
static void vgic_v3_fold_lr(struct kvm_vcpu *vcpu, u64 val)
{
	struct vgic_irq *irq;
	bool is_v2_sgi = false;
	bool deactivated;
	u32 intid;

	if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
		intid = val & ICH_LR_VIRTUAL_ID_MASK;
	} else {
		intid = val & GICH_LR_VIRTUALID;
		is_v2_sgi = vgic_irq_is_sgi(intid);
	}

	irq = vgic_get_vcpu_irq(vcpu, intid);
	if (!irq)	/* An LPI could have been unmapped. */
		return;

	scoped_guard(raw_spinlock, &irq->irq_lock) {
		/* Always preserve the active bit for !LPIs, note deactivation */
		if (irq->intid >= VGIC_MIN_LPI)
			val &= ~ICH_LR_ACTIVE_BIT;
		deactivated = irq->active && !(val & ICH_LR_ACTIVE_BIT);
		irq->active = !!(val & ICH_LR_ACTIVE_BIT);

		/* Edge is the only case where we preserve the pending bit */
		if (irq->config == VGIC_CONFIG_EDGE &&
		    (val & ICH_LR_PENDING_BIT))
			irq->pending_latch = true;

		/*
		 * Clear soft pending state when level irqs have been acked.
		 */
		if (irq->config == VGIC_CONFIG_LEVEL && !(val & ICH_LR_STATE))
			irq->pending_latch = false;

		if (is_v2_sgi) {
			u8 cpuid = FIELD_GET(GICH_LR_PHYSID_CPUID, val);

			if (irq->active)
				irq->active_source = cpuid;

			if (val & ICH_LR_PENDING_BIT)
				irq->source |= BIT(cpuid);
		}

		/* Handle resampling for mapped interrupts if required */
		vgic_irq_handle_resampling(irq, deactivated, val & ICH_LR_PENDING_BIT);

		irq->on_lr = false;
	}

	/* Notify fds when the guest EOI'ed a level-triggered SPI, and drop the refcount */
	if (deactivated && lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid)) {
		kvm_notify_acked_irq(vcpu->kvm, 0,
				     intid - VGIC_NR_PRIVATE_IRQS);
		atomic_dec_if_positive(&vcpu->kvm->arch.vgic.active_spis);
	}

	vgic_put_irq(vcpu->kvm, irq);
}

static u64 vgic_v3_compute_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq);

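/*
 * Deactivate the physical interrupt backing a HW-mapped virtual one:
 * on hosts with the GICv5 legacy capability this uses the CDDI GIC
 * instruction, otherwise it goes through the usual ICC_DIR_EL1 write.
 */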
static void vgic_v3_deactivate_phys(u32 intid)
{
	if (cpus_have_final_cap(ARM64_HAS_GICV5_LEGACY))
		gic_insn(intid | FIELD_PREP(GICV5_GIC_CDDI_TYPE_MASK, 1), CDDI);
	else
		gic_write_dir(intid);
}

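/*
 * Fold all the in-use LRs back into the software state and then, for
 * EOIMode=0, use EOIcount to replay the deactivation of active
 * interrupts that never made it into an LR (the tail of the ap_list).
 */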
void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3;
	u32 eoicount = FIELD_GET(ICH_HCR_EL2_EOIcount, cpuif->vgic_hcr);
	struct vgic_irq *irq = *host_data_ptr(last_lr_irq);

	DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());

	for (int lr = 0; lr < cpuif->used_lrs; lr++)
		vgic_v3_fold_lr(vcpu, cpuif->vgic_lr[lr]);

	/*
	 * EOIMode=0: use EOIcount to emulate deactivation. We are
	 * guaranteed to deactivate in reverse order of the activation, so
	 * just pick one active interrupt after the other in the tail part
	 * of the ap_list, past the LRs, and replay the deactivation as if
	 * the CPU was doing it. We also rely on priority drop to have taken
	 * place, and the list to be sorted by priority.
	 */
	list_for_each_entry_continue(irq, &vgic_cpu->ap_list_head, ap_list) {
		u64 lr;

		/*
		 * I would have loved to write this using a scoped_guard(),
		 * but using 'continue' here is a total train wreck.
		 */
		if (!eoicount) {
			break;
		} else {
			guard(raw_spinlock)(&irq->irq_lock);

			if (!(likely(vgic_target_oracle(irq) == vcpu) &&
			      irq->active))
				continue;

			lr = vgic_v3_compute_lr(vcpu, irq) & ~ICH_LR_ACTIVE_BIT;
		}

		if (lr & ICH_LR_HW)
			vgic_v3_deactivate_phys(FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));

		vgic_v3_fold_lr(vcpu, lr);
		eoicount--;
	}

	cpuif->used_lrs = 0;
}

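/*
 * Handle a trapped deactivation (EOIMode=1) for the INTID in @val; for
 * GICv2 SGIs the source CPUID is expected in bits [12:10] of @val.
 */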
void vgic_v3_deactivate(struct kvm_vcpu *vcpu, u64 val)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3;
	u32 model = vcpu->kvm->arch.vgic.vgic_model;
	struct kvm_vcpu *target_vcpu = NULL;
	bool mmio = false, is_v2_sgi;
	struct vgic_irq *irq;
	unsigned long flags;
	u64 lr = 0;
	u8 cpuid;

	/* Snapshot the CPUID, and remove it from the INTID */
	cpuid = FIELD_GET(GENMASK_ULL(12, 10), val);
	val &= ~GENMASK_ULL(12, 10);

	is_v2_sgi = (model == KVM_DEV_TYPE_ARM_VGIC_V2 &&
		     val < VGIC_NR_SGIS);

	/*
	 * We only deal with DIR when EOIMode==1, and only for SGIs,
	 * PPIs or SPIs.
	 */
	if (!(cpuif->vgic_vmcr & ICH_VMCR_EL2_VEOIM_MASK) ||
	    val >= vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)
		return;

	/* Make sure we're in the same context as LR handling */
	local_irq_save(flags);

	irq = vgic_get_vcpu_irq(vcpu, val);
	if (WARN_ON_ONCE(!irq))
		goto out;

	/*
	 * EOIMode=1: we must rely on traps to handle the deactivation of
	 * overflowing interrupts, as there is no ordering guarantee and
	 * EOIcount isn't being incremented. Priority drop will have taken
	 * place, as ICV_EOIxR_EL1 only affects the APRs and not the LRs.
	 *
	 * Three possibilities:
	 *
	 * - The irq is not queued on any CPU, and there is nothing to
	 *   do,
	 *
	 * - Or the irq is in an LR, meaning that its state is not
	 *   directly observable. Treat it bluntly, as if this were a
	 *   write to GICD_ICACTIVER, which will force an exit on all
	 *   vcpus. If it hurts, don't do that.
	 *
	 * - Or the irq is active, but not in an LR, and we can
	 *   directly deactivate it by building a pseudo-LR, folding it,
	 *   and queueing a request to prune the resulting ap_list.
	 *
	 * Special care must be taken to match the source CPUID when
	 * deactivating a GICv2 SGI.
	 */
	scoped_guard(raw_spinlock, &irq->irq_lock) {
		target_vcpu = irq->vcpu;

		/* Not on any ap_list? */
		if (!target_vcpu)
			goto put;

		/*
		 * Urgh. We're deactivating something that we cannot
		 * observe yet... Big hammer time.
		 */
		if (irq->on_lr) {
			mmio = true;
			goto put;
		}

		/* GICv2 SGI: check that the cpuid matches */
		if (is_v2_sgi && irq->active_source != cpuid) {
			target_vcpu = NULL;
			goto put;
		}

		/* (with a Dalek voice) DEACTIVATE!!!! */
		lr = vgic_v3_compute_lr(vcpu, irq) & ~ICH_LR_ACTIVE_BIT;
	}

	if (lr & ICH_LR_HW)
		vgic_v3_deactivate_phys(FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));

	vgic_v3_fold_lr(vcpu, lr);

put:
	vgic_put_irq(vcpu->kvm, irq);

out:
	local_irq_restore(flags);

	if (mmio)
		vgic_mmio_write_cactive(vcpu, (val / 32) * 4, 4, BIT(val % 32));

	/* Force the ap_list to be pruned */
	if (target_vcpu)
		kvm_make_request(KVM_REQ_VGIC_PROCESS_UPDATE, target_vcpu);
}

/* Requires the irq to be locked already */
static u64 vgic_v3_compute_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
{
	u32 model = vcpu->kvm->arch.vgic.vgic_model;
	u64 val = irq->intid;
	bool allow_pending = true, is_v2_sgi;

	WARN_ON(irq->on_lr);

	is_v2_sgi = (vgic_irq_is_sgi(irq->intid) &&
		     model == KVM_DEV_TYPE_ARM_VGIC_V2);

	if (irq->active) {
		val |= ICH_LR_ACTIVE_BIT;
		if (is_v2_sgi)
			val |= irq->active_source << GICH_LR_PHYSID_CPUID_SHIFT;
		if (vgic_irq_is_multi_sgi(irq)) {
			allow_pending = false;
			val |= ICH_LR_EOI;
		}
	}

	if (irq->hw && !vgic_irq_needs_resampling(irq)) {
		val |= ICH_LR_HW;
		val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT;
		/*
		 * Never set pending+active on a HW interrupt, as the
		 * pending state is kept at the physical distributor
		 * level.
		 */
		if (irq->active)
			allow_pending = false;
	} else {
		if (irq->config == VGIC_CONFIG_LEVEL) {
			val |= ICH_LR_EOI;

			/*
			 * Software resampling doesn't work very well
			 * if we allow P+A, so let's not do that.
			 */
			if (irq->active)
				allow_pending = false;
		}
	}

	if (allow_pending && irq_is_pending(irq)) {
		val |= ICH_LR_PENDING_BIT;

		if (is_v2_sgi) {
			u32 src = ffs(irq->source);

			if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n",
					   irq->intid))
				return 0;

			val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
			if (irq->source & ~BIT(src - 1))
				val |= ICH_LR_EOI;
		}
	}

	if (irq->group)
		val |= ICH_LR_GROUP;

	val |= (u64)irq->priority << ICH_LR_PRIORITY_SHIFT;

	return val;
}

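/*
 * Install the LR value computed for @irq into shadow LR @lr and update
 * the software pending state accordingly (edge latch, GICv2 SGI
 * sources, line level of mapped level interrupts).
 */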
void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
{
	u32 model = vcpu->kvm->arch.vgic.vgic_model;
	u64 val = vgic_v3_compute_lr(vcpu, irq);

	vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = val;

	if (val & ICH_LR_PENDING_BIT) {
		if (irq->config == VGIC_CONFIG_EDGE)
			irq->pending_latch = false;

		if (vgic_irq_is_sgi(irq->intid) &&
		    model == KVM_DEV_TYPE_ARM_VGIC_V2) {
			u32 src = ffs(irq->source);

			irq->source &= ~BIT(src - 1);
			if (irq->source)
				irq->pending_latch = true;
		}
	}

	/*
	 * Level-triggered mapped IRQs are special because we only observe
	 * rising edges as input to the VGIC. We therefore lower the line
	 * level here, so that we can take new virtual IRQs. See
	 * vgic_v3_fold_lr_state for more info.
	 */
	if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT))
		irq->line_level = false;

	irq->on_lr = true;
}

void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr)
{
	vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = 0;
}

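/*
 * Translate between the generic struct vgic_vmcr and the architectural
 * ICH_VMCR_EL2 layout. The GICv2-specific fields (FIQEn, AckCtl) are
 * only meaningful when a GICv2 is being emulated.
 */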
void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
{
	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
	u32 model = vcpu->kvm->arch.vgic.vgic_model;
	u32 vmcr;

	if (model == KVM_DEV_TYPE_ARM_VGIC_V2) {
		vmcr = FIELD_PREP(ICH_VMCR_EL2_VAckCtl, vmcrp->ackctl);
		vmcr |= FIELD_PREP(ICH_VMCR_EL2_VFIQEn, vmcrp->fiqen);
	} else {
		/*
		 * When emulating GICv3 on GICv3 with SRE=1, the
		 * VFIQEn bit is RES1 and the VAckCtl bit is RES0.
		 */
		vmcr = ICH_VMCR_EL2_VFIQEn_MASK;
	}

	vmcr |= FIELD_PREP(ICH_VMCR_EL2_VCBPR, vmcrp->cbpr);
	vmcr |= FIELD_PREP(ICH_VMCR_EL2_VEOIM, vmcrp->eoim);
	vmcr |= FIELD_PREP(ICH_VMCR_EL2_VBPR1, vmcrp->abpr);
	vmcr |= FIELD_PREP(ICH_VMCR_EL2_VBPR0, vmcrp->bpr);
	vmcr |= FIELD_PREP(ICH_VMCR_EL2_VPMR, vmcrp->pmr);
	vmcr |= FIELD_PREP(ICH_VMCR_EL2_VENG0, vmcrp->grpen0);
	vmcr |= FIELD_PREP(ICH_VMCR_EL2_VENG1, vmcrp->grpen1);

	cpu_if->vgic_vmcr = vmcr;
}

void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
{
	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
	u32 model = vcpu->kvm->arch.vgic.vgic_model;
	u32 vmcr;

	vmcr = cpu_if->vgic_vmcr;

	if (model == KVM_DEV_TYPE_ARM_VGIC_V2) {
		vmcrp->ackctl = FIELD_GET(ICH_VMCR_EL2_VAckCtl, vmcr);
		vmcrp->fiqen = FIELD_GET(ICH_VMCR_EL2_VFIQEn, vmcr);
	} else {
		/*
		 * When emulating GICv3 on GICv3 with SRE=1, the
		 * VFIQEn bit is RES1 and the VAckCtl bit is RES0.
		 */
		vmcrp->fiqen = 1;
		vmcrp->ackctl = 0;
	}

	vmcrp->cbpr = FIELD_GET(ICH_VMCR_EL2_VCBPR, vmcr);
	vmcrp->eoim = FIELD_GET(ICH_VMCR_EL2_VEOIM, vmcr);
	vmcrp->abpr = FIELD_GET(ICH_VMCR_EL2_VBPR1, vmcr);
	vmcrp->bpr = FIELD_GET(ICH_VMCR_EL2_VBPR0, vmcr);
	vmcrp->pmr = FIELD_GET(ICH_VMCR_EL2_VPMR, vmcr);
	vmcrp->grpen0 = FIELD_GET(ICH_VMCR_EL2_VENG0, vmcr);
	vmcrp->grpen1 = FIELD_GET(ICH_VMCR_EL2_VENG1, vmcr);
}

#define INITIAL_PENDBASER_VALUE \
	(GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWb) | \
	 GIC_BASER_CACHEABILITY(GICR_PENDBASER, OUTER, SameAsInner) | \
	 GIC_BASER_SHAREABILITY(GICR_PENDBASER, InnerShareable))

void vgic_v3_reset(struct kvm_vcpu *vcpu)
{
	struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3;

	/*
	 * By forcing VMCR to zero, the GIC will restore the binary
	 * points to their reset values. Anything else resets to zero
	 * anyway.
	 */
	vgic_v3->vgic_vmcr = 0;

	/*
	 * If we are emulating a GICv3, we do it in a non-GICv2-compatible
	 * way, so we force SRE to 1 to demonstrate this to the guest.
	 * Also, we don't support any form of IRQ/FIQ bypass.
	 * This goes with the spec allowing the value to be RAO/WI.
	 */
	if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
		vgic_v3->vgic_sre = (ICC_SRE_EL1_DIB |
				     ICC_SRE_EL1_DFB |
				     ICC_SRE_EL1_SRE);
		vcpu->arch.vgic_cpu.pendbaser = INITIAL_PENDBASER_VALUE;
	} else {
		vgic_v3->vgic_sre = 0;
	}

	vcpu->arch.vgic_cpu.num_id_bits = FIELD_GET(ICH_VTR_EL2_IDbits,
						    kvm_vgic_global_state.ich_vtr_el2);
	vcpu->arch.vgic_cpu.num_pri_bits = FIELD_GET(ICH_VTR_EL2_PRIbits,
						     kvm_vgic_global_state.ich_vtr_el2) + 1;
}

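/*
 * Hide the GICv3 sysreg interface from guests that don't get a GICv3
 * (GICv2 emulation or userspace irqchip) by trapping group-0, group-1
 * and common sysreg accesses.
 */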
void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu)
{
	struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3;

	if (!vgic_is_v3(vcpu->kvm))
		return;

	/* Hide GICv3 sysreg if necessary */
	if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2 ||
	    !irqchip_in_kernel(vcpu->kvm))
		vgic_v3->vgic_hcr |= (ICH_HCR_EL2_TALL0 | ICH_HCR_EL2_TALL1 |
				      ICH_HCR_EL2_TC);
}

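/*
 * Read the pending bit of an LPI from the guest's pending table (as
 * described by GICR_PENDBASER), latch it in software, and clear the
 * consumed bit in guest memory.
 */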
int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq)
{
	struct kvm_vcpu *vcpu;
	int byte_offset, bit_nr;
	gpa_t pendbase, ptr;
	bool status;
	u8 val;
	int ret;
	unsigned long flags;

retry:
	vcpu = irq->target_vcpu;
	if (!vcpu)
		return 0;

	pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser);

	byte_offset = irq->intid / BITS_PER_BYTE;
	bit_nr = irq->intid % BITS_PER_BYTE;
	ptr = pendbase + byte_offset;

	ret = kvm_read_guest_lock(kvm, ptr, &val, 1);
	if (ret)
		return ret;

	status = val & (1 << bit_nr);

	raw_spin_lock_irqsave(&irq->irq_lock, flags);
	if (irq->target_vcpu != vcpu) {
		raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
		goto retry;
	}
	irq->pending_latch = status;
	vgic_queue_irq_unlock(vcpu->kvm, irq, flags);

	if (status) {
		/* clear consumed data */
		val &= ~(1 << bit_nr);
		ret = vgic_write_guest_lock(kvm, ptr, &val, 1);
		if (ret)
			return ret;
	}
	return 0;
}

/*
 * The deactivation of the doorbell interrupt will trigger the
 * unmapping of the associated vPE.
 */
static void unmap_all_vpes(struct kvm *kvm)
{
	struct vgic_dist *dist = &kvm->arch.vgic;
	int i;

	for (i = 0; i < dist->its_vm.nr_vpes; i++)
		free_irq(dist->its_vm.vpes[i]->irq, kvm_get_vcpu(kvm, i));
}

static void map_all_vpes(struct kvm *kvm)
{
	struct vgic_dist *dist = &kvm->arch.vgic;
	int i;

	for (i = 0; i < dist->its_vm.nr_vpes; i++)
		WARN_ON(vgic_v4_request_vpe_irq(kvm_get_vcpu(kvm, i),
						dist->its_vm.vpes[i]->irq));
}

/*
 * vgic_v3_save_pending_tables - Save the pending tables into guest RAM
 * kvm lock and all vcpu locks must be held
 */
int vgic_v3_save_pending_tables(struct kvm *kvm)
{
	struct vgic_dist *dist = &kvm->arch.vgic;
	struct vgic_irq *irq;
	gpa_t last_ptr = ~(gpa_t)0;
	bool vlpi_avail = false;
	unsigned long index;
	int ret = 0;
	u8 val;

	if (unlikely(!vgic_initialized(kvm)))
		return -ENXIO;

	/*
	 * Prepare for reading any VLPI state.
	 * The above vgic initialized check also ensures that the allocation
	 * and enabling of the doorbells have already been done.
	 */
	if (kvm_vgic_global_state.has_gicv4_1) {
		unmap_all_vpes(kvm);
		vlpi_avail = true;
	}

	xa_for_each(&dist->lpi_xa, index, irq) {
		int byte_offset, bit_nr;
		struct kvm_vcpu *vcpu;
		gpa_t pendbase, ptr;
		bool is_pending;
		bool stored;

		vcpu = irq->target_vcpu;
		if (!vcpu)
			continue;

		pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser);

		byte_offset = irq->intid / BITS_PER_BYTE;
		bit_nr = irq->intid % BITS_PER_BYTE;
		ptr = pendbase + byte_offset;

		if (ptr != last_ptr) {
			ret = kvm_read_guest_lock(kvm, ptr, &val, 1);
			if (ret)
				goto out;
			last_ptr = ptr;
		}

		stored = val & (1U << bit_nr);

		is_pending = irq->pending_latch;

		if (irq->hw && vlpi_avail)
			vgic_v4_get_vlpi_state(irq, &is_pending);

		if (stored == is_pending)
			continue;

		if (is_pending)
			val |= 1 << bit_nr;
		else
			val &= ~(1 << bit_nr);

		ret = vgic_write_guest_lock(kvm, ptr, &val, 1);
		if (ret)
			goto out;
	}

out:
	if (vlpi_avail)
		map_all_vpes(kvm);

	return ret;
}

/**
 * vgic_v3_rdist_overlap - check if a region overlaps with any
 * existing redistributor region
 *
 * @kvm: kvm handle
 * @base: base of the region
 * @size: size of region
 *
 * Return: true if there is an overlap
 */
bool vgic_v3_rdist_overlap(struct kvm *kvm, gpa_t base, size_t size)
{
	struct vgic_dist *d = &kvm->arch.vgic;
	struct vgic_redist_region *rdreg;

	list_for_each_entry(rdreg, &d->rd_regions, list) {
		if ((base + size > rdreg->base) &&
		    (base < rdreg->base + vgic_v3_rd_region_size(kvm, rdreg)))
			return true;
	}
	return false;
}

/*
 * Check for overlapping regions and for regions crossing the end of memory
 * for base addresses which have already been set.
 */
bool vgic_v3_check_base(struct kvm *kvm)
{
	struct vgic_dist *d = &kvm->arch.vgic;
	struct vgic_redist_region *rdreg;

	if (!IS_VGIC_ADDR_UNDEF(d->vgic_dist_base) &&
	    d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE < d->vgic_dist_base)
		return false;

	list_for_each_entry(rdreg, &d->rd_regions, list) {
		size_t sz = vgic_v3_rd_region_size(kvm, rdreg);

		if (vgic_check_iorange(kvm, VGIC_ADDR_UNDEF,
				       rdreg->base, SZ_64K, sz))
			return false;
	}

	if (IS_VGIC_ADDR_UNDEF(d->vgic_dist_base))
		return true;

	return !vgic_v3_rdist_overlap(kvm, d->vgic_dist_base,
				      KVM_VGIC_V3_DIST_SIZE);
}

/**
 * vgic_v3_rdist_free_slot - Look up registered rdist regions and identify one
 * which has free space to put a new rdist region.
 *
 * @rd_regions: redistributor region list head
 *
 * A redistributor region maps n redistributors, n = region size / (2 x 64kB).
 * Stride between redistributors is 0 and regions are filled in the index order.
 *
 * Return: the redist region handle, if any, that has space to map a new rdist
 * region.
 */
struct vgic_redist_region *vgic_v3_rdist_free_slot(struct list_head *rd_regions)
{
	struct vgic_redist_region *rdreg;

	list_for_each_entry(rdreg, rd_regions, list) {
		if (!vgic_v3_redist_region_full(rdreg))
			return rdreg;
	}
	return NULL;
}

struct vgic_redist_region *vgic_v3_rdist_region_from_index(struct kvm *kvm,
							   u32 index)
{
	struct list_head *rd_regions = &kvm->arch.vgic.rd_regions;
	struct vgic_redist_region *rdreg;

	list_for_each_entry(rdreg, rd_regions, list) {
		if (rdreg->index == index)
			return rdreg;
	}
	return NULL;
}

int vgic_v3_map_resources(struct kvm *kvm)
{
	struct vgic_dist *dist = &kvm->arch.vgic;
	struct kvm_vcpu *vcpu;
	unsigned long c;

	kvm_for_each_vcpu(c, vcpu, kvm) {
		struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;

		if (IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr)) {
			kvm_debug("vcpu %ld redistributor base not set\n", c);
			return -ENXIO;
		}
	}

	if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base)) {
		kvm_debug("Need to set vgic distributor addresses first\n");
		return -ENXIO;
	}

	if (!vgic_v3_check_base(kvm)) {
		kvm_debug("VGIC redist and dist frames overlap\n");
		return -EINVAL;
	}

	/*
	 * For a VGICv3 we require the userland to explicitly initialize
	 * the VGIC before we need to use it.
	 */
	if (!vgic_initialized(kvm))
		return -EBUSY;

	if (kvm_vgic_global_state.has_gicv4_1)
		vgic_v4_configure_vsgis(kvm);

	return 0;
}

DEFINE_STATIC_KEY_FALSE(vgic_v3_cpuif_trap);
DEFINE_STATIC_KEY_FALSE(vgic_v3_has_v2_compat);

static int __init early_group0_trap_cfg(char *buf)
{
	return kstrtobool(buf, &group0_trap);
}
early_param("kvm-arm.vgic_v3_group0_trap", early_group0_trap_cfg);

static int __init early_group1_trap_cfg(char *buf)
{
	return kstrtobool(buf, &group1_trap);
}
early_param("kvm-arm.vgic_v3_group1_trap", early_group1_trap_cfg);

static int __init early_common_trap_cfg(char *buf)
{
	return kstrtobool(buf, &common_trap);
}
early_param("kvm-arm.vgic_v3_common_trap", early_common_trap_cfg);

static int __init early_gicv4_enable(char *buf)
{
	return kstrtobool(buf, &gicv4_enable);
}
early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable);

static const struct midr_range broken_seis[] = {
	MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM),
	MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM),
	MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_PRO),
	MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_PRO),
	MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_MAX),
	MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX),
	MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD),
	MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE),
	MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_PRO),
	MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_PRO),
	MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_MAX),
	MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_MAX),
	{},
};

static bool vgic_v3_broken_seis(void)
{
	return (is_kernel_in_hyp_mode() &&
		is_midr_in_range_list(broken_seis) &&
		(read_sysreg_s(SYS_ICH_VTR_EL2) & ICH_VTR_EL2_SEIS));
}

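/*
 * Alternative patching callback: compute the ICH_HCR_EL2 trap bits that
 * must always be set on this system and encode them into a single
 * "movz rd, #imm", reusing the destination register of the original
 * instruction. Only the low 16 bits are encoded, which is assumed to
 * cover all the trap bits of interest here.
 */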
void noinstr kvm_compute_ich_hcr_trap_bits(struct alt_instr *alt,
					   __le32 *origptr, __le32 *updptr,
					   int nr_inst)
{
	u32 insn, oinsn, rd;
	u64 hcr = 0;

	if (cpus_have_cap(ARM64_WORKAROUND_CAVIUM_30115)) {
		group0_trap = true;
		group1_trap = true;
	}

	if (vgic_v3_broken_seis()) {
		/* We know that these machines have ICH_HCR_EL2.TDIR */
		group0_trap = true;
		group1_trap = true;
		dir_trap = true;
	}

	if (!cpus_have_cap(ARM64_HAS_ICH_HCR_EL2_TDIR))
		common_trap = true;

	if (group0_trap)
		hcr |= ICH_HCR_EL2_TALL0;
	if (group1_trap)
		hcr |= ICH_HCR_EL2_TALL1;
	if (common_trap)
		hcr |= ICH_HCR_EL2_TC;
	if (dir_trap)
		hcr |= ICH_HCR_EL2_TDIR;

	/* Compute target register */
	oinsn = le32_to_cpu(*origptr);
	rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, oinsn);

	/* movz rd, #(val & 0xffff) */
	insn = aarch64_insn_gen_movewide(rd,
					 (u16)hcr,
					 0,
					 AARCH64_INSN_VARIANT_64BIT,
					 AARCH64_INSN_MOVEWIDE_ZERO);
	*updptr = cpu_to_le32(insn);
}

void vgic_v3_enable_cpuif_traps(void)
{
	u64 traps = vgic_ich_hcr_trap_bits();

	if (traps) {
		kvm_info("GICv3 sysreg trapping enabled ([%s%s%s%s], reduced performance)\n",
			 (traps & ICH_HCR_EL2_TALL0) ? "G0" : "",
			 (traps & ICH_HCR_EL2_TALL1) ? "G1" : "",
			 (traps & ICH_HCR_EL2_TC) ? "C" : "",
			 (traps & ICH_HCR_EL2_TDIR) ? "D" : "");
		static_branch_enable(&vgic_v3_cpuif_trap);
	}
}

/**
 * vgic_v3_probe - probe for a VGICv3 compatible interrupt controller
 * @info: pointer to the GIC description
 *
 * Returns 0 if the VGICv3 has been probed successfully, returns an error code
 * otherwise
 */
int vgic_v3_probe(const struct gic_kvm_info *info)
{
	u64 ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_gic_config);
	bool has_v2;
	int ret;

	has_v2 = ich_vtr_el2 >> 63;
	ich_vtr_el2 = (u32)ich_vtr_el2;

	/*
	 * The ListRegs field is 5 bits, but there is an architectural
	 * maximum of 16 list registers. Just ignore bit 4...
	 */
	kvm_vgic_global_state.nr_lr = (ich_vtr_el2 & 0xf) + 1;
	kvm_vgic_global_state.can_emulate_gicv2 = false;
	kvm_vgic_global_state.ich_vtr_el2 = ich_vtr_el2;

	/* GICv4 support? */
	if (info->has_v4) {
		kvm_vgic_global_state.has_gicv4 = gicv4_enable;
		kvm_vgic_global_state.has_gicv4_1 = info->has_v4_1 && gicv4_enable;
		kvm_info("GICv4%s support %s\n",
			 kvm_vgic_global_state.has_gicv4_1 ? ".1" : "",
			 str_enabled_disabled(gicv4_enable));
	}

	kvm_vgic_global_state.vcpu_base = 0;

	if (!info->vcpu.start) {
		kvm_info("GICv3: no GICV resource entry\n");
	} else if (!has_v2) {
		pr_warn(FW_BUG "CPU interface incapable of MMIO access\n");
	} else if (!PAGE_ALIGNED(info->vcpu.start)) {
		pr_warn("GICV physical address 0x%llx not page aligned\n",
			(unsigned long long)info->vcpu.start);
	} else if (kvm_get_mode() != KVM_MODE_PROTECTED) {
		kvm_vgic_global_state.vcpu_base = info->vcpu.start;
		kvm_vgic_global_state.can_emulate_gicv2 = true;
		ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2);
		if (ret) {
			kvm_err("Cannot register GICv2 KVM device.\n");
			return ret;
		}
		kvm_info("vgic-v2@%llx\n", info->vcpu.start);
	}
	ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V3);
	if (ret) {
		kvm_err("Cannot register GICv3 KVM device.\n");
		kvm_unregister_device_ops(KVM_DEV_TYPE_ARM_VGIC_V2);
		return ret;
	}

	if (kvm_vgic_global_state.vcpu_base == 0)
		kvm_info("disabling GICv2 emulation\n");

	/*
	 * Flip the static branch if the HW supports v2, even if we're
	 * not using it (such as in protected mode).
	 */
	if (has_v2)
		static_branch_enable(&vgic_v3_has_v2_compat);

	if (vgic_v3_broken_seis()) {
		kvm_info("GICv3 with broken locally generated SEI\n");
		kvm_vgic_global_state.ich_vtr_el2 &= ~ICH_VTR_EL2_SEIS;
	}

	vgic_v3_enable_cpuif_traps();

	kvm_vgic_global_state.vctrl_base = NULL;
	kvm_vgic_global_state.type = VGIC_V3;
	kvm_vgic_global_state.max_gic_vcpus = VGIC_V3_MAX_CPUS;

	return 0;
}

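/*
 * vcpu load/put: restore or save the GICv3 CPU interface state,
 * activate or deactivate the CPU interface traps when running with
 * VHE, and hand over to the nested or GICv4 code where applicable.
 */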
void vgic_v3_load(struct kvm_vcpu *vcpu)
{
	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;

	/* If the vgic is nested, perform the full state loading */
	if (vgic_state_is_nested(vcpu)) {
		vgic_v3_load_nested(vcpu);
		return;
	}

	if (likely(!is_protected_kvm_enabled()))
		kvm_call_hyp(__vgic_v3_restore_vmcr_aprs, cpu_if);

	if (has_vhe())
		__vgic_v3_activate_traps(cpu_if);

	WARN_ON(vgic_v4_load(vcpu));
}

void vgic_v3_put(struct kvm_vcpu *vcpu)
{
	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;

	if (vgic_state_is_nested(vcpu)) {
		vgic_v3_put_nested(vcpu);
		return;
	}

	if (likely(!is_protected_kvm_enabled()))
		kvm_call_hyp(__vgic_v3_save_aprs, cpu_if);
	WARN_ON(vgic_v4_put(vcpu));

	if (has_vhe())
		__vgic_v3_deactivate_traps(cpu_if);
}