xref: /linux/arch/arm64/kvm/vgic/vgic-v3.c (revision 51d90a15fedf8366cb96ef68d0ea2d0bf15417d2)
1 // SPDX-License-Identifier: GPL-2.0-only
2 
3 #include <linux/irqchip/arm-gic-v3.h>
4 #include <linux/irq.h>
5 #include <linux/irqdomain.h>
6 #include <linux/kstrtox.h>
7 #include <linux/kvm.h>
8 #include <linux/kvm_host.h>
9 #include <linux/string_choices.h>
10 #include <kvm/arm_vgic.h>
11 #include <asm/kvm_hyp.h>
12 #include <asm/kvm_mmu.h>
13 #include <asm/kvm_asm.h>
14 
15 #include "vgic-mmio.h"
16 #include "vgic.h"
17 
18 static bool group0_trap;
19 static bool group1_trap;
20 static bool common_trap;
21 static bool dir_trap;
22 static bool gicv4_enable;
23 
vgic_v3_configure_hcr(struct kvm_vcpu * vcpu,struct ap_list_summary * als)24 void vgic_v3_configure_hcr(struct kvm_vcpu *vcpu,
25 			   struct ap_list_summary *als)
26 {
27 	struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
28 
29 	if (!irqchip_in_kernel(vcpu->kvm))
30 		return;
31 
32 	cpuif->vgic_hcr = ICH_HCR_EL2_En;
33 
34 	if (irqs_pending_outside_lrs(als))
35 		cpuif->vgic_hcr |= ICH_HCR_EL2_NPIE;
36 	if (irqs_active_outside_lrs(als))
37 		cpuif->vgic_hcr |= ICH_HCR_EL2_LRENPIE;
38 	if (irqs_outside_lrs(als))
39 		cpuif->vgic_hcr |= ICH_HCR_EL2_UIE;
40 
41 	if (!als->nr_sgi)
42 		cpuif->vgic_hcr |= ICH_HCR_EL2_vSGIEOICount;
43 
44 	cpuif->vgic_hcr |= (cpuif->vgic_vmcr & ICH_VMCR_ENG0_MASK) ?
45 		ICH_HCR_EL2_VGrp0DIE : ICH_HCR_EL2_VGrp0EIE;
46 	cpuif->vgic_hcr |= (cpuif->vgic_vmcr & ICH_VMCR_ENG1_MASK) ?
47 		ICH_HCR_EL2_VGrp1DIE : ICH_HCR_EL2_VGrp1EIE;
48 
49 	/*
50 	 * Dealing with EOImode=1 is a massive source of headache. Not
51 	 * only do we need to track that we have active interrupts
52 	 * outside of the LRs and force DIR to be trapped, we also
53 	 * need to deal with SPIs that can be deactivated on another
54 	 * CPU.
55 	 *
56 	 * On systems that do not implement TDIR, force the bit in the
57 	 * shadow state anyway to avoid IPI-ing on these poor sods.
58 	 *
59 	 * Note that we set the trap irrespective of EOIMode, as that
60 	 * can change behind our back without any warning...
61 	 */
62 	if (!cpus_have_final_cap(ARM64_HAS_ICH_HCR_EL2_TDIR) ||
63 	    irqs_active_outside_lrs(als)		     ||
64 	    atomic_read(&vcpu->kvm->arch.vgic.active_spis))
65 		cpuif->vgic_hcr |= ICH_HCR_EL2_TDIR;
66 }
67 
lr_signals_eoi_mi(u64 lr_val)68 static bool lr_signals_eoi_mi(u64 lr_val)
69 {
70 	return !(lr_val & ICH_LR_STATE) && (lr_val & ICH_LR_EOI) &&
71 	       !(lr_val & ICH_LR_HW);
72 }
73 
/*
 * Fold the list register value @val back into the software state of the
 * interrupt it describes: active/pending state, GICv2 SGI source tracking
 * and resampling are all updated under the irq lock, after which the
 * interrupt is marked as no longer being in an LR.
 */
static void vgic_v3_fold_lr(struct kvm_vcpu *vcpu, u64 val)
{
	struct kvm *kvm = vcpu->kvm;
	bool v2_sgi = false;
	struct vgic_irq *irq;
	bool deactivated;
	u32 intid;

	/* The INTID field layout depends on the emulated GIC model */
	if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V3) {
		intid = val & GICH_LR_VIRTUALID;
		v2_sgi = vgic_irq_is_sgi(intid);
	} else {
		intid = val & ICH_LR_VIRTUAL_ID_MASK;
	}

	irq = vgic_get_vcpu_irq(vcpu, intid);
	if (!irq)	/* An LPI could have been unmapped. */
		return;

	scoped_guard(raw_spinlock, &irq->irq_lock) {
		bool lr_active;

		/* LPIs never keep an active state; everything else does */
		if (irq->intid >= VGIC_MIN_LPI)
			val &= ~ICH_LR_ACTIVE_BIT;

		/* Record whether this fold is a deactivation */
		lr_active = !!(val & ICH_LR_ACTIVE_BIT);
		deactivated = irq->active && !lr_active;
		irq->active = lr_active;

		/* Only edge interrupts get their pending bit preserved */
		if (irq->config == VGIC_CONFIG_EDGE &&
		    (val & ICH_LR_PENDING_BIT))
			irq->pending_latch = true;

		/* An acked level interrupt loses its soft pending state */
		if (irq->config == VGIC_CONFIG_LEVEL && !(val & ICH_LR_STATE))
			irq->pending_latch = false;

		if (v2_sgi) {
			u8 cpuid = FIELD_GET(GICH_LR_PHYSID_CPUID, val);

			if (irq->active)
				irq->active_source = cpuid;

			if (val & ICH_LR_PENDING_BIT)
				irq->source |= BIT(cpuid);
		}

		/* Handle resampling for mapped interrupts if required */
		vgic_irq_handle_resampling(irq, deactivated, val & ICH_LR_PENDING_BIT);

		irq->on_lr = false;
	}

	/*
	 * A deactivated SPI whose LR signals an EOI maintenance interrupt
	 * gets its ack notifiers called, and the active SPI count dropped.
	 */
	if (deactivated && lr_signals_eoi_mi(val) && vgic_valid_spi(kvm, intid)) {
		kvm_notify_acked_irq(kvm, 0, intid - VGIC_NR_PRIVATE_IRQS);
		atomic_dec_if_positive(&kvm->arch.vgic.active_spis);
	}

	vgic_put_irq(kvm, irq);
}
135 
136 static u64 vgic_v3_compute_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq);
137 
/*
 * Deactivate the physical interrupt @intid, using the GICv5 CDDI
 * instruction when ARM64_HAS_GICV5_LEGACY is set and a DIR write
 * otherwise.
 */
static void vgic_v3_deactivate_phys(u32 intid)
{
	if (!cpus_have_final_cap(ARM64_HAS_GICV5_LEGACY)) {
		gic_write_dir(intid);
		return;
	}

	gic_insn(intid | FIELD_PREP(GICV5_GIC_CDDI_TYPE_MASK, 1), CDDI);
}
145 
vgic_v3_fold_lr_state(struct kvm_vcpu * vcpu)146 void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
147 {
148 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
149 	struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3;
150 	u32 eoicount = FIELD_GET(ICH_HCR_EL2_EOIcount, cpuif->vgic_hcr);
151 	struct vgic_irq *irq;
152 
153 	DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
154 
155 	for (int lr = 0; lr < cpuif->used_lrs; lr++)
156 		vgic_v3_fold_lr(vcpu, cpuif->vgic_lr[lr]);
157 
158 	/*
159 	 * EOIMode=0: use EOIcount to emulate deactivation. We are
160 	 * guaranteed to deactivate in reverse order of the activation, so
161 	 * just pick one active interrupt after the other in the ap_list,
162 	 * and replay the deactivation as if the CPU was doing it. We also
163 	 * rely on priority drop to have taken place, and the list to be
164 	 * sorted by priority.
165 	 */
166 	list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
167 		u64 lr;
168 
169 		/*
170 		 * I would have loved to write this using a scoped_guard(),
171 		 * but using 'continue' here is a total train wreck.
172 		 */
173 		if (!eoicount) {
174 			break;
175 		} else {
176 			guard(raw_spinlock)(&irq->irq_lock);
177 
178 			if (!(likely(vgic_target_oracle(irq) == vcpu) &&
179 			      irq->active))
180 				continue;
181 
182 			lr = vgic_v3_compute_lr(vcpu, irq) & ~ICH_LR_ACTIVE_BIT;
183 		}
184 
185 		if (lr & ICH_LR_HW)
186 			vgic_v3_deactivate_phys(FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
187 
188 		vgic_v3_fold_lr(vcpu, lr);
189 		eoicount--;
190 	}
191 
192 	cpuif->used_lrs = 0;
193 }
194 
/*
 * Handle a guest deactivation request for the interrupt described by
 * @val. Bits [12:10] of @val are taken as the source CPUID and removed
 * before the remainder is used as the INTID.
 */
void vgic_v3_deactivate(struct kvm_vcpu *vcpu, u64 val)
{
	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
	struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3;
	struct kvm *kvm = vcpu->kvm;
	u32 model = kvm->arch.vgic.vgic_model;
	struct kvm_vcpu *target_vcpu = NULL;
	bool use_mmio = false;
	struct vgic_irq *irq;
	unsigned long flags;
	bool is_v2_sgi;
	u64 lr = 0;
	u8 cpuid;

	/* Snapshot CPUID, and remove it from the INTID */
	cpuid = FIELD_GET(GENMASK_ULL(12, 10), val);
	val &= ~GENMASK_ULL(12, 10);

	is_v2_sgi = (model == KVM_DEV_TYPE_ARM_VGIC_V2 &&
		     val < VGIC_NR_SGIS);

	/* DIR is only acted upon when EOIMode==1 ... */
	if (!(cpuif->vgic_vmcr & ICH_VMCR_EOIM_MASK))
		return;

	/* ... and only for an SGI, PPI or SPI */
	if (val >= kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)
		return;

	/* Make sure we're in the same context as LR handling */
	local_irq_save(flags);

	irq = vgic_get_vcpu_irq(vcpu, val);
	if (WARN_ON_ONCE(!irq))
		goto out_irq_restore;

	/*
	 * EOIMode=1: traps are the only way to handle the deactivation
	 * of overflowing interrupts, as there is no ordering guarantee
	 * and EOIcount isn't being incremented. Priority drop will have
	 * taken place, as ICV_EOIxR_EL1 only affects the APRs and not
	 * the LRs.
	 *
	 * Three possibilities:
	 *
	 * - The irq is not queued on any CPU, and there is nothing to
	 *   do,
	 *
	 * - Or the irq is in an LR, meaning that its state is not
	 *   directly observable. Treat it bluntly by making it as if
	 *   this was a write to GICD_ICACTIVER, which will force an
	 *   exit on all vcpus. If it hurts, don't do that.
	 *
	 * - Or the irq is active, but not in an LR, and we can
	 *   directly deactivate it by building a pseudo-LR, fold it,
	 *   and queue a request to prune the resulting ap_list,
	 *
	 * Special care must be taken to match the source CPUID when
	 * deactivating a GICv2 SGI.
	 */
	scoped_guard(raw_spinlock, &irq->irq_lock) {
		target_vcpu = irq->vcpu;

		/* Not on any ap_list? */
		if (!target_vcpu)
			goto out_put;

		/*
		 * The interrupt sits in an LR and cannot be observed
		 * from here: fall back to the MMIO-style deactivation.
		 */
		if (irq->on_lr) {
			use_mmio = true;
			goto out_put;
		}

		/* GICv2 SGI: check that the cpuid matches */
		if (is_v2_sgi && irq->active_source != cpuid) {
			target_vcpu = NULL;
			goto out_put;
		}

		/* Build a pseudo-LR describing the deactivated interrupt */
		lr = vgic_v3_compute_lr(vcpu, irq) & ~ICH_LR_ACTIVE_BIT;
	}

	if (lr & ICH_LR_HW)
		vgic_v3_deactivate_phys(FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));

	vgic_v3_fold_lr(vcpu, lr);

out_put:
	vgic_put_irq(kvm, irq);

out_irq_restore:
	local_irq_restore(flags);

	if (use_mmio)
		vgic_mmio_write_cactive(vcpu, (val / 32) * 4, 4, BIT(val % 32));

	/* Force the ap_list to be pruned */
	if (target_vcpu)
		kvm_make_request(KVM_REQ_VGIC_PROCESS_UPDATE, target_vcpu);
}
296 
297 /* Requires the irq to be locked already */
vgic_v3_compute_lr(struct kvm_vcpu * vcpu,struct vgic_irq * irq)298 static u64 vgic_v3_compute_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
299 {
300 	u32 model = vcpu->kvm->arch.vgic.vgic_model;
301 	u64 val = irq->intid;
302 	bool allow_pending = true, is_v2_sgi;
303 
304 	WARN_ON(irq->on_lr);
305 
306 	is_v2_sgi = (vgic_irq_is_sgi(irq->intid) &&
307 		     model == KVM_DEV_TYPE_ARM_VGIC_V2);
308 
309 	if (irq->active) {
310 		val |= ICH_LR_ACTIVE_BIT;
311 		if (is_v2_sgi)
312 			val |= irq->active_source << GICH_LR_PHYSID_CPUID_SHIFT;
313 		if (vgic_irq_is_multi_sgi(irq)) {
314 			allow_pending = false;
315 			val |= ICH_LR_EOI;
316 		}
317 	}
318 
319 	if (irq->hw && !vgic_irq_needs_resampling(irq)) {
320 		val |= ICH_LR_HW;
321 		val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT;
322 		/*
323 		 * Never set pending+active on a HW interrupt, as the
324 		 * pending state is kept at the physical distributor
325 		 * level.
326 		 */
327 		if (irq->active)
328 			allow_pending = false;
329 	} else {
330 		if (irq->config == VGIC_CONFIG_LEVEL) {
331 			val |= ICH_LR_EOI;
332 
333 			/*
334 			 * Software resampling doesn't work very well
335 			 * if we allow P+A, so let's not do that.
336 			 */
337 			if (irq->active)
338 				allow_pending = false;
339 		}
340 	}
341 
342 	if (allow_pending && irq_is_pending(irq)) {
343 		val |= ICH_LR_PENDING_BIT;
344 
345 		if (is_v2_sgi) {
346 			u32 src = ffs(irq->source);
347 
348 			if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n",
349 					   irq->intid))
350 				return 0;
351 
352 			val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
353 			if (irq->source & ~BIT(src - 1))
354 				val |= ICH_LR_EOI;
355 		}
356 	}
357 
358 	if (irq->group)
359 		val |= ICH_LR_GROUP;
360 
361 	val |= (u64)irq->priority << ICH_LR_PRIORITY_SHIFT;
362 
363 	return val;
364 }
365 
vgic_v3_populate_lr(struct kvm_vcpu * vcpu,struct vgic_irq * irq,int lr)366 void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
367 {
368 	u32 model = vcpu->kvm->arch.vgic.vgic_model;
369 	u64 val = vgic_v3_compute_lr(vcpu, irq);
370 
371 	vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = val;
372 
373 	if (val & ICH_LR_PENDING_BIT) {
374 		if (irq->config == VGIC_CONFIG_EDGE)
375 			irq->pending_latch = false;
376 
377 		if (vgic_irq_is_sgi(irq->intid) &&
378 		    model == KVM_DEV_TYPE_ARM_VGIC_V2) {
379 			u32 src = ffs(irq->source);
380 
381 			irq->source &= ~BIT(src - 1);
382 			if (irq->source)
383 				irq->pending_latch = true;
384 		}
385 	}
386 
387 	/*
388 	 * Level-triggered mapped IRQs are special because we only observe
389 	 * rising edges as input to the VGIC.  We therefore lower the line
390 	 * level here, so that we can take new virtual IRQs.  See
391 	 * vgic_v3_fold_lr_state for more info.
392 	 */
393 	if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT))
394 		irq->line_level = false;
395 
396 	irq->on_lr = true;
397 }
398 
vgic_v3_clear_lr(struct kvm_vcpu * vcpu,int lr)399 void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr)
400 {
401 	vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = 0;
402 }
403 
vgic_v3_set_vmcr(struct kvm_vcpu * vcpu,struct vgic_vmcr * vmcrp)404 void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
405 {
406 	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
407 	u32 model = vcpu->kvm->arch.vgic.vgic_model;
408 	u32 vmcr;
409 
410 	if (model == KVM_DEV_TYPE_ARM_VGIC_V2) {
411 		vmcr = (vmcrp->ackctl << ICH_VMCR_ACK_CTL_SHIFT) &
412 			ICH_VMCR_ACK_CTL_MASK;
413 		vmcr |= (vmcrp->fiqen << ICH_VMCR_FIQ_EN_SHIFT) &
414 			ICH_VMCR_FIQ_EN_MASK;
415 	} else {
416 		/*
417 		 * When emulating GICv3 on GICv3 with SRE=1 on the
418 		 * VFIQEn bit is RES1 and the VAckCtl bit is RES0.
419 		 */
420 		vmcr = ICH_VMCR_FIQ_EN_MASK;
421 	}
422 
423 	vmcr |= (vmcrp->cbpr << ICH_VMCR_CBPR_SHIFT) & ICH_VMCR_CBPR_MASK;
424 	vmcr |= (vmcrp->eoim << ICH_VMCR_EOIM_SHIFT) & ICH_VMCR_EOIM_MASK;
425 	vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK;
426 	vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK;
427 	vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK;
428 	vmcr |= (vmcrp->grpen0 << ICH_VMCR_ENG0_SHIFT) & ICH_VMCR_ENG0_MASK;
429 	vmcr |= (vmcrp->grpen1 << ICH_VMCR_ENG1_SHIFT) & ICH_VMCR_ENG1_MASK;
430 
431 	cpu_if->vgic_vmcr = vmcr;
432 }
433 
vgic_v3_get_vmcr(struct kvm_vcpu * vcpu,struct vgic_vmcr * vmcrp)434 void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
435 {
436 	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
437 	u32 model = vcpu->kvm->arch.vgic.vgic_model;
438 	u32 vmcr;
439 
440 	vmcr = cpu_if->vgic_vmcr;
441 
442 	if (model == KVM_DEV_TYPE_ARM_VGIC_V2) {
443 		vmcrp->ackctl = (vmcr & ICH_VMCR_ACK_CTL_MASK) >>
444 			ICH_VMCR_ACK_CTL_SHIFT;
445 		vmcrp->fiqen = (vmcr & ICH_VMCR_FIQ_EN_MASK) >>
446 			ICH_VMCR_FIQ_EN_SHIFT;
447 	} else {
448 		/*
449 		 * When emulating GICv3 on GICv3 with SRE=1 on the
450 		 * VFIQEn bit is RES1 and the VAckCtl bit is RES0.
451 		 */
452 		vmcrp->fiqen = 1;
453 		vmcrp->ackctl = 0;
454 	}
455 
456 	vmcrp->cbpr = (vmcr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT;
457 	vmcrp->eoim = (vmcr & ICH_VMCR_EOIM_MASK) >> ICH_VMCR_EOIM_SHIFT;
458 	vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT;
459 	vmcrp->bpr  = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT;
460 	vmcrp->pmr  = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT;
461 	vmcrp->grpen0 = (vmcr & ICH_VMCR_ENG0_MASK) >> ICH_VMCR_ENG0_SHIFT;
462 	vmcrp->grpen1 = (vmcr & ICH_VMCR_ENG1_MASK) >> ICH_VMCR_ENG1_SHIFT;
463 }
464 
/* Value given to a vcpu's pendbaser by vgic_v3_reset() for a GICv3 guest */
#define INITIAL_PENDBASER_VALUE						\
	(GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWb) |		\
	 GIC_BASER_CACHEABILITY(GICR_PENDBASER, OUTER, SameAsInner) |	\
	 GIC_BASER_SHAREABILITY(GICR_PENDBASER, InnerShareable))
469 
vgic_v3_reset(struct kvm_vcpu * vcpu)470 void vgic_v3_reset(struct kvm_vcpu *vcpu)
471 {
472 	struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3;
473 
474 	/*
475 	 * By forcing VMCR to zero, the GIC will restore the binary
476 	 * points to their reset values. Anything else resets to zero
477 	 * anyway.
478 	 */
479 	vgic_v3->vgic_vmcr = 0;
480 
481 	/*
482 	 * If we are emulating a GICv3, we do it in an non-GICv2-compatible
483 	 * way, so we force SRE to 1 to demonstrate this to the guest.
484 	 * Also, we don't support any form of IRQ/FIQ bypass.
485 	 * This goes with the spec allowing the value to be RAO/WI.
486 	 */
487 	if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
488 		vgic_v3->vgic_sre = (ICC_SRE_EL1_DIB |
489 				     ICC_SRE_EL1_DFB |
490 				     ICC_SRE_EL1_SRE);
491 		vcpu->arch.vgic_cpu.pendbaser = INITIAL_PENDBASER_VALUE;
492 	} else {
493 		vgic_v3->vgic_sre = 0;
494 	}
495 
496 	vcpu->arch.vgic_cpu.num_id_bits = FIELD_GET(ICH_VTR_EL2_IDbits,
497 						    kvm_vgic_global_state.ich_vtr_el2);
498 	vcpu->arch.vgic_cpu.num_pri_bits = FIELD_GET(ICH_VTR_EL2_PRIbits,
499 						     kvm_vgic_global_state.ich_vtr_el2) + 1;
500 }
501 
vcpu_set_ich_hcr(struct kvm_vcpu * vcpu)502 void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu)
503 {
504 	struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3;
505 
506 	if (!vgic_is_v3(vcpu->kvm))
507 		return;
508 
509 	/* Hide GICv3 sysreg if necessary */
510 	if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2 ||
511 	    !irqchip_in_kernel(vcpu->kvm))
512 		vgic_v3->vgic_hcr |= (ICH_HCR_EL2_TALL0 | ICH_HCR_EL2_TALL1 |
513 				      ICH_HCR_EL2_TC);
514 }
515 
vgic_v3_lpi_sync_pending_status(struct kvm * kvm,struct vgic_irq * irq)516 int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq)
517 {
518 	struct kvm_vcpu *vcpu;
519 	int byte_offset, bit_nr;
520 	gpa_t pendbase, ptr;
521 	bool status;
522 	u8 val;
523 	int ret;
524 	unsigned long flags;
525 
526 retry:
527 	vcpu = irq->target_vcpu;
528 	if (!vcpu)
529 		return 0;
530 
531 	pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser);
532 
533 	byte_offset = irq->intid / BITS_PER_BYTE;
534 	bit_nr = irq->intid % BITS_PER_BYTE;
535 	ptr = pendbase + byte_offset;
536 
537 	ret = kvm_read_guest_lock(kvm, ptr, &val, 1);
538 	if (ret)
539 		return ret;
540 
541 	status = val & (1 << bit_nr);
542 
543 	raw_spin_lock_irqsave(&irq->irq_lock, flags);
544 	if (irq->target_vcpu != vcpu) {
545 		raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
546 		goto retry;
547 	}
548 	irq->pending_latch = status;
549 	vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
550 
551 	if (status) {
552 		/* clear consumed data */
553 		val &= ~(1 << bit_nr);
554 		ret = vgic_write_guest_lock(kvm, ptr, &val, 1);
555 		if (ret)
556 			return ret;
557 	}
558 	return 0;
559 }
560 
561 /*
562  * The deactivation of the doorbell interrupt will trigger the
563  * unmapping of the associated vPE.
564  */
unmap_all_vpes(struct kvm * kvm)565 static void unmap_all_vpes(struct kvm *kvm)
566 {
567 	struct vgic_dist *dist = &kvm->arch.vgic;
568 	int i;
569 
570 	for (i = 0; i < dist->its_vm.nr_vpes; i++)
571 		free_irq(dist->its_vm.vpes[i]->irq, kvm_get_vcpu(kvm, i));
572 }
573 
map_all_vpes(struct kvm * kvm)574 static void map_all_vpes(struct kvm *kvm)
575 {
576 	struct vgic_dist *dist = &kvm->arch.vgic;
577 	int i;
578 
579 	for (i = 0; i < dist->its_vm.nr_vpes; i++)
580 		WARN_ON(vgic_v4_request_vpe_irq(kvm_get_vcpu(kvm, i),
581 						dist->its_vm.vpes[i]->irq));
582 }
583 
584 /*
585  * vgic_v3_save_pending_tables - Save the pending tables into guest RAM
586  * kvm lock and all vcpu lock must be held
587  */
vgic_v3_save_pending_tables(struct kvm * kvm)588 int vgic_v3_save_pending_tables(struct kvm *kvm)
589 {
590 	struct vgic_dist *dist = &kvm->arch.vgic;
591 	struct vgic_irq *irq;
592 	gpa_t last_ptr = ~(gpa_t)0;
593 	bool vlpi_avail = false;
594 	unsigned long index;
595 	int ret = 0;
596 	u8 val;
597 
598 	if (unlikely(!vgic_initialized(kvm)))
599 		return -ENXIO;
600 
601 	/*
602 	 * A preparation for getting any VLPI states.
603 	 * The above vgic initialized check also ensures that the allocation
604 	 * and enabling of the doorbells have already been done.
605 	 */
606 	if (kvm_vgic_global_state.has_gicv4_1) {
607 		unmap_all_vpes(kvm);
608 		vlpi_avail = true;
609 	}
610 
611 	xa_for_each(&dist->lpi_xa, index, irq) {
612 		int byte_offset, bit_nr;
613 		struct kvm_vcpu *vcpu;
614 		gpa_t pendbase, ptr;
615 		bool is_pending;
616 		bool stored;
617 
618 		vcpu = irq->target_vcpu;
619 		if (!vcpu)
620 			continue;
621 
622 		pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser);
623 
624 		byte_offset = irq->intid / BITS_PER_BYTE;
625 		bit_nr = irq->intid % BITS_PER_BYTE;
626 		ptr = pendbase + byte_offset;
627 
628 		if (ptr != last_ptr) {
629 			ret = kvm_read_guest_lock(kvm, ptr, &val, 1);
630 			if (ret)
631 				goto out;
632 			last_ptr = ptr;
633 		}
634 
635 		stored = val & (1U << bit_nr);
636 
637 		is_pending = irq->pending_latch;
638 
639 		if (irq->hw && vlpi_avail)
640 			vgic_v4_get_vlpi_state(irq, &is_pending);
641 
642 		if (stored == is_pending)
643 			continue;
644 
645 		if (is_pending)
646 			val |= 1 << bit_nr;
647 		else
648 			val &= ~(1 << bit_nr);
649 
650 		ret = vgic_write_guest_lock(kvm, ptr, &val, 1);
651 		if (ret)
652 			goto out;
653 	}
654 
655 out:
656 	if (vlpi_avail)
657 		map_all_vpes(kvm);
658 
659 	return ret;
660 }
661 
662 /**
663  * vgic_v3_rdist_overlap - check if a region overlaps with any
664  * existing redistributor region
665  *
666  * @kvm: kvm handle
667  * @base: base of the region
668  * @size: size of region
669  *
670  * Return: true if there is an overlap
671  */
vgic_v3_rdist_overlap(struct kvm * kvm,gpa_t base,size_t size)672 bool vgic_v3_rdist_overlap(struct kvm *kvm, gpa_t base, size_t size)
673 {
674 	struct vgic_dist *d = &kvm->arch.vgic;
675 	struct vgic_redist_region *rdreg;
676 
677 	list_for_each_entry(rdreg, &d->rd_regions, list) {
678 		if ((base + size > rdreg->base) &&
679 			(base < rdreg->base + vgic_v3_rd_region_size(kvm, rdreg)))
680 			return true;
681 	}
682 	return false;
683 }
684 
685 /*
686  * Check for overlapping regions and for regions crossing the end of memory
687  * for base addresses which have already been set.
688  */
vgic_v3_check_base(struct kvm * kvm)689 bool vgic_v3_check_base(struct kvm *kvm)
690 {
691 	struct vgic_dist *d = &kvm->arch.vgic;
692 	struct vgic_redist_region *rdreg;
693 
694 	if (!IS_VGIC_ADDR_UNDEF(d->vgic_dist_base) &&
695 	    d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE < d->vgic_dist_base)
696 		return false;
697 
698 	list_for_each_entry(rdreg, &d->rd_regions, list) {
699 		size_t sz = vgic_v3_rd_region_size(kvm, rdreg);
700 
701 		if (vgic_check_iorange(kvm, VGIC_ADDR_UNDEF,
702 				       rdreg->base, SZ_64K, sz))
703 			return false;
704 	}
705 
706 	if (IS_VGIC_ADDR_UNDEF(d->vgic_dist_base))
707 		return true;
708 
709 	return !vgic_v3_rdist_overlap(kvm, d->vgic_dist_base,
710 				      KVM_VGIC_V3_DIST_SIZE);
711 }
712 
713 /**
714  * vgic_v3_rdist_free_slot - Look up registered rdist regions and identify one
715  * which has free space to put a new rdist region.
716  *
717  * @rd_regions: redistributor region list head
718  *
719  * A redistributor regions maps n redistributors, n = region size / (2 x 64kB).
720  * Stride between redistributors is 0 and regions are filled in the index order.
721  *
722  * Return: the redist region handle, if any, that has space to map a new rdist
723  * region.
724  */
vgic_v3_rdist_free_slot(struct list_head * rd_regions)725 struct vgic_redist_region *vgic_v3_rdist_free_slot(struct list_head *rd_regions)
726 {
727 	struct vgic_redist_region *rdreg;
728 
729 	list_for_each_entry(rdreg, rd_regions, list) {
730 		if (!vgic_v3_redist_region_full(rdreg))
731 			return rdreg;
732 	}
733 	return NULL;
734 }
735 
/*
 * Return the redistributor region of @kvm whose index is @index, or
 * NULL if no such region has been registered.
 */
struct vgic_redist_region *vgic_v3_rdist_region_from_index(struct kvm *kvm,
							   u32 index)
{
	struct vgic_redist_region *rdreg, *match = NULL;

	list_for_each_entry(rdreg, &kvm->arch.vgic.rd_regions, list) {
		if (rdreg->index != index)
			continue;

		match = rdreg;
		break;
	}

	return match;
}
748 
749 
vgic_v3_map_resources(struct kvm * kvm)750 int vgic_v3_map_resources(struct kvm *kvm)
751 {
752 	struct vgic_dist *dist = &kvm->arch.vgic;
753 	struct kvm_vcpu *vcpu;
754 	unsigned long c;
755 
756 	kvm_for_each_vcpu(c, vcpu, kvm) {
757 		struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
758 
759 		if (IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr)) {
760 			kvm_debug("vcpu %ld redistributor base not set\n", c);
761 			return -ENXIO;
762 		}
763 	}
764 
765 	if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base)) {
766 		kvm_debug("Need to set vgic distributor addresses first\n");
767 		return -ENXIO;
768 	}
769 
770 	if (!vgic_v3_check_base(kvm)) {
771 		kvm_debug("VGIC redist and dist frames overlap\n");
772 		return -EINVAL;
773 	}
774 
775 	/*
776 	 * For a VGICv3 we require the userland to explicitly initialize
777 	 * the VGIC before we need to use it.
778 	 */
779 	if (!vgic_initialized(kvm)) {
780 		return -EBUSY;
781 	}
782 
783 	if (kvm_vgic_global_state.has_gicv4_1)
784 		vgic_v4_configure_vsgis(kvm);
785 
786 	return 0;
787 }
788 
/* Enabled by vgic_v3_probe() when any sysreg trap bit is in use */
DEFINE_STATIC_KEY_FALSE(vgic_v3_cpuif_trap);
/* Enabled by vgic_v3_probe() when the probed GIC config reports v2 support */
DEFINE_STATIC_KEY_FALSE(vgic_v3_has_v2_compat);
791 
early_group0_trap_cfg(char * buf)792 static int __init early_group0_trap_cfg(char *buf)
793 {
794 	return kstrtobool(buf, &group0_trap);
795 }
796 early_param("kvm-arm.vgic_v3_group0_trap", early_group0_trap_cfg);
797 
early_group1_trap_cfg(char * buf)798 static int __init early_group1_trap_cfg(char *buf)
799 {
800 	return kstrtobool(buf, &group1_trap);
801 }
802 early_param("kvm-arm.vgic_v3_group1_trap", early_group1_trap_cfg);
803 
early_common_trap_cfg(char * buf)804 static int __init early_common_trap_cfg(char *buf)
805 {
806 	return kstrtobool(buf, &common_trap);
807 }
808 early_param("kvm-arm.vgic_v3_common_trap", early_common_trap_cfg);
809 
early_gicv4_enable(char * buf)810 static int __init early_gicv4_enable(char *buf)
811 {
812 	return kstrtobool(buf, &gicv4_enable);
813 }
814 early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable);
815 
816 static const struct midr_range broken_seis[] = {
817 	MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM),
818 	MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM),
819 	MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_PRO),
820 	MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_PRO),
821 	MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_MAX),
822 	MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX),
823 	MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD),
824 	MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE),
825 	MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_PRO),
826 	MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_PRO),
827 	MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_MAX),
828 	MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_MAX),
829 	{},
830 };
831 
vgic_v3_broken_seis(void)832 static bool vgic_v3_broken_seis(void)
833 {
834 	return (is_kernel_in_hyp_mode() &&
835 		is_midr_in_range_list(broken_seis) &&
836 		(read_sysreg_s(SYS_ICH_VTR_EL2) & ICH_VTR_EL2_SEIS));
837 }
838 
kvm_compute_ich_hcr_trap_bits(struct alt_instr * alt,__le32 * origptr,__le32 * updptr,int nr_inst)839 void noinstr kvm_compute_ich_hcr_trap_bits(struct alt_instr *alt,
840 					   __le32 *origptr, __le32 *updptr,
841 					   int nr_inst)
842 {
843 	u32 insn, oinsn, rd;
844 	u64 hcr = 0;
845 
846 	if (cpus_have_cap(ARM64_WORKAROUND_CAVIUM_30115)) {
847 		group0_trap = true;
848 		group1_trap = true;
849 	}
850 
851 	if (vgic_v3_broken_seis()) {
852 		/* We know that these machines have ICH_HCR_EL2.TDIR */
853 		group0_trap = true;
854 		group1_trap = true;
855 		dir_trap = true;
856 	}
857 
858 	if (!cpus_have_cap(ARM64_HAS_ICH_HCR_EL2_TDIR))
859 		common_trap = true;
860 
861 	if (group0_trap)
862 		hcr |= ICH_HCR_EL2_TALL0;
863 	if (group1_trap)
864 		hcr |= ICH_HCR_EL2_TALL1;
865 	if (common_trap)
866 		hcr |= ICH_HCR_EL2_TC;
867 	if (dir_trap)
868 		hcr |= ICH_HCR_EL2_TDIR;
869 
870 	/* Compute target register */
871 	oinsn = le32_to_cpu(*origptr);
872 	rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, oinsn);
873 
874 	/* movz rd, #(val & 0xffff) */
875 	insn = aarch64_insn_gen_movewide(rd,
876 					 (u16)hcr,
877 					 0,
878 					 AARCH64_INSN_VARIANT_64BIT,
879 					 AARCH64_INSN_MOVEWIDE_ZERO);
880 	*updptr = cpu_to_le32(insn);
881 }
882 
883 /**
884  * vgic_v3_probe - probe for a VGICv3 compatible interrupt controller
885  * @info:	pointer to the GIC description
886  *
887  * Returns 0 if the VGICv3 has been probed successfully, returns an error code
888  * otherwise
889  */
vgic_v3_probe(const struct gic_kvm_info * info)890 int vgic_v3_probe(const struct gic_kvm_info *info)
891 {
892 	u64 ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_gic_config);
893 	bool has_v2;
894 	u64 traps;
895 	int ret;
896 
897 	has_v2 = ich_vtr_el2 >> 63;
898 	ich_vtr_el2 = (u32)ich_vtr_el2;
899 
900 	/*
901 	 * The ListRegs field is 5 bits, but there is an architectural
902 	 * maximum of 16 list registers. Just ignore bit 4...
903 	 */
904 	kvm_vgic_global_state.nr_lr = (ich_vtr_el2 & 0xf) + 1;
905 	kvm_vgic_global_state.can_emulate_gicv2 = false;
906 	kvm_vgic_global_state.ich_vtr_el2 = ich_vtr_el2;
907 
908 	/* GICv4 support? */
909 	if (info->has_v4) {
910 		kvm_vgic_global_state.has_gicv4 = gicv4_enable;
911 		kvm_vgic_global_state.has_gicv4_1 = info->has_v4_1 && gicv4_enable;
912 		kvm_info("GICv4%s support %s\n",
913 			 kvm_vgic_global_state.has_gicv4_1 ? ".1" : "",
914 			 str_enabled_disabled(gicv4_enable));
915 	}
916 
917 	kvm_vgic_global_state.vcpu_base = 0;
918 
919 	if (!info->vcpu.start) {
920 		kvm_info("GICv3: no GICV resource entry\n");
921 	} else if (!has_v2) {
922 		pr_warn(FW_BUG "CPU interface incapable of MMIO access\n");
923 	} else if (!PAGE_ALIGNED(info->vcpu.start)) {
924 		pr_warn("GICV physical address 0x%llx not page aligned\n",
925 			(unsigned long long)info->vcpu.start);
926 	} else if (kvm_get_mode() != KVM_MODE_PROTECTED) {
927 		kvm_vgic_global_state.vcpu_base = info->vcpu.start;
928 		kvm_vgic_global_state.can_emulate_gicv2 = true;
929 		ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2);
930 		if (ret) {
931 			kvm_err("Cannot register GICv2 KVM device.\n");
932 			return ret;
933 		}
934 		kvm_info("vgic-v2@%llx\n", info->vcpu.start);
935 	}
936 	ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V3);
937 	if (ret) {
938 		kvm_err("Cannot register GICv3 KVM device.\n");
939 		kvm_unregister_device_ops(KVM_DEV_TYPE_ARM_VGIC_V2);
940 		return ret;
941 	}
942 
943 	if (kvm_vgic_global_state.vcpu_base == 0)
944 		kvm_info("disabling GICv2 emulation\n");
945 
946 	/*
947 	 * Flip the static branch if the HW supports v2, even if we're
948 	 * not using it (such as in protected mode).
949 	 */
950 	if (has_v2)
951 		static_branch_enable(&vgic_v3_has_v2_compat);
952 
953 	if (vgic_v3_broken_seis()) {
954 		kvm_info("GICv3 with broken locally generated SEI\n");
955 		kvm_vgic_global_state.ich_vtr_el2 &= ~ICH_VTR_EL2_SEIS;
956 	}
957 
958 	traps = vgic_ich_hcr_trap_bits();
959 	if (traps) {
960 		kvm_info("GICv3 sysreg trapping enabled ([%s%s%s%s], reduced performance)\n",
961 			 (traps & ICH_HCR_EL2_TALL0) ? "G0" : "",
962 			 (traps & ICH_HCR_EL2_TALL1) ? "G1" : "",
963 			 (traps & ICH_HCR_EL2_TC)    ? "C"  : "",
964 			 (traps & ICH_HCR_EL2_TDIR)  ? "D"  : "");
965 		static_branch_enable(&vgic_v3_cpuif_trap);
966 	}
967 
968 	kvm_vgic_global_state.vctrl_base = NULL;
969 	kvm_vgic_global_state.type = VGIC_V3;
970 	kvm_vgic_global_state.max_gic_vcpus = VGIC_V3_MAX_CPUS;
971 
972 	return 0;
973 }
974 
vgic_v3_load(struct kvm_vcpu * vcpu)975 void vgic_v3_load(struct kvm_vcpu *vcpu)
976 {
977 	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
978 
979 	/* If the vgic is nested, perform the full state loading */
980 	if (vgic_state_is_nested(vcpu)) {
981 		vgic_v3_load_nested(vcpu);
982 		return;
983 	}
984 
985 	if (likely(!is_protected_kvm_enabled()))
986 		kvm_call_hyp(__vgic_v3_restore_vmcr_aprs, cpu_if);
987 
988 	if (has_vhe())
989 		__vgic_v3_activate_traps(cpu_if);
990 
991 	WARN_ON(vgic_v4_load(vcpu));
992 }
993 
vgic_v3_put(struct kvm_vcpu * vcpu)994 void vgic_v3_put(struct kvm_vcpu *vcpu)
995 {
996 	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
997 
998 	if (vgic_state_is_nested(vcpu)) {
999 		vgic_v3_put_nested(vcpu);
1000 		return;
1001 	}
1002 
1003 	if (likely(!is_protected_kvm_enabled()))
1004 		kvm_call_hyp(__vgic_v3_save_aprs, cpu_if);
1005 	WARN_ON(vgic_v4_put(vcpu));
1006 
1007 	if (has_vhe())
1008 		__vgic_v3_deactivate_traps(cpu_if);
1009 }
1010