xref: /linux/arch/arm64/kvm/hyp/nvhe/hyp-main.c (revision c98d767b34574be82b74d77d02264a830ae1cadd)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2020 - Google Inc
4  * Author: Andrew Scull <ascull@google.com>
5  */
6 
7 #include <hyp/adjust_pc.h>
8 #include <hyp/switch.h>
9 
10 #include <asm/pgtable-types.h>
11 #include <asm/kvm_asm.h>
12 #include <asm/kvm_emulate.h>
13 #include <asm/kvm_host.h>
14 #include <asm/kvm_hyp.h>
15 #include <asm/kvm_hypevents.h>
16 #include <asm/kvm_mmu.h>
17 
18 #include <nvhe/ffa.h>
19 #include <nvhe/mem_protect.h>
20 #include <nvhe/mm.h>
21 #include <nvhe/pkvm.h>
22 #include <nvhe/trace.h>
23 #include <nvhe/trap_handler.h>
24 
/* Per-CPU instance of struct kvm_nvhe_init_params. */
DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);

/*
 * Number of implemented GICv3 LRs. flush_hyp_vcpu() uses it as the upper
 * bound when clamping the used_lrs value copied from the host vCPU.
 */
unsigned int hyp_gicv3_nr_lr;

/* Defined outside this file; invoked by default_host_smc_handler(). */
void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt);
31 
/*
 * Save the guest's SVE and common FPSIMD state into @vcpu.
 *
 * The guest's ZCR_EL1 is recorded first. ZCR_EL2 is then conditionally
 * updated to the guest's maximum VQ for the duration of the save, and is
 * rewritten from kvm_host_sve_max_vl once the save has completed.
 */
static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu)
{
	__vcpu_assign_sys_reg(vcpu, ZCR_EL1, read_sysreg_el1(SYS_ZCR));
	/*
	 * On saving/restoring guest sve state, always use the maximum VL for
	 * the guest. The layout of the data when saving the sve state depends
	 * on the VL, so use a consistent (i.e., the maximum) guest VL.
	 */
	sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
	sve_save_state(kern_hyp_va(vcpu->arch.sve_state), true);
	fpsimd_save_common(&vcpu->arch.ctxt.fp_regs);
	/* Done with the guest layout: program ZCR_EL2 from the host maximum VL. */
	write_sysreg_s(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, SYS_ZCR_EL2);
}
45 
46 static void __hyp_sve_restore_host(void)
47 {
48 	struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt);
49 	struct arm64_sve_state *sve_regs = *host_data_ptr(sve_regs);
50 
51 	/*
52 	 * On saving/restoring host sve state, always use the maximum VL for
53 	 * the host. The layout of the data when saving the sve state depends
54 	 * on the VL, so use a consistent (i.e., the maximum) host VL.
55 	 *
56 	 * Note that this constrains the PE to the maximum shared VL
57 	 * that was discovered, if we wish to use larger VLs this will
58 	 * need to be revisited.
59 	 */
60 	write_sysreg_s(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, SYS_ZCR_EL2);
61 	sve_load_state(sve_regs, true);
62 	fpsimd_load_common(&hctxt->fp_regs);
63 	write_sysreg_el1(ctxt_sys_reg(hctxt, ZCR_EL1), SYS_ZCR);
64 }
65 
/*
 * Mark the FP/SIMD register state as host-owned in the per-CPU host data.
 * Called from flush_hyp_vcpu() before the hyp vCPU is run.
 */
static void fpsimd_sve_flush(void)
{
	*host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
}
70 
71 static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
72 {
73 	struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt);
74 	bool has_fpmr;
75 
76 	if (!guest_owns_fp_regs())
77 		return;
78 
79 	/*
80 	 * Traps have been disabled by __deactivate_cptr_traps(), but there
81 	 * hasn't necessarily been a context synchronization event yet.
82 	 */
83 	isb();
84 
85 	if (vcpu_has_sve(vcpu))
86 		__hyp_sve_save_guest(vcpu);
87 	else
88 		fpsimd_save_state(&vcpu->arch.ctxt.fp_regs);
89 
90 	has_fpmr = kvm_has_fpmr(kern_hyp_va(vcpu->kvm));
91 	if (has_fpmr)
92 		__vcpu_assign_sys_reg(vcpu, FPMR, read_sysreg_s(SYS_FPMR));
93 
94 	if (system_supports_sve())
95 		__hyp_sve_restore_host();
96 	else
97 		fpsimd_load_state(&hctxt->fp_regs);
98 
99 	if (has_fpmr)
100 		write_sysreg_s(ctxt_sys_reg(hctxt, FPMR), SYS_FPMR);
101 
102 	*host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
103 }
104 
105 static void flush_debug_state(struct pkvm_hyp_vcpu *hyp_vcpu)
106 {
107 	struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
108 
109 	hyp_vcpu->vcpu.arch.debug_owner = host_vcpu->arch.debug_owner;
110 
111 	if (kvm_guest_owns_debug_regs(&hyp_vcpu->vcpu))
112 		hyp_vcpu->vcpu.arch.vcpu_debug_state = host_vcpu->arch.vcpu_debug_state;
113 	else if (kvm_host_owns_debug_regs(&hyp_vcpu->vcpu))
114 		hyp_vcpu->vcpu.arch.external_debug_state = host_vcpu->arch.external_debug_state;
115 }
116 
117 static void sync_debug_state(struct pkvm_hyp_vcpu *hyp_vcpu)
118 {
119 	struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
120 
121 	if (kvm_guest_owns_debug_regs(&hyp_vcpu->vcpu))
122 		host_vcpu->arch.vcpu_debug_state = hyp_vcpu->vcpu.arch.vcpu_debug_state;
123 	else if (kvm_host_owns_debug_regs(&hyp_vcpu->vcpu))
124 		host_vcpu->arch.external_debug_state = hyp_vcpu->vcpu.arch.external_debug_state;
125 }
126 
127 static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
128 {
129 	struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
130 
131 	fpsimd_sve_flush();
132 	flush_debug_state(hyp_vcpu);
133 
134 	hyp_vcpu->vcpu.arch.ctxt	= host_vcpu->arch.ctxt;
135 
136 	/* __hyp_running_vcpu must be NULL in a guest context. */
137 	hyp_vcpu->vcpu.arch.ctxt.__hyp_running_vcpu = NULL;
138 
139 	hyp_vcpu->vcpu.arch.mdcr_el2	= host_vcpu->arch.mdcr_el2;
140 	/*
141 	 * HCR_EL2.VSE is host-owned (a pending virtual SError to inject), not a
142 	 * trap-control bit, so it must flow to the hyp vCPU alongside TWI/TWE
143 	 * for the vSError to be delivered. sync_hyp_vcpu() reflects it back.
144 	 */
145 	hyp_vcpu->vcpu.arch.hcr_el2 &= ~(HCR_TWI | HCR_TWE | HCR_VSE);
146 	hyp_vcpu->vcpu.arch.hcr_el2 |= READ_ONCE(host_vcpu->arch.hcr_el2) &
147 						 (HCR_TWI | HCR_TWE | HCR_VSE);
148 
149 	hyp_vcpu->vcpu.arch.iflags	= host_vcpu->arch.iflags;
150 
151 	hyp_vcpu->vcpu.arch.vsesr_el2	= host_vcpu->arch.vsesr_el2;
152 
153 	hyp_vcpu->vcpu.arch.vgic_cpu.vgic_v3 = host_vcpu->arch.vgic_cpu.vgic_v3;
154 
155 	/* Bound used_lrs by the number of implemented list registers. */
156 	hyp_vcpu->vcpu.arch.vgic_cpu.vgic_v3.used_lrs =
157 		min_t(unsigned int,
158 		      hyp_vcpu->vcpu.arch.vgic_cpu.vgic_v3.used_lrs,
159 		      hyp_gicv3_nr_lr);
160 
161 	hyp_vcpu->vcpu.arch.pid = host_vcpu->arch.pid;
162 }
163 
164 static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
165 {
166 	struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
167 	struct vgic_v3_cpu_if *hyp_cpu_if = &hyp_vcpu->vcpu.arch.vgic_cpu.vgic_v3;
168 	struct vgic_v3_cpu_if *host_cpu_if = &host_vcpu->arch.vgic_cpu.vgic_v3;
169 	unsigned int i;
170 
171 	fpsimd_sve_sync(&hyp_vcpu->vcpu);
172 	sync_debug_state(hyp_vcpu);
173 
174 	host_vcpu->arch.ctxt		= hyp_vcpu->vcpu.arch.ctxt;
175 
176 	host_vcpu->arch.hcr_el2		= hyp_vcpu->vcpu.arch.hcr_el2;
177 
178 	host_vcpu->arch.fault		= hyp_vcpu->vcpu.arch.fault;
179 
180 	host_vcpu->arch.iflags		= hyp_vcpu->vcpu.arch.iflags;
181 
182 	host_cpu_if->vgic_hcr		= hyp_cpu_if->vgic_hcr;
183 	host_cpu_if->vgic_vmcr		= hyp_cpu_if->vgic_vmcr;
184 	for (i = 0; i < hyp_cpu_if->used_lrs; ++i)
185 		host_cpu_if->vgic_lr[i] = hyp_cpu_if->vgic_lr[i];
186 }
187 
188 static void handle___pkvm_vcpu_load(struct kvm_cpu_context *host_ctxt)
189 {
190 	DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
191 	DECLARE_REG(unsigned int, vcpu_idx, host_ctxt, 2);
192 	DECLARE_REG(u64, hcr_el2, host_ctxt, 3);
193 	struct pkvm_hyp_vcpu *hyp_vcpu;
194 
195 	hyp_vcpu = pkvm_load_hyp_vcpu(handle, vcpu_idx);
196 	if (!hyp_vcpu)
197 		return;
198 
199 	if (pkvm_hyp_vcpu_is_protected(hyp_vcpu)) {
200 		/* Propagate WFx trapping flags */
201 		hyp_vcpu->vcpu.arch.hcr_el2 &= ~(HCR_TWE | HCR_TWI);
202 		hyp_vcpu->vcpu.arch.hcr_el2 |= hcr_el2 & (HCR_TWE | HCR_TWI);
203 	} else {
204 		memcpy(&hyp_vcpu->vcpu.arch.fgt, hyp_vcpu->host_vcpu->arch.fgt,
205 		       sizeof(hyp_vcpu->vcpu.arch.fgt));
206 	}
207 }
208 
/*
 * Hypercall: put the currently loaded hyp vCPU, if there is one.
 * Takes no arguments and writes nothing back to the host context.
 */
static void handle___pkvm_vcpu_put(struct kvm_cpu_context *host_ctxt)
{
	struct pkvm_hyp_vcpu *loaded = pkvm_get_loaded_hyp_vcpu();

	if (!loaded)
		return;

	pkvm_put_hyp_vcpu(loaded);
}
216 
217 static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt)
218 {
219 	DECLARE_REG(struct kvm_vcpu *, host_vcpu, host_ctxt, 1);
220 	int ret;
221 
222 	if (unlikely(is_protected_kvm_enabled())) {
223 		struct pkvm_hyp_vcpu *hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
224 
225 		/*
226 		 * KVM (and pKVM) doesn't support SME guests for now, and
227 		 * ensures that SME features aren't enabled in pstate when
228 		 * loading a vcpu. Therefore, if SME features enabled the host
229 		 * is misbehaving.
230 		 */
231 		if (unlikely(system_supports_sme() && read_sysreg_s(SYS_SVCR))) {
232 			ret = -EINVAL;
233 			goto out;
234 		}
235 
236 		if (!hyp_vcpu) {
237 			ret = -EINVAL;
238 			goto out;
239 		}
240 
241 		flush_hyp_vcpu(hyp_vcpu);
242 
243 		ret = __kvm_vcpu_run(&hyp_vcpu->vcpu);
244 
245 		sync_hyp_vcpu(hyp_vcpu);
246 	} else {
247 		struct kvm_vcpu *vcpu = kern_hyp_va(host_vcpu);
248 
249 		/* The host is fully trusted, run its vCPU directly. */
250 		fpsimd_lazy_switch_to_guest(vcpu);
251 		ret = __kvm_vcpu_run(vcpu);
252 		fpsimd_lazy_switch_to_host(vcpu);
253 	}
254 out:
255 	cpu_reg(host_ctxt, 1) =  ret;
256 }
257 
258 static int pkvm_refill_memcache(struct pkvm_hyp_vcpu *hyp_vcpu)
259 {
260 	struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
261 
262 	return refill_memcache(&hyp_vcpu->vcpu.arch.pkvm_memcache,
263 			       host_vcpu->arch.pkvm_memcache.nr_pages,
264 			       &host_vcpu->arch.pkvm_memcache);
265 }
266 
267 static void handle___pkvm_host_donate_guest(struct kvm_cpu_context *host_ctxt)
268 {
269 	DECLARE_REG(u64, pfn, host_ctxt, 1);
270 	DECLARE_REG(u64, gfn, host_ctxt, 2);
271 	struct pkvm_hyp_vcpu *hyp_vcpu;
272 	int ret = -EINVAL;
273 
274 	hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
275 	if (!hyp_vcpu || !pkvm_hyp_vcpu_is_protected(hyp_vcpu))
276 		goto out;
277 
278 	ret = pkvm_refill_memcache(hyp_vcpu);
279 	if (ret)
280 		goto out;
281 
282 	ret = __pkvm_host_donate_guest(pfn, gfn, hyp_vcpu);
283 out:
284 	cpu_reg(host_ctxt, 1) =  ret;
285 }
286 
287 static void handle___pkvm_host_share_guest(struct kvm_cpu_context *host_ctxt)
288 {
289 	DECLARE_REG(u64, pfn, host_ctxt, 1);
290 	DECLARE_REG(u64, gfn, host_ctxt, 2);
291 	DECLARE_REG(u64, nr_pages, host_ctxt, 3);
292 	DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 4);
293 	struct pkvm_hyp_vcpu *hyp_vcpu;
294 	int ret = -EINVAL;
295 
296 	hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
297 	if (!hyp_vcpu || pkvm_hyp_vcpu_is_protected(hyp_vcpu))
298 		goto out;
299 
300 	ret = pkvm_refill_memcache(hyp_vcpu);
301 	if (ret)
302 		goto out;
303 
304 	ret = __pkvm_host_share_guest(pfn, gfn, nr_pages, hyp_vcpu, prot);
305 out:
306 	cpu_reg(host_ctxt, 1) =  ret;
307 }
308 
309 static void handle___pkvm_host_unshare_guest(struct kvm_cpu_context *host_ctxt)
310 {
311 	DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
312 	DECLARE_REG(u64, gfn, host_ctxt, 2);
313 	DECLARE_REG(u64, nr_pages, host_ctxt, 3);
314 	struct pkvm_hyp_vm *hyp_vm;
315 	int ret = -EINVAL;
316 
317 	hyp_vm = get_np_pkvm_hyp_vm(handle);
318 	if (!hyp_vm)
319 		goto out;
320 
321 	ret = __pkvm_host_unshare_guest(gfn, nr_pages, hyp_vm);
322 	put_pkvm_hyp_vm(hyp_vm);
323 out:
324 	cpu_reg(host_ctxt, 1) =  ret;
325 }
326 
327 static void handle___pkvm_host_relax_perms_guest(struct kvm_cpu_context *host_ctxt)
328 {
329 	DECLARE_REG(u64, gfn, host_ctxt, 1);
330 	DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 2);
331 	struct pkvm_hyp_vcpu *hyp_vcpu;
332 	int ret = -EINVAL;
333 
334 	hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
335 	if (!hyp_vcpu || pkvm_hyp_vcpu_is_protected(hyp_vcpu))
336 		goto out;
337 
338 	ret = __pkvm_host_relax_perms_guest(gfn, hyp_vcpu, prot);
339 out:
340 	cpu_reg(host_ctxt, 1) = ret;
341 }
342 
343 static void handle___pkvm_host_wrprotect_guest(struct kvm_cpu_context *host_ctxt)
344 {
345 	DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
346 	DECLARE_REG(u64, gfn, host_ctxt, 2);
347 	DECLARE_REG(u64, nr_pages, host_ctxt, 3);
348 	struct pkvm_hyp_vm *hyp_vm;
349 	int ret = -EINVAL;
350 
351 	hyp_vm = get_np_pkvm_hyp_vm(handle);
352 	if (!hyp_vm)
353 		goto out;
354 
355 	ret = __pkvm_host_wrprotect_guest(gfn, nr_pages, hyp_vm);
356 	put_pkvm_hyp_vm(hyp_vm);
357 out:
358 	cpu_reg(host_ctxt, 1) = ret;
359 }
360 
361 static void handle___pkvm_host_test_clear_young_guest(struct kvm_cpu_context *host_ctxt)
362 {
363 	DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
364 	DECLARE_REG(u64, gfn, host_ctxt, 2);
365 	DECLARE_REG(u64, nr_pages, host_ctxt, 3);
366 	DECLARE_REG(bool, mkold, host_ctxt, 4);
367 	struct pkvm_hyp_vm *hyp_vm;
368 	int ret = -EINVAL;
369 
370 	hyp_vm = get_np_pkvm_hyp_vm(handle);
371 	if (!hyp_vm)
372 		goto out;
373 
374 	ret = __pkvm_host_test_clear_young_guest(gfn, nr_pages, mkold, hyp_vm);
375 	put_pkvm_hyp_vm(hyp_vm);
376 out:
377 	cpu_reg(host_ctxt, 1) = ret;
378 }
379 
380 static void handle___pkvm_host_mkyoung_guest(struct kvm_cpu_context *host_ctxt)
381 {
382 	DECLARE_REG(u64, gfn, host_ctxt, 1);
383 	struct pkvm_hyp_vcpu *hyp_vcpu;
384 	int ret = -EINVAL;
385 
386 	hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
387 	if (!hyp_vcpu || pkvm_hyp_vcpu_is_protected(hyp_vcpu))
388 		goto out;
389 
390 	ret = __pkvm_host_mkyoung_guest(gfn, hyp_vcpu);
391 out:
392 	cpu_reg(host_ctxt, 1) =  ret;
393 }
394 
395 static void handle___kvm_adjust_pc(struct kvm_cpu_context *host_ctxt)
396 {
397 	DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1);
398 
399 	__kvm_adjust_pc(kern_hyp_va(vcpu));
400 }
401 
/*
 * Hypercall: call __kvm_flush_vm_context(). Takes no arguments from, and
 * writes nothing back to, the host context.
 */
static void handle___kvm_flush_vm_context(struct kvm_cpu_context *host_ctxt)
{
	__kvm_flush_vm_context();
}
406 
407 static void handle___kvm_tlb_flush_vmid_ipa(struct kvm_cpu_context *host_ctxt)
408 {
409 	DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
410 	DECLARE_REG(phys_addr_t, ipa, host_ctxt, 2);
411 	DECLARE_REG(int, level, host_ctxt, 3);
412 
413 	__kvm_tlb_flush_vmid_ipa(kern_hyp_va(mmu), ipa, level);
414 }
415 
416 static void handle___kvm_tlb_flush_vmid_ipa_nsh(struct kvm_cpu_context *host_ctxt)
417 {
418 	DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
419 	DECLARE_REG(phys_addr_t, ipa, host_ctxt, 2);
420 	DECLARE_REG(int, level, host_ctxt, 3);
421 
422 	__kvm_tlb_flush_vmid_ipa_nsh(kern_hyp_va(mmu), ipa, level);
423 }
424 
425 static void
426 handle___kvm_tlb_flush_vmid_range(struct kvm_cpu_context *host_ctxt)
427 {
428 	DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
429 	DECLARE_REG(phys_addr_t, start, host_ctxt, 2);
430 	DECLARE_REG(unsigned long, pages, host_ctxt, 3);
431 
432 	__kvm_tlb_flush_vmid_range(kern_hyp_va(mmu), start, pages);
433 }
434 
435 static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
436 {
437 	DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
438 
439 	__kvm_tlb_flush_vmid(kern_hyp_va(mmu));
440 }
441 
442 static void handle___pkvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
443 {
444 	DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
445 	struct pkvm_hyp_vm *hyp_vm = get_np_pkvm_hyp_vm(handle);
446 
447 	if (!hyp_vm)
448 		return;
449 
450 	__kvm_tlb_flush_vmid(&hyp_vm->kvm.arch.mmu);
451 	put_pkvm_hyp_vm(hyp_vm);
452 }
453 
454 static void handle___kvm_flush_cpu_context(struct kvm_cpu_context *host_ctxt)
455 {
456 	DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
457 
458 	__kvm_flush_cpu_context(kern_hyp_va(mmu));
459 }
460 
461 static void handle___kvm_timer_set_cntvoff(struct kvm_cpu_context *host_ctxt)
462 {
463 	__kvm_timer_set_cntvoff(cpu_reg(host_ctxt, 1));
464 }
465 
466 static void handle___kvm_enable_ssbs(struct kvm_cpu_context *host_ctxt)
467 {
468 	u64 tmp;
469 
470 	tmp = read_sysreg_el2(SYS_SCTLR);
471 	tmp |= SCTLR_ELx_DSSBS;
472 	write_sysreg_el2(tmp, SYS_SCTLR);
473 }
474 
/*
 * Hypercall: write the return value of __vgic_v3_get_gic_config() to host
 * register 1. Takes no arguments.
 */
static void handle___vgic_v3_get_gic_config(struct kvm_cpu_context *host_ctxt)
{
	cpu_reg(host_ctxt, 1) = __vgic_v3_get_gic_config();
}
479 
/*
 * Hypercall: call __vgic_v3_init_lrs(). Takes no arguments from, and writes
 * nothing back to, the host context.
 */
static void handle___vgic_v3_init_lrs(struct kvm_cpu_context *host_ctxt)
{
	__vgic_v3_init_lrs();
}
484 
485 static void handle___vgic_v3_save_aprs(struct kvm_cpu_context *host_ctxt)
486 {
487 	DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1);
488 
489 	__vgic_v3_save_aprs(kern_hyp_va(cpu_if));
490 }
491 
492 static void handle___vgic_v3_restore_vmcr_aprs(struct kvm_cpu_context *host_ctxt)
493 {
494 	DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1);
495 
496 	__vgic_v3_restore_vmcr_aprs(kern_hyp_va(cpu_if));
497 }
498 
499 static void handle___pkvm_init(struct kvm_cpu_context *host_ctxt)
500 {
501 	DECLARE_REG(phys_addr_t, phys, host_ctxt, 1);
502 	DECLARE_REG(unsigned long, size, host_ctxt, 2);
503 	DECLARE_REG(unsigned long *, per_cpu_base, host_ctxt, 3);
504 	DECLARE_REG(u32, hyp_va_bits, host_ctxt, 4);
505 
506 	/*
507 	 * __pkvm_init() will return only if an error occurred, otherwise it
508 	 * will tail-call in __pkvm_init_finalise() which will have to deal
509 	 * with the host context directly.
510 	 */
511 	cpu_reg(host_ctxt, 1) = __pkvm_init(phys, size, per_cpu_base, hyp_va_bits);
512 }
513 
514 static void handle___pkvm_cpu_set_vector(struct kvm_cpu_context *host_ctxt)
515 {
516 	DECLARE_REG(enum arm64_hyp_spectre_vector, slot, host_ctxt, 1);
517 
518 	cpu_reg(host_ctxt, 1) = pkvm_cpu_set_vector(slot);
519 }
520 
521 static void handle___pkvm_host_share_hyp(struct kvm_cpu_context *host_ctxt)
522 {
523 	DECLARE_REG(u64, pfn, host_ctxt, 1);
524 
525 	cpu_reg(host_ctxt, 1) = __pkvm_host_share_hyp(pfn);
526 }
527 
528 static void handle___pkvm_host_unshare_hyp(struct kvm_cpu_context *host_ctxt)
529 {
530 	DECLARE_REG(u64, pfn, host_ctxt, 1);
531 
532 	cpu_reg(host_ctxt, 1) = __pkvm_host_unshare_hyp(pfn);
533 }
534 
535 static void handle___pkvm_create_private_mapping(struct kvm_cpu_context *host_ctxt)
536 {
537 	DECLARE_REG(phys_addr_t, phys, host_ctxt, 1);
538 	DECLARE_REG(size_t, size, host_ctxt, 2);
539 	DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 3);
540 
541 	/*
542 	 * __pkvm_create_private_mapping() populates a pointer with the
543 	 * hypervisor start address of the allocation.
544 	 *
545 	 * However, handle___pkvm_create_private_mapping() hypercall crosses the
546 	 * EL1/EL2 boundary so the pointer would not be valid in this context.
547 	 *
548 	 * Instead pass the allocation address as the return value (or return
549 	 * ERR_PTR() on failure).
550 	 */
551 	unsigned long haddr;
552 	int err = __pkvm_create_private_mapping(phys, size, prot, &haddr);
553 
554 	if (err)
555 		haddr = (unsigned long)ERR_PTR(err);
556 
557 	cpu_reg(host_ctxt, 1) = haddr;
558 }
559 
/*
 * Hypercall: write the return value of __pkvm_prot_finalize() to host
 * register 1. Takes no arguments.
 */
static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt)
{
	cpu_reg(host_ctxt, 1) = __pkvm_prot_finalize();
}
564 
/*
 * Hypercall: write the return value of __pkvm_reserve_vm() to host
 * register 1. Takes no arguments.
 */
static void handle___pkvm_reserve_vm(struct kvm_cpu_context *host_ctxt)
{
	cpu_reg(host_ctxt, 1) = __pkvm_reserve_vm();
}
569 
570 static void handle___pkvm_unreserve_vm(struct kvm_cpu_context *host_ctxt)
571 {
572 	DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
573 
574 	__pkvm_unreserve_vm(handle);
575 }
576 
577 static void handle___pkvm_init_vm(struct kvm_cpu_context *host_ctxt)
578 {
579 	DECLARE_REG(struct kvm *, host_kvm, host_ctxt, 1);
580 	DECLARE_REG(unsigned long, vm_hva, host_ctxt, 2);
581 	DECLARE_REG(unsigned long, pgd_hva, host_ctxt, 3);
582 
583 	host_kvm = kern_hyp_va(host_kvm);
584 	cpu_reg(host_ctxt, 1) = __pkvm_init_vm(host_kvm, vm_hva, pgd_hva);
585 }
586 
587 static void handle___pkvm_init_vcpu(struct kvm_cpu_context *host_ctxt)
588 {
589 	DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
590 	DECLARE_REG(struct kvm_vcpu *, host_vcpu, host_ctxt, 2);
591 	DECLARE_REG(unsigned long, vcpu_hva, host_ctxt, 3);
592 
593 	host_vcpu = kern_hyp_va(host_vcpu);
594 	cpu_reg(host_ctxt, 1) = __pkvm_init_vcpu(handle, host_vcpu, vcpu_hva);
595 }
596 
597 static void handle___pkvm_vcpu_in_poison_fault(struct kvm_cpu_context *host_ctxt)
598 {
599 	int ret;
600 	struct pkvm_hyp_vcpu *hyp_vcpu = pkvm_get_loaded_hyp_vcpu();
601 
602 	ret = hyp_vcpu ? __pkvm_vcpu_in_poison_fault(hyp_vcpu) : -EINVAL;
603 	cpu_reg(host_ctxt, 1) = ret;
604 }
605 
606 static void handle___pkvm_force_reclaim_guest_page(struct kvm_cpu_context *host_ctxt)
607 {
608 	DECLARE_REG(phys_addr_t, phys, host_ctxt, 1);
609 
610 	cpu_reg(host_ctxt, 1) = __pkvm_host_force_reclaim_page_guest(phys);
611 }
612 
613 static void handle___pkvm_reclaim_dying_guest_page(struct kvm_cpu_context *host_ctxt)
614 {
615 	DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
616 	DECLARE_REG(u64, gfn, host_ctxt, 2);
617 
618 	cpu_reg(host_ctxt, 1) = __pkvm_reclaim_dying_guest_page(handle, gfn);
619 }
620 
621 static void handle___pkvm_start_teardown_vm(struct kvm_cpu_context *host_ctxt)
622 {
623 	DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
624 
625 	cpu_reg(host_ctxt, 1) = __pkvm_start_teardown_vm(handle);
626 }
627 
628 static void handle___pkvm_finalize_teardown_vm(struct kvm_cpu_context *host_ctxt)
629 {
630 	DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
631 
632 	cpu_reg(host_ctxt, 1) = __pkvm_finalize_teardown_vm(handle);
633 }
634 
635 static void handle___tracing_load(struct kvm_cpu_context *host_ctxt)
636 {
637 	DECLARE_REG(unsigned long, desc_hva, host_ctxt, 1);
638 	DECLARE_REG(size_t, desc_size, host_ctxt, 2);
639 
640 	cpu_reg(host_ctxt, 1) = __tracing_load(desc_hva, desc_size);
641 }
642 
/*
 * Hypercall: call __tracing_unload(). Takes no arguments from, and writes
 * nothing back to, the host context.
 */
static void handle___tracing_unload(struct kvm_cpu_context *host_ctxt)
{
	__tracing_unload();
}
647 
648 static void handle___tracing_enable(struct kvm_cpu_context *host_ctxt)
649 {
650 	DECLARE_REG(bool, enable, host_ctxt, 1);
651 
652 	cpu_reg(host_ctxt, 1) = __tracing_enable(enable);
653 }
654 
655 static void handle___tracing_swap_reader(struct kvm_cpu_context *host_ctxt)
656 {
657 	DECLARE_REG(unsigned int, cpu, host_ctxt, 1);
658 
659 	cpu_reg(host_ctxt, 1) = __tracing_swap_reader(cpu);
660 }
661 
662 static void handle___tracing_update_clock(struct kvm_cpu_context *host_ctxt)
663 {
664 	DECLARE_REG(u32, mult, host_ctxt, 1);
665 	DECLARE_REG(u32, shift, host_ctxt, 2);
666 	DECLARE_REG(u64, epoch_ns, host_ctxt, 3);
667 	DECLARE_REG(u64, epoch_cyc, host_ctxt, 4);
668 
669 	__tracing_update_clock(mult, shift, epoch_ns, epoch_cyc);
670 }
671 
672 static void handle___tracing_reset(struct kvm_cpu_context *host_ctxt)
673 {
674 	DECLARE_REG(unsigned int, cpu, host_ctxt, 1);
675 
676 	cpu_reg(host_ctxt, 1) = __tracing_reset(cpu);
677 }
678 
679 static void handle___tracing_enable_event(struct kvm_cpu_context *host_ctxt)
680 {
681 	DECLARE_REG(unsigned short, id, host_ctxt, 1);
682 	DECLARE_REG(bool, enable, host_ctxt, 2);
683 
684 	cpu_reg(host_ctxt, 1) = __tracing_enable_event(id, enable);
685 }
686 
687 static void handle___tracing_write_event(struct kvm_cpu_context *host_ctxt)
688 {
689 	DECLARE_REG(u64, id, host_ctxt, 1);
690 
691 	trace_selftest(id);
692 }
693 
694 static void handle___vgic_v5_save_apr(struct kvm_cpu_context *host_ctxt)
695 {
696 	DECLARE_REG(struct vgic_v5_cpu_if *, cpu_if, host_ctxt, 1);
697 
698 	__vgic_v5_save_apr(kern_hyp_va(cpu_if));
699 }
700 
701 static void handle___vgic_v5_restore_vmcr_apr(struct kvm_cpu_context *host_ctxt)
702 {
703 	DECLARE_REG(struct vgic_v5_cpu_if *, cpu_if, host_ctxt, 1);
704 
705 	__vgic_v5_restore_vmcr_apr(kern_hyp_va(cpu_if));
706 }
707 
/* Signature shared by every host hypercall handler in this file. */
typedef void (*hcall_t)(struct kvm_cpu_context *);

/*
 * Build one designated initializer for host_hcall[]: the slot
 * __KVM_HOST_SMCCC_FUNC_<x> is filled with handle_<x>.
 */
#define HANDLE_FUNC(x)	[__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x

/*
 * Host hypercall dispatch table, indexed by __KVM_HOST_SMCCC_FUNC_* value.
 * Entries are designated initializers, so any slot not listed here is NULL;
 * handle_host_hcall() rejects NULL slots with SMCCC_RET_NOT_SUPPORTED.
 * handle_host_hcall() also has a BUILD_BUG_ON() requiring the table size to
 * equal __KVM_HOST_SMCCC_FUNC_MAX.
 */
static const hcall_t host_hcall[] = {
	/* ___kvm_hyp_init */
	HANDLE_FUNC(__pkvm_init),
	HANDLE_FUNC(__pkvm_create_private_mapping),
	HANDLE_FUNC(__pkvm_cpu_set_vector),
	HANDLE_FUNC(__kvm_enable_ssbs),
	HANDLE_FUNC(__vgic_v3_init_lrs),
	HANDLE_FUNC(__vgic_v3_get_gic_config),
	HANDLE_FUNC(__pkvm_prot_finalize),

	HANDLE_FUNC(__kvm_adjust_pc),
	HANDLE_FUNC(__kvm_vcpu_run),
	HANDLE_FUNC(__kvm_flush_vm_context),
	HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
	HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa_nsh),
	HANDLE_FUNC(__kvm_tlb_flush_vmid),
	HANDLE_FUNC(__kvm_tlb_flush_vmid_range),
	HANDLE_FUNC(__kvm_flush_cpu_context),
	HANDLE_FUNC(__kvm_timer_set_cntvoff),
	HANDLE_FUNC(__tracing_load),
	HANDLE_FUNC(__tracing_unload),
	HANDLE_FUNC(__tracing_enable),
	HANDLE_FUNC(__tracing_swap_reader),
	HANDLE_FUNC(__tracing_update_clock),
	HANDLE_FUNC(__tracing_reset),
	HANDLE_FUNC(__tracing_enable_event),
	HANDLE_FUNC(__tracing_write_event),
	HANDLE_FUNC(__vgic_v3_save_aprs),
	HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs),
	HANDLE_FUNC(__vgic_v5_save_apr),
	HANDLE_FUNC(__vgic_v5_restore_vmcr_apr),

	HANDLE_FUNC(__pkvm_host_share_hyp),
	HANDLE_FUNC(__pkvm_host_unshare_hyp),
	HANDLE_FUNC(__pkvm_host_donate_guest),
	HANDLE_FUNC(__pkvm_host_share_guest),
	HANDLE_FUNC(__pkvm_host_unshare_guest),
	HANDLE_FUNC(__pkvm_host_relax_perms_guest),
	HANDLE_FUNC(__pkvm_host_wrprotect_guest),
	HANDLE_FUNC(__pkvm_host_test_clear_young_guest),
	HANDLE_FUNC(__pkvm_host_mkyoung_guest),
	HANDLE_FUNC(__pkvm_reserve_vm),
	HANDLE_FUNC(__pkvm_unreserve_vm),
	HANDLE_FUNC(__pkvm_init_vm),
	HANDLE_FUNC(__pkvm_init_vcpu),
	HANDLE_FUNC(__pkvm_vcpu_in_poison_fault),
	HANDLE_FUNC(__pkvm_force_reclaim_guest_page),
	HANDLE_FUNC(__pkvm_reclaim_dying_guest_page),
	HANDLE_FUNC(__pkvm_start_teardown_vm),
	HANDLE_FUNC(__pkvm_finalize_teardown_vm),
	HANDLE_FUNC(__pkvm_vcpu_load),
	HANDLE_FUNC(__pkvm_vcpu_put),
	HANDLE_FUNC(__pkvm_tlb_flush_vmid),
};
766 
767 static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
768 {
769 	DECLARE_REG(unsigned long, id, host_ctxt, 0);
770 	unsigned long hcall_min = 0, hcall_max = __KVM_HOST_SMCCC_FUNC_MAX;
771 	hcall_t hfn;
772 
773 	BUILD_BUG_ON(ARRAY_SIZE(host_hcall) != __KVM_HOST_SMCCC_FUNC_MAX);
774 
775 	/*
776 	 * If pKVM has been initialised then reject any calls to the
777 	 * early "privileged" hypercalls. Note that we cannot reject
778 	 * calls to __pkvm_prot_finalize for two reasons: (1) The static
779 	 * key used to determine initialisation must be toggled prior to
780 	 * finalisation and (2) finalisation is performed on a per-CPU
781 	 * basis. This is all fine, however, since __pkvm_prot_finalize
782 	 * returns -EPERM after the first call for a given CPU.
783 	 */
784 	if (static_branch_unlikely(&kvm_protected_mode_initialized)) {
785 		hcall_min = __KVM_HOST_SMCCC_FUNC_MIN_PKVM;
786 	} else {
787 		hcall_max = __KVM_HOST_SMCCC_FUNC_PKVM_ONLY;
788 	}
789 
790 	id &= ~ARM_SMCCC_CALL_HINTS;
791 	id -= KVM_HOST_SMCCC_ID(0);
792 
793 	if (unlikely(id < hcall_min || id >= hcall_max))
794 		goto inval;
795 
796 	hfn = host_hcall[id];
797 	if (unlikely(!hfn))
798 		goto inval;
799 
800 	cpu_reg(host_ctxt, 0) = SMCCC_RET_SUCCESS;
801 	hfn(host_ctxt);
802 
803 	return;
804 inval:
805 	cpu_reg(host_ctxt, 0) = SMCCC_RET_NOT_SUPPORTED;
806 }
807 
/*
 * Fallback for host SMCs that no other handler claimed: forward the call
 * with __kvm_hyp_host_forward_smc(). The forward is bracketed by a hyp exit
 * and a hyp enter trace event, both tagged HYP_REASON_SMC.
 */
static void default_host_smc_handler(struct kvm_cpu_context *host_ctxt)
{
	trace_hyp_exit(host_ctxt, HYP_REASON_SMC);
	__kvm_hyp_host_forward_smc(host_ctxt);
	trace_hyp_enter(host_ctxt, HYP_REASON_SMC);
}
814 
815 static void handle_host_smc(struct kvm_cpu_context *host_ctxt)
816 {
817 	DECLARE_REG(u64, func_id, host_ctxt, 0);
818 	u64 esr = read_sysreg_el2(SYS_ESR);
819 	bool handled;
820 
821 	if (esr & ESR_ELx_xVC_IMM_MASK) {
822 		cpu_reg(host_ctxt, 0) = SMCCC_RET_NOT_SUPPORTED;
823 		goto exit_skip_instr;
824 	}
825 
826 	func_id &= ~ARM_SMCCC_CALL_HINTS;
827 	if (upper_32_bits(func_id)) {
828 		cpu_reg(host_ctxt, 0) = SMCCC_RET_NOT_SUPPORTED;
829 		goto exit_skip_instr;
830 	}
831 
832 	handled = kvm_host_psci_handler(host_ctxt, func_id);
833 	if (!handled)
834 		handled = kvm_host_ffa_handler(host_ctxt, func_id);
835 	if (!handled)
836 		default_host_smc_handler(host_ctxt);
837 
838 exit_skip_instr:
839 	/* SMC was trapped, move ELR past the current PC. */
840 	kvm_skip_host_instr();
841 }
842 
843 void inject_host_exception(u64 esr)
844 {
845 	u64 sctlr, spsr_el1, spsr_el2, exc_offset = except_type_sync;
846 	const u64 spsr_mask = PSR_N_BIT | PSR_Z_BIT | PSR_C_BIT |
847 			      PSR_V_BIT | PSR_DIT_BIT | PSR_PAN_BIT;
848 
849 	spsr_el1 = spsr_el2 = read_sysreg_el2(SYS_SPSR);
850 	switch (spsr_el1 & (PSR_MODE_MASK | PSR_MODE32_BIT)) {
851 	case PSR_MODE_EL0t:
852 		exc_offset += LOWER_EL_AArch64_VECTOR;
853 		break;
854 	case PSR_MODE_EL0t | PSR_MODE32_BIT:
855 		exc_offset += LOWER_EL_AArch32_VECTOR;
856 		break;
857 	default:
858 		exc_offset += CURRENT_EL_SP_ELx_VECTOR;
859 	}
860 
861 	spsr_el2 &= spsr_mask;
862 	spsr_el2 |= PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT |
863 		    PSR_MODE_EL1h;
864 
865 	sctlr = read_sysreg_el1(SYS_SCTLR);
866 	if (!(sctlr & SCTLR_EL1_SPAN))
867 		spsr_el2 |= PSR_PAN_BIT;
868 
869 	if (sctlr & SCTLR_ELx_DSSBS)
870 		spsr_el2 |= PSR_SSBS_BIT;
871 
872 	if (system_supports_mte())
873 		spsr_el2 |= PSR_TCO_BIT;
874 
875 	if (esr_fsc_is_translation_fault(esr))
876 		write_sysreg_el1(read_sysreg_el2(SYS_FAR), SYS_FAR);
877 
878 	write_sysreg_el1(esr, SYS_ESR);
879 	write_sysreg_el1(read_sysreg_el2(SYS_ELR), SYS_ELR);
880 	write_sysreg_el1(spsr_el1, SYS_SPSR);
881 	write_sysreg_el2(read_sysreg_el1(SYS_VBAR) + exc_offset, SYS_ELR);
882 	write_sysreg_el2(spsr_el2, SYS_SPSR);
883 }
884 
/*
 * Inject an exception into the host whose syndrome has exception class
 * ESR_ELx_EC_UNKNOWN with the IL bit set.
 */
static void inject_host_undef64(void)
{
	inject_host_exception((ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT) |
			       ESR_ELx_IL);
}
890 
891 static bool handle_host_mte(u64 esr)
892 {
893 	switch (esr_sys64_to_sysreg(esr)) {
894 	case SYS_RGSR_EL1:
895 	case SYS_GCR_EL1:
896 	case SYS_TFSR_EL1:
897 	case SYS_TFSRE0_EL1:
898 		/* If we're here for any reason other than MTE, it's a bug. */
899 		if (read_sysreg(HCR_EL2) & HCR_ATA)
900 			return false;
901 		break;
902 	case SYS_GMID_EL1:
903 		/* If we're here for any reason other than MTE, it's a bug. */
904 		if (!(read_sysreg(HCR_EL2) & HCR_TID5))
905 			return false;
906 		break;
907 	default:
908 		return false;
909 	}
910 
911 	inject_host_undef64();
912 	return true;
913 }
914 
915 void handle_trap(struct kvm_cpu_context *host_ctxt)
916 {
917 	u64 esr = read_sysreg_el2(SYS_ESR);
918 
919 
920 	switch (ESR_ELx_EC(esr)) {
921 	case ESR_ELx_EC_HVC64:
922 		trace_hyp_enter(host_ctxt, HYP_REASON_HVC);
923 		handle_host_hcall(host_ctxt);
924 		break;
925 	case ESR_ELx_EC_SMC64:
926 		trace_hyp_enter(host_ctxt, HYP_REASON_SMC);
927 		handle_host_smc(host_ctxt);
928 		break;
929 	case ESR_ELx_EC_IABT_LOW:
930 	case ESR_ELx_EC_DABT_LOW:
931 		trace_hyp_enter(host_ctxt, HYP_REASON_HOST_ABORT);
932 		handle_host_mem_abort(host_ctxt);
933 		break;
934 	case ESR_ELx_EC_SYS64:
935 		if (handle_host_mte(esr))
936 			break;
937 		fallthrough;
938 	default:
939 		BUG();
940 	}
941 
942 	trace_hyp_exit(host_ctxt, HYP_REASON_ERET_HOST);
943 }
944