xref: /linux/arch/arm64/kvm/at.c (revision 1623bc27a85a93e82194c8d077eccc464efa67db)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2017 - Linaro Ltd
4  * Author: Jintack Lim <jintack.lim@linaro.org>
5  */
6 
7 #include <linux/kvm_host.h>
8 
9 #include <asm/esr.h>
10 #include <asm/kvm_hyp.h>
11 #include <asm/kvm_mmu.h>
12 
/*
 * Translation regime an AT instruction applies to, as selected by
 * compute_translation_regime() from the AT opcode and the guest's
 * HCR_EL2.{E2H,TGE} bits.
 */
enum trans_regime {
	TR_EL10,	/* EL1&0 regime, walked using the *_EL1 registers */
	TR_EL20,	/* EL2 registers, only selected when E2H is set */
	TR_EL2,		/* EL2 registers, selected for AT S1E2* when E2H is clear */
};
18 
/*
 * Parameters of a software stage-1 walk. Filled in by setup_s1_walk()
 * and consumed by walk_s1() and the permission helpers.
 */
struct s1_walk_info {
	u64	     		baddr;		/* Base of the first table, taken from TTBRx */
	enum trans_regime	regime;		/* Regime the AT instruction applies to */
	unsigned int		max_oa_bits;	/* Output address size used for range checks */
	unsigned int		pgshift;	/* log2 of the granule: 12, 14 or 16 */
	unsigned int		txsz;		/* TCR_ELx.T0SZ or T1SZ, depending on VA[55] */
	int 	     		sl;		/* Level at which the walk starts */
	bool	     		hpd;		/* Table-level AP/XN bits are ignored */
	bool			e0poe;		/* Overlays enabled for unprivileged accesses */
	bool			poe;		/* Overlays enabled for privileged accesses */
	bool			pan;		/* AT S1E1{R,W}P with PSTATE.PAN set */
	bool	     		be;		/* SCTLR_ELx.EE set: big-endian descriptors */
	bool	     		s2;		/* Table accesses go through the nested stage-2 */
};
33 
/*
 * Outcome of a stage-1 walk. The anonymous union holds either the
 * description of a successful translation or the fault information
 * recorded by fail_s1_walk(); @failed tells which view is meaningful.
 */
struct s1_walk_result {
	union {
		/* Successful walk (or MMU disabled) */
		struct {
			u64	desc;		/* Final (leaf) descriptor */
			u64	pa;		/* Output address */
			s8	level;		/* Leaf level, or S1_MMU_DISABLED */
			u8	APTable;	/* Accumulated table AP bits */
			bool	UXNTable;	/* Accumulated table UXN bit */
			bool	PXNTable;	/* Accumulated table PXN bit */
			bool	uwxn;		/* WXN applies to unprivileged */
			bool	uov;		/* Overlay applies to unprivileged */
			bool	ur;		/* Unprivileged read */
			bool	uw;		/* Unprivileged write */
			bool	ux;		/* Unprivileged execute */
			bool	pwxn;		/* WXN applies to privileged */
			bool	pov;		/* Overlay applies to privileged */
			bool	pr;		/* Privileged read */
			bool	pw;		/* Privileged write */
			bool	px;		/* Privileged execute */
		};
		/* Failed walk, reported through PAR_EL1.{FST,PTW,S} */
		struct {
			u8	fst;		/* Fault status code */
			bool	ptw;		/* Fault hit during the table walk */
			bool	s2;		/* Fault comes from stage-2 */
		};
	};
	bool	failed;
};
62 
63 static void fail_s1_walk(struct s1_walk_result *wr, u8 fst, bool ptw, bool s2)
64 {
65 	wr->fst		= fst;
66 	wr->ptw		= ptw;
67 	wr->s2		= s2;
68 	wr->failed	= true;
69 }
70 
/*
 * Sentinel stored in s1_walk_result.level by setup_s1_walk() when the
 * stage-1 MMU is off and the address is passed through without a walk.
 */
#define S1_MMU_DISABLED		(-127)
72 
73 static int get_ia_size(struct s1_walk_info *wi)
74 {
75 	return 64 - wi->txsz;
76 }
77 
78 /* Return true if the IPA is out of the OA range */
79 static bool check_output_size(u64 ipa, struct s1_walk_info *wi)
80 {
81 	return wi->max_oa_bits < 48 && (ipa & GENMASK_ULL(47, wi->max_oa_bits));
82 }
83 
84 /* Return the translation regime that applies to an AT instruction */
85 static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 op)
86 {
87 	/*
88 	 * We only get here from guest EL2, so the translation
89 	 * regime AT applies to is solely defined by {E2H,TGE}.
90 	 */
91 	switch (op) {
92 	case OP_AT_S1E2R:
93 	case OP_AT_S1E2W:
94 	case OP_AT_S1E2A:
95 		return vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2;
96 		break;
97 	default:
98 		return (vcpu_el2_e2h_is_set(vcpu) &&
99 			vcpu_el2_tge_is_set(vcpu)) ? TR_EL20 : TR_EL10;
100 	}
101 }
102 
103 static bool s1pie_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
104 {
105 	if (!kvm_has_s1pie(vcpu->kvm))
106 		return false;
107 
108 	switch (regime) {
109 	case TR_EL2:
110 	case TR_EL20:
111 		return vcpu_read_sys_reg(vcpu, TCR2_EL2) & TCR2_EL2_PIE;
112 	case TR_EL10:
113 		return  (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En) &&
114 			(__vcpu_sys_reg(vcpu, TCR2_EL1) & TCR2_EL1x_PIE);
115 	default:
116 		BUG();
117 	}
118 }
119 
120 static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi)
121 {
122 	u64 val;
123 
124 	if (!kvm_has_s1poe(vcpu->kvm)) {
125 		wi->poe = wi->e0poe = false;
126 		return;
127 	}
128 
129 	switch (wi->regime) {
130 	case TR_EL2:
131 	case TR_EL20:
132 		val = vcpu_read_sys_reg(vcpu, TCR2_EL2);
133 		wi->poe = val & TCR2_EL2_POE;
134 		wi->e0poe = (wi->regime == TR_EL20) && (val & TCR2_EL2_E0POE);
135 		break;
136 	case TR_EL10:
137 		if (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En) {
138 			wi->poe = wi->e0poe = false;
139 			return;
140 		}
141 
142 		val = __vcpu_sys_reg(vcpu, TCR2_EL1);
143 		wi->poe = val & TCR2_EL1x_POE;
144 		wi->e0poe = val & TCR2_EL1x_E0POE;
145 	}
146 }
147 
148 static int setup_s1_walk(struct kvm_vcpu *vcpu, u32 op, struct s1_walk_info *wi,
149 			 struct s1_walk_result *wr, u64 va)
150 {
151 	u64 hcr, sctlr, tcr, tg, ps, ia_bits, ttbr;
152 	unsigned int stride, x;
153 	bool va55, tbi, lva, as_el0;
154 
155 	hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
156 
157 	wi->regime = compute_translation_regime(vcpu, op);
158 	as_el0 = (op == OP_AT_S1E0R || op == OP_AT_S1E0W);
159 	wi->pan = (op == OP_AT_S1E1RP || op == OP_AT_S1E1WP) &&
160 		  (*vcpu_cpsr(vcpu) & PSR_PAN_BIT);
161 
162 	va55 = va & BIT(55);
163 
164 	if (wi->regime == TR_EL2 && va55)
165 		goto addrsz;
166 
167 	wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC));
168 
169 	switch (wi->regime) {
170 	case TR_EL10:
171 		sctlr	= vcpu_read_sys_reg(vcpu, SCTLR_EL1);
172 		tcr	= vcpu_read_sys_reg(vcpu, TCR_EL1);
173 		ttbr	= (va55 ?
174 			   vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
175 			   vcpu_read_sys_reg(vcpu, TTBR0_EL1));
176 		break;
177 	case TR_EL2:
178 	case TR_EL20:
179 		sctlr	= vcpu_read_sys_reg(vcpu, SCTLR_EL2);
180 		tcr	= vcpu_read_sys_reg(vcpu, TCR_EL2);
181 		ttbr	= (va55 ?
182 			   vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
183 			   vcpu_read_sys_reg(vcpu, TTBR0_EL2));
184 		break;
185 	default:
186 		BUG();
187 	}
188 
189 	tbi = (wi->regime == TR_EL2 ?
190 	       FIELD_GET(TCR_EL2_TBI, tcr) :
191 	       (va55 ?
192 		FIELD_GET(TCR_TBI1, tcr) :
193 		FIELD_GET(TCR_TBI0, tcr)));
194 
195 	if (!tbi && (u64)sign_extend64(va, 55) != va)
196 		goto addrsz;
197 
198 	va = (u64)sign_extend64(va, 55);
199 
200 	/* Let's put the MMU disabled case aside immediately */
201 	switch (wi->regime) {
202 	case TR_EL10:
203 		/*
204 		 * If dealing with the EL1&0 translation regime, 3 things
205 		 * can disable the S1 translation:
206 		 *
207 		 * - HCR_EL2.DC = 1
208 		 * - HCR_EL2.{E2H,TGE} = {0,1}
209 		 * - SCTLR_EL1.M = 0
210 		 *
211 		 * The TGE part is interesting. If we have decided that this
212 		 * is EL1&0, then it means that either {E2H,TGE} == {1,0} or
213 		 * {0,x}, and we only need to test for TGE == 1.
214 		 */
215 		if (hcr & (HCR_DC | HCR_TGE)) {
216 			wr->level = S1_MMU_DISABLED;
217 			break;
218 		}
219 		fallthrough;
220 	case TR_EL2:
221 	case TR_EL20:
222 		if (!(sctlr & SCTLR_ELx_M))
223 			wr->level = S1_MMU_DISABLED;
224 		break;
225 	}
226 
227 	if (wr->level == S1_MMU_DISABLED) {
228 		if (va >= BIT(kvm_get_pa_bits(vcpu->kvm)))
229 			goto addrsz;
230 
231 		wr->pa = va;
232 		return 0;
233 	}
234 
235 	wi->be = sctlr & SCTLR_ELx_EE;
236 
237 	wi->hpd  = kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, HPDS, IMP);
238 	wi->hpd &= (wi->regime == TR_EL2 ?
239 		    FIELD_GET(TCR_EL2_HPD, tcr) :
240 		    (va55 ?
241 		     FIELD_GET(TCR_HPD1, tcr) :
242 		     FIELD_GET(TCR_HPD0, tcr)));
243 	/* R_JHSVW */
244 	wi->hpd |= s1pie_enabled(vcpu, wi->regime);
245 
246 	/* Do we have POE? */
247 	compute_s1poe(vcpu, wi);
248 
249 	/* R_BVXDG */
250 	wi->hpd |= (wi->poe || wi->e0poe);
251 
252 	/* Someone was silly enough to encode TG0/TG1 differently */
253 	if (va55) {
254 		wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
255 		tg = FIELD_GET(TCR_TG1_MASK, tcr);
256 
257 		switch (tg << TCR_TG1_SHIFT) {
258 		case TCR_TG1_4K:
259 			wi->pgshift = 12;	 break;
260 		case TCR_TG1_16K:
261 			wi->pgshift = 14;	 break;
262 		case TCR_TG1_64K:
263 		default:	    /* IMPDEF: treat any other value as 64k */
264 			wi->pgshift = 16;	 break;
265 		}
266 	} else {
267 		wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
268 		tg = FIELD_GET(TCR_TG0_MASK, tcr);
269 
270 		switch (tg << TCR_TG0_SHIFT) {
271 		case TCR_TG0_4K:
272 			wi->pgshift = 12;	 break;
273 		case TCR_TG0_16K:
274 			wi->pgshift = 14;	 break;
275 		case TCR_TG0_64K:
276 		default:	    /* IMPDEF: treat any other value as 64k */
277 			wi->pgshift = 16;	 break;
278 		}
279 	}
280 
281 	/* R_PLCGL, R_YXNYW */
282 	if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR2_EL1, ST, 48_47)) {
283 		if (wi->txsz > 39)
284 			goto transfault_l0;
285 	} else {
286 		if (wi->txsz > 48 || (BIT(wi->pgshift) == SZ_64K && wi->txsz > 47))
287 			goto transfault_l0;
288 	}
289 
290 	/* R_GTJBY, R_SXWGM */
291 	switch (BIT(wi->pgshift)) {
292 	case SZ_4K:
293 		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT);
294 		lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
295 		break;
296 	case SZ_16K:
297 		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT);
298 		lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
299 		break;
300 	case SZ_64K:
301 		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, VARange, 52);
302 		break;
303 	}
304 
305 	if ((lva && wi->txsz < 12) || (!lva && wi->txsz < 16))
306 		goto transfault_l0;
307 
308 	ia_bits = get_ia_size(wi);
309 
310 	/* R_YYVYV, I_THCZK */
311 	if ((!va55 && va > GENMASK(ia_bits - 1, 0)) ||
312 	    (va55 && va < GENMASK(63, ia_bits)))
313 		goto transfault_l0;
314 
315 	/* I_ZFSYQ */
316 	if (wi->regime != TR_EL2 &&
317 	    (tcr & (va55 ? TCR_EPD1_MASK : TCR_EPD0_MASK)))
318 		goto transfault_l0;
319 
320 	/* R_BNDVG and following statements */
321 	if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, E0PD, IMP) &&
322 	    as_el0 && (tcr & (va55 ? TCR_E0PD1 : TCR_E0PD0)))
323 		goto transfault_l0;
324 
325 	/* AArch64.S1StartLevel() */
326 	stride = wi->pgshift - 3;
327 	wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride);
328 
329 	ps = (wi->regime == TR_EL2 ?
330 	      FIELD_GET(TCR_EL2_PS_MASK, tcr) : FIELD_GET(TCR_IPS_MASK, tcr));
331 
332 	wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps));
333 
334 	/* Compute minimal alignment */
335 	x = 3 + ia_bits - ((3 - wi->sl) * stride + wi->pgshift);
336 
337 	wi->baddr = ttbr & TTBRx_EL1_BADDR;
338 
339 	/* R_VPBBF */
340 	if (check_output_size(wi->baddr, wi))
341 		goto addrsz;
342 
343 	wi->baddr &= GENMASK_ULL(wi->max_oa_bits - 1, x);
344 
345 	return 0;
346 
347 addrsz:				/* Address Size Fault level 0 */
348 	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false, false);
349 	return -EFAULT;
350 
351 transfault_l0:			/* Translation Fault level 0 */
352 	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(0), false, false);
353 	return -EFAULT;
354 }
355 
/*
 * Walk the guest's stage-1 page tables for @va, using the parameters
 * prepared in @wi, and fill @wr with the result.
 *
 * Each descriptor address is first translated through the nested
 * stage-2 when wi->s2 is set, then read from guest memory with
 * kvm_read_guest() and byte-swapped according to wi->be.
 *
 * Returns 0 on success, with wr->{level,desc,pa} describing the leaf.
 * On failure, the fault is recorded in @wr with fail_s1_walk() and a
 * non-zero value is returned: the error from kvm_walk_nested_s2() or
 * kvm_read_guest(), -EPERM when stage-2 doesn't allow the table to be
 * read, -EINVAL on an Address Size fault, -ENOENT on a Translation
 * fault.
 */
static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
		   struct s1_walk_result *wr, u64 va)
{
	u64 va_top, va_bottom, baddr, desc;
	int level, stride, ret;

	level = wi->sl;
	stride = wi->pgshift - 3;
	baddr = wi->baddr;

	va_top = get_ia_size(wi) - 1;

	while (1) {
		u64 index, ipa;

		/* VA[va_top:va_bottom] indexes the table at this level */
		va_bottom = (3 - level) * stride + wi->pgshift;
		/* Shifting by (va_bottom - 3) scales the index by 8 bytes */
		index = (va & GENMASK_ULL(va_top, va_bottom)) >> (va_bottom - 3);

		ipa = baddr | index;

		if (wi->s2) {
			struct kvm_s2_trans s2_trans = {};

			ret = kvm_walk_nested_s2(vcpu, ipa, &s2_trans);
			if (ret) {
				/* Report the S2 fault type at the S1 level */
				fail_s1_walk(wr,
					     (s2_trans.esr & ~ESR_ELx_FSC_LEVEL) | level,
					     true, true);
				return ret;
			}

			if (!kvm_s2_trans_readable(&s2_trans)) {
				fail_s1_walk(wr, ESR_ELx_FSC_PERM_L(level),
					     true, true);

				return -EPERM;
			}

			ipa = kvm_s2_trans_output(&s2_trans);
		}

		ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc));
		if (ret) {
			fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level),
				     true, false);
			return ret;
		}

		if (wi->be)
			desc = be64_to_cpu((__force __be64)desc);
		else
			desc = le64_to_cpu((__force __le64)desc);

		/* Invalid descriptor */
		if (!(desc & BIT(0)))
			goto transfault;

		/* Block mapping, check validity down the line */
		if (!(desc & BIT(1)))
			break;

		/* Page mapping */
		if (level == 3)
			break;

		/* Table handling */
		if (!wi->hpd) {
			wr->APTable  |= FIELD_GET(S1_TABLE_AP, desc);
			wr->UXNTable |= FIELD_GET(PMD_TABLE_UXN, desc);
			wr->PXNTable |= FIELD_GET(PMD_TABLE_PXN, desc);
		}

		baddr = desc & GENMASK_ULL(47, wi->pgshift);

		/* Check for out-of-range OA */
		if (check_output_size(baddr, wi))
			goto addrsz;

		/* Prepare for next round */
		va_top = va_bottom - 1;
		level++;
	}

	/* Block mapping, check the validity of the level */
	if (!(desc & BIT(1))) {
		bool valid_block = false;

		switch (BIT(wi->pgshift)) {
		case SZ_4K:
			valid_block = level == 1 || level == 2;
			break;
		case SZ_16K:
		case SZ_64K:
			valid_block = level == 2;
			break;
		}

		if (!valid_block)
			goto transfault;
	}

	if (check_output_size(desc & GENMASK(47, va_bottom), wi))
		goto addrsz;

	/* NOTE(review): presumably widens the mapping for contiguous entries */
	va_bottom += contiguous_bit_shift(desc, wi, level);

	wr->failed = false;
	wr->level = level;
	wr->desc = desc;
	/* Output address from the descriptor, offset from the VA */
	wr->pa = desc & GENMASK(47, va_bottom);
	wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0);

	return 0;

addrsz:
	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(level), true, false);
	return -EINVAL;
transfault:
	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(level), true, false);
	return -ENOENT;
}
477 
/*
 * Snapshot of the MMU-related system registers, as captured by
 * __mmu_config_save() and written back by __mmu_config_restore().
 */
struct mmu_config {
	u64	ttbr0;
	u64	ttbr1;
	u64	tcr;
	u64	mair;
	u64	tcr2;		/* Only saved when the CPUs have TCR2 */
	u64	pir;		/* Only saved when the CPUs have S1PIE */
	u64	pire0;		/* Only saved when the CPUs have S1PIE */
	u64	por_el0;	/* Only saved when the system supports POE */
	u64	por_el1;	/* Only saved when the system supports POE */
	u64	sctlr;
	u64	vttbr;
	u64	vtcr;
	u64	hcr;
};
493 
/*
 * Save the EL1 stage-1 MMU registers, together with VTTBR_EL2,
 * VTCR_EL2 and HCR_EL2, from the CPU into @config.
 *
 * TCR2, PIR/PIRE0 and POR are only read when the corresponding CPU
 * capability is present; the matching fields are otherwise left
 * untouched.
 */
static void __mmu_config_save(struct mmu_config *config)
{
	config->ttbr0	= read_sysreg_el1(SYS_TTBR0);
	config->ttbr1	= read_sysreg_el1(SYS_TTBR1);
	config->tcr	= read_sysreg_el1(SYS_TCR);
	config->mair	= read_sysreg_el1(SYS_MAIR);
	if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
		config->tcr2	= read_sysreg_el1(SYS_TCR2);
		if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
			config->pir	= read_sysreg_el1(SYS_PIR);
			config->pire0	= read_sysreg_el1(SYS_PIRE0);
		}
		if (system_supports_poe()) {
			config->por_el1	= read_sysreg_el1(SYS_POR);
			config->por_el0	= read_sysreg_s(SYS_POR_EL0);
		}
	}
	config->sctlr	= read_sysreg_el1(SYS_SCTLR);
	config->vttbr	= read_sysreg(vttbr_el2);
	config->vtcr	= read_sysreg(vtcr_el2);
	config->hcr	= read_sysreg(hcr_el2);
}
516 
/*
 * Write the registers saved by __mmu_config_save() back to the CPU.
 *
 * HCR_EL2 is restored first, followed by an ISB on CPUs affected by
 * the speculative AT workaround, and only then are the EL1 and
 * stage-2 registers written. The optional TCR2/PIR/POR registers
 * follow the same capability checks as the save side.
 */
static void __mmu_config_restore(struct mmu_config *config)
{
	write_sysreg(config->hcr,	hcr_el2);

	/*
	 * ARM errata 1165522 and 1530923 require TGE to be 1 before
	 * we update the guest state.
	 */
	asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));

	write_sysreg_el1(config->ttbr0,	SYS_TTBR0);
	write_sysreg_el1(config->ttbr1,	SYS_TTBR1);
	write_sysreg_el1(config->tcr,	SYS_TCR);
	write_sysreg_el1(config->mair,	SYS_MAIR);
	if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
		write_sysreg_el1(config->tcr2, SYS_TCR2);
		if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
			write_sysreg_el1(config->pir, SYS_PIR);
			write_sysreg_el1(config->pire0, SYS_PIRE0);
		}
		if (system_supports_poe()) {
			write_sysreg_el1(config->por_el1, SYS_POR);
			write_sysreg_s(config->por_el0, SYS_POR_EL0);
		}
	}
	write_sysreg_el1(config->sctlr,	SYS_SCTLR);
	write_sysreg(config->vttbr,	vttbr_el2);
	write_sysreg(config->vtcr,	vtcr_el2);
}
546 
547 static bool at_s1e1p_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
548 {
549 	u64 host_pan;
550 	bool fail;
551 
552 	host_pan = read_sysreg_s(SYS_PSTATE_PAN);
553 	write_sysreg_s(*vcpu_cpsr(vcpu) & PSTATE_PAN, SYS_PSTATE_PAN);
554 
555 	switch (op) {
556 	case OP_AT_S1E1RP:
557 		fail = __kvm_at(OP_AT_S1E1RP, vaddr);
558 		break;
559 	case OP_AT_S1E1WP:
560 		fail = __kvm_at(OP_AT_S1E1WP, vaddr);
561 		break;
562 	}
563 
564 	write_sysreg_s(host_pan, SYS_PSTATE_PAN);
565 
566 	return fail;
567 }
568 
/*
 * Build an 8-bit memory attribute from the inner (@ic) and outer (@oc)
 * cacheability names below: outer goes in bits [7:4], inner in [3:0].
 */
#define MEMATTR(ic, oc)		(MEMATTR_##oc << 4 | MEMATTR_##ic)
#define MEMATTR_NC		0b0100
#define MEMATTR_Wt		0b1000
#define MEMATTR_Wb		0b1100
#define MEMATTR_WbRaWa		0b1111

/* An attribute with a zero upper nibble is treated as Device memory */
#define MEMATTR_IS_DEVICE(m)	(((m) & GENMASK(7, 4)) == 0)
576 
577 static u8 s2_memattr_to_attr(u8 memattr)
578 {
579 	memattr &= 0b1111;
580 
581 	switch (memattr) {
582 	case 0b0000:
583 	case 0b0001:
584 	case 0b0010:
585 	case 0b0011:
586 		return memattr << 2;
587 	case 0b0100:
588 		return MEMATTR(Wb, Wb);
589 	case 0b0101:
590 		return MEMATTR(NC, NC);
591 	case 0b0110:
592 		return MEMATTR(Wt, NC);
593 	case 0b0111:
594 		return MEMATTR(Wb, NC);
595 	case 0b1000:
596 		/* Reserved, assume NC */
597 		return MEMATTR(NC, NC);
598 	case 0b1001:
599 		return MEMATTR(NC, Wt);
600 	case 0b1010:
601 		return MEMATTR(Wt, Wt);
602 	case 0b1011:
603 		return MEMATTR(Wb, Wt);
604 	case 0b1100:
605 		/* Reserved, assume NC */
606 		return MEMATTR(NC, NC);
607 	case 0b1101:
608 		return MEMATTR(NC, Wb);
609 	case 0b1110:
610 		return MEMATTR(Wt, Wb);
611 	case 0b1111:
612 		return MEMATTR(Wb, Wb);
613 	default:
614 		unreachable();
615 	}
616 }
617 
618 static u8 combine_s1_s2_attr(u8 s1, u8 s2)
619 {
620 	bool transient;
621 	u8 final = 0;
622 
623 	/* Upgrade transient s1 to non-transient to simplify things */
624 	switch (s1) {
625 	case 0b0001 ... 0b0011:	/* Normal, Write-Through Transient */
626 		transient = true;
627 		s1 = MEMATTR_Wt | (s1 & GENMASK(1,0));
628 		break;
629 	case 0b0101 ... 0b0111:	/* Normal, Write-Back Transient */
630 		transient = true;
631 		s1 = MEMATTR_Wb | (s1 & GENMASK(1,0));
632 		break;
633 	default:
634 		transient = false;
635 	}
636 
637 	/* S2CombineS1AttrHints() */
638 	if ((s1 & GENMASK(3, 2)) == MEMATTR_NC ||
639 	    (s2 & GENMASK(3, 2)) == MEMATTR_NC)
640 		final = MEMATTR_NC;
641 	else if ((s1 & GENMASK(3, 2)) == MEMATTR_Wt ||
642 		 (s2 & GENMASK(3, 2)) == MEMATTR_Wt)
643 		final = MEMATTR_Wt;
644 	else
645 		final = MEMATTR_Wb;
646 
647 	if (final != MEMATTR_NC) {
648 		/* Inherit RaWa hints form S1 */
649 		if (transient) {
650 			switch (s1 & GENMASK(3, 2)) {
651 			case MEMATTR_Wt:
652 				final = 0;
653 				break;
654 			case MEMATTR_Wb:
655 				final = MEMATTR_NC;
656 				break;
657 			}
658 		}
659 
660 		final |= s1 & GENMASK(1, 0);
661 	}
662 
663 	return final;
664 }
665 
/* Two-bit shareability encodings, as used for the PAR_EL1.SH field */
#define ATTR_NSH	0b00	/* Non-shareable */
#define ATTR_RSV	0b01	/* Reserved, treated as NSH by compute_sh() */
#define ATTR_OSH	0b10	/* Outer Shareable */
#define ATTR_ISH	0b11	/* Inner Shareable */
670 
671 static u8 compute_sh(u8 attr, u64 desc)
672 {
673 	u8 sh;
674 
675 	/* Any form of device, as well as NC has SH[1:0]=0b10 */
676 	if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC))
677 		return ATTR_OSH;
678 
679 	sh = FIELD_GET(PTE_SHARED, desc);
680 	if (sh == ATTR_RSV)		/* Reserved, mapped to NSH */
681 		sh = ATTR_NSH;
682 
683 	return sh;
684 }
685 
686 static u8 combine_sh(u8 s1_sh, u8 s2_sh)
687 {
688 	if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH)
689 		return ATTR_OSH;
690 	if (s1_sh == ATTR_ISH || s2_sh == ATTR_ISH)
691 		return ATTR_ISH;
692 
693 	return ATTR_NSH;
694 }
695 
/*
 * Build the PAR_EL1 value for a combined stage-1 + stage-2
 * translation, from the stage-1 result @s1_par and the stage-2
 * translation @tr.
 *
 * If stage-2 reported a fault (tr->esr is non-zero), a faulting PAR
 * flagged as a stage-2 fault is returned. Otherwise the stage-1 and
 * stage-2 memory attributes and shareability are combined, following
 * either the FWB or the non-FWB rules depending on the guest's
 * HCR_EL2.FWB, and the output address is taken from @tr.
 */
static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
			   struct kvm_s2_trans *tr)
{
	u8 s1_parattr, s2_memattr, final_attr;
	u64 par;

	/* If S2 has failed to translate, report the damage */
	if (tr->esr) {
		par = SYS_PAR_EL1_RES1;
		par |= SYS_PAR_EL1_F;
		par |= SYS_PAR_EL1_S;
		par |= FIELD_PREP(SYS_PAR_EL1_FST, tr->esr);
		return par;
	}

	s1_parattr = FIELD_GET(SYS_PAR_EL1_ATTR, s1_par);
	/* MemAttr[3:0] lives in bits [5:2] of the stage-2 descriptor */
	s2_memattr = FIELD_GET(GENMASK(5, 2), tr->desc);

	if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_FWB) {
		/* MemAttr[3] is only considered with MTE_PERM */
		if (!kvm_has_feat(vcpu->kvm, ID_AA64PFR2_EL1, MTEPERM, IMP))
			s2_memattr &= ~BIT(3);

		/* Combination of R_VRJSW and R_RHWZM */
		switch (s2_memattr) {
		case 0b0101:
			if (MEMATTR_IS_DEVICE(s1_parattr))
				final_attr = s1_parattr;
			else
				final_attr = MEMATTR(NC, NC);
			break;
		case 0b0110:
		case 0b1110:
			final_attr = MEMATTR(WbRaWa, WbRaWa);
			break;
		case 0b0111:
		case 0b1111:
			/* Preserve S1 attribute */
			final_attr = s1_parattr;
			break;
		case 0b0100:
		case 0b1100:
		case 0b1101:
			/* Reserved, do something non-silly */
			final_attr = s1_parattr;
			break;
		default:
			/*
			 * MemAttr[2]=0, Device from S2.
			 *
			 * FWB does not influence the way that stage 1
			 * memory types and attributes are combined
			 * with stage 2 Device type and attributes.
			 */
			final_attr = min(s2_memattr_to_attr(s2_memattr),
					 s1_parattr);
		}
	} else {
		/* Combination of R_HMNDG, R_TNHFM and R_GQFSF */
		u8 s2_parattr = s2_memattr_to_attr(s2_memattr);

		if (MEMATTR_IS_DEVICE(s1_parattr) ||
		    MEMATTR_IS_DEVICE(s2_parattr)) {
			/* The numerically smallest attribute is picked */
			final_attr = min(s1_parattr, s2_parattr);
		} else {
			/* At this stage, this is memory vs memory */
			final_attr  = combine_s1_s2_attr(s1_parattr & 0xf,
							 s2_parattr & 0xf);
			final_attr |= combine_s1_s2_attr(s1_parattr >> 4,
							 s2_parattr >> 4) << 4;
		}
	}

	/* HCR_EL2.CD forces Normal memory to Non-cacheable */
	if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_CD) &&
	    !MEMATTR_IS_DEVICE(final_attr))
		final_attr = MEMATTR(NC, NC);

	par  = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr);
	par |= tr->output & GENMASK(47, 12);
	par |= FIELD_PREP(SYS_PAR_EL1_SH,
			  combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par),
				     compute_sh(final_attr, tr->desc)));

	return par;
}
780 
781 static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr,
782 			  enum trans_regime regime)
783 {
784 	u64 par;
785 
786 	if (wr->failed) {
787 		par = SYS_PAR_EL1_RES1;
788 		par |= SYS_PAR_EL1_F;
789 		par |= FIELD_PREP(SYS_PAR_EL1_FST, wr->fst);
790 		par |= wr->ptw ? SYS_PAR_EL1_PTW : 0;
791 		par |= wr->s2 ? SYS_PAR_EL1_S : 0;
792 	} else if (wr->level == S1_MMU_DISABLED) {
793 		/* MMU off or HCR_EL2.DC == 1 */
794 		par  = SYS_PAR_EL1_NSE;
795 		par |= wr->pa & GENMASK_ULL(47, 12);
796 
797 		if (regime == TR_EL10 &&
798 		    (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_DC)) {
799 			par |= FIELD_PREP(SYS_PAR_EL1_ATTR,
800 					  MEMATTR(WbRaWa, WbRaWa));
801 			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_NSH);
802 		} else {
803 			par |= FIELD_PREP(SYS_PAR_EL1_ATTR, 0); /* nGnRnE */
804 			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_OSH);
805 		}
806 	} else {
807 		u64 mair, sctlr;
808 		u8 sh;
809 
810 		par  = SYS_PAR_EL1_NSE;
811 
812 		mair = (regime == TR_EL10 ?
813 			vcpu_read_sys_reg(vcpu, MAIR_EL1) :
814 			vcpu_read_sys_reg(vcpu, MAIR_EL2));
815 
816 		mair >>= FIELD_GET(PTE_ATTRINDX_MASK, wr->desc) * 8;
817 		mair &= 0xff;
818 
819 		sctlr = (regime == TR_EL10 ?
820 			 vcpu_read_sys_reg(vcpu, SCTLR_EL1) :
821 			 vcpu_read_sys_reg(vcpu, SCTLR_EL2));
822 
823 		/* Force NC for memory if SCTLR_ELx.C is clear */
824 		if (!(sctlr & SCTLR_EL1_C) && !MEMATTR_IS_DEVICE(mair))
825 			mair = MEMATTR(NC, NC);
826 
827 		par |= FIELD_PREP(SYS_PAR_EL1_ATTR, mair);
828 		par |= wr->pa & GENMASK_ULL(47, 12);
829 
830 		sh = compute_sh(mair, wr->desc);
831 		par |= FIELD_PREP(SYS_PAR_EL1_SH, sh);
832 	}
833 
834 	return par;
835 }
836 
837 static bool pan3_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
838 {
839 	u64 sctlr;
840 
841 	if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, PAN, PAN3))
842 		return false;
843 
844 	if (s1pie_enabled(vcpu, regime))
845 		return true;
846 
847 	if (regime == TR_EL10)
848 		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
849 	else
850 		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);
851 
852 	return sctlr & SCTLR_EL1_EPAN;
853 }
854 
855 static void compute_s1_direct_permissions(struct kvm_vcpu *vcpu,
856 					  struct s1_walk_info *wi,
857 					  struct s1_walk_result *wr)
858 {
859 	bool wxn;
860 
861 	/* Non-hierarchical part of AArch64.S1DirectBasePermissions() */
862 	if (wi->regime != TR_EL2) {
863 		switch (FIELD_GET(PTE_USER | PTE_RDONLY, wr->desc)) {
864 		case 0b00:
865 			wr->pr = wr->pw = true;
866 			wr->ur = wr->uw = false;
867 			break;
868 		case 0b01:
869 			wr->pr = wr->pw = wr->ur = wr->uw = true;
870 			break;
871 		case 0b10:
872 			wr->pr = true;
873 			wr->pw = wr->ur = wr->uw = false;
874 			break;
875 		case 0b11:
876 			wr->pr = wr->ur = true;
877 			wr->pw = wr->uw = false;
878 			break;
879 		}
880 
881 		/* We don't use px for anything yet, but hey... */
882 		wr->px = !((wr->desc & PTE_PXN) || wr->uw);
883 		wr->ux = !(wr->desc & PTE_UXN);
884 	} else {
885 		wr->ur = wr->uw = wr->ux = false;
886 
887 		if (!(wr->desc & PTE_RDONLY)) {
888 			wr->pr = wr->pw = true;
889 		} else {
890 			wr->pr = true;
891 			wr->pw = false;
892 		}
893 
894 		/* XN maps to UXN */
895 		wr->px = !(wr->desc & PTE_UXN);
896 	}
897 
898 	switch (wi->regime) {
899 	case TR_EL2:
900 	case TR_EL20:
901 		wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_WXN);
902 		break;
903 	case TR_EL10:
904 		wxn = (__vcpu_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN);
905 		break;
906 	}
907 
908 	wr->pwxn = wr->uwxn = wxn;
909 	wr->pov = wi->poe;
910 	wr->uov = wi->e0poe;
911 }
912 
913 static void compute_s1_hierarchical_permissions(struct kvm_vcpu *vcpu,
914 						struct s1_walk_info *wi,
915 						struct s1_walk_result *wr)
916 {
917 	/* Hierarchical part of AArch64.S1DirectBasePermissions() */
918 	if (wi->regime != TR_EL2) {
919 		switch (wr->APTable) {
920 		case 0b00:
921 			break;
922 		case 0b01:
923 			wr->ur = wr->uw = false;
924 			break;
925 		case 0b10:
926 			wr->pw = wr->uw = false;
927 			break;
928 		case 0b11:
929 			wr->pw = wr->ur = wr->uw = false;
930 			break;
931 		}
932 
933 		wr->px &= !wr->PXNTable;
934 		wr->ux &= !wr->UXNTable;
935 	} else {
936 		if (wr->APTable & BIT(1))
937 			wr->pw = false;
938 
939 		/* XN maps to UXN */
940 		wr->px &= !wr->UXNTable;
941 	}
942 }
943 
/* Extract the 4-bit field number @i from the vcpu @v's sysreg @r */
#define perm_idx(v, r, i)	((vcpu_read_sys_reg((v), (r)) >> ((i) * 4)) & 0xf)

/* Set the privileged read/write/execute permissions of @wr */
#define set_priv_perms(wr, r, w, x)	\
	do {				\
		(wr)->pr = (r);		\
		(wr)->pw = (w);		\
		(wr)->px = (x);		\
	} while (0)

/* Set the unprivileged read/write/execute permissions of @wr */
#define set_unpriv_perms(wr, r, w, x)	\
	do {				\
		(wr)->ur = (r);		\
		(wr)->uw = (w);		\
		(wr)->ux = (x);		\
	} while (0)

/* Set whether WXN applies to privileged accesses */
#define set_priv_wxn(wr, v)		\
	do {				\
		(wr)->pwxn = (v);	\
	} while (0)

/* Set whether WXN applies to unprivileged accesses */
#define set_unpriv_wxn(wr, v)		\
	do {				\
		(wr)->uwxn = (v);	\
	} while (0)

/*
 * Decode the 4-bit indirect permission encoding @ip into the
 * (read, write, execute) permissions of @wr. @w is either priv or
 * unpriv, and is pasted into the helper names above to select which
 * set of permissions is updated.
 */
/* Similar to AArch64.S1IndirectBasePermissions(), without GCS  */
#define set_perms(w, wr, ip)						\
	do {								\
		/* R_LLZDZ */						\
		switch ((ip)) {						\
		case 0b0000:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		case 0b0001:						\
			set_ ## w ## _perms((wr), true , false, false);	\
			break;						\
		case 0b0010:						\
			set_ ## w ## _perms((wr), false, false, true );	\
			break;						\
		case 0b0011:						\
			set_ ## w ## _perms((wr), true , false, true );	\
			break;						\
		case 0b0100:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		case 0b0101:						\
			set_ ## w ## _perms((wr), true , true , false);	\
			break;						\
		case 0b0110:						\
			set_ ## w ## _perms((wr), true , true , true );	\
			break;						\
		case 0b0111:						\
			set_ ## w ## _perms((wr), true , true , true );	\
			break;						\
		case 0b1000:						\
			set_ ## w ## _perms((wr), true , false, false);	\
			break;						\
		case 0b1001:						\
			set_ ## w ## _perms((wr), true , false, false);	\
			break;						\
		case 0b1010:						\
			set_ ## w ## _perms((wr), true , false, true );	\
			break;						\
		case 0b1011:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		case 0b1100:						\
			set_ ## w ## _perms((wr), true , true , false);	\
			break;						\
		case 0b1101:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		case 0b1110:						\
			set_ ## w ## _perms((wr), true , true , true );	\
			break;						\
		case 0b1111:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		}							\
									\
		/* R_HJYGR */						\
		set_ ## w ## _wxn((wr), ((ip) == 0b0110));		\
									\
	} while (0)
1029 
1030 static void compute_s1_indirect_permissions(struct kvm_vcpu *vcpu,
1031 					    struct s1_walk_info *wi,
1032 					    struct s1_walk_result *wr)
1033 {
1034 	u8 up, pp, idx;
1035 
1036 	idx = pte_pi_index(wr->desc);
1037 
1038 	switch (wi->regime) {
1039 	case TR_EL10:
1040 		pp = perm_idx(vcpu, PIR_EL1, idx);
1041 		up = perm_idx(vcpu, PIRE0_EL1, idx);
1042 		break;
1043 	case TR_EL20:
1044 		pp = perm_idx(vcpu, PIR_EL2, idx);
1045 		up = perm_idx(vcpu, PIRE0_EL2, idx);
1046 		break;
1047 	case TR_EL2:
1048 		pp = perm_idx(vcpu, PIR_EL2, idx);
1049 		up = 0;
1050 		break;
1051 	}
1052 
1053 	set_perms(priv, wr, pp);
1054 
1055 	if (wi->regime != TR_EL2)
1056 		set_perms(unpriv, wr, up);
1057 	else
1058 		set_unpriv_perms(wr, false, false, false);
1059 
1060 	wr->pov = wi->poe && !(pp & BIT(3));
1061 	wr->uov = wi->e0poe && !(up & BIT(3));
1062 
1063 	/* R_VFPJF */
1064 	if (wr->px && wr->uw) {
1065 		set_priv_perms(wr, false, false, false);
1066 		set_unpriv_perms(wr, false, false, false);
1067 	}
1068 }
1069 
1070 static void compute_s1_overlay_permissions(struct kvm_vcpu *vcpu,
1071 					   struct s1_walk_info *wi,
1072 					   struct s1_walk_result *wr)
1073 {
1074 	u8 idx, pov_perms, uov_perms;
1075 
1076 	idx = FIELD_GET(PTE_PO_IDX_MASK, wr->desc);
1077 
1078 	switch (wi->regime) {
1079 	case TR_EL10:
1080 		pov_perms = perm_idx(vcpu, POR_EL1, idx);
1081 		uov_perms = perm_idx(vcpu, POR_EL0, idx);
1082 		break;
1083 	case TR_EL20:
1084 		pov_perms = perm_idx(vcpu, POR_EL2, idx);
1085 		uov_perms = perm_idx(vcpu, POR_EL0, idx);
1086 		break;
1087 	case TR_EL2:
1088 		pov_perms = perm_idx(vcpu, POR_EL2, idx);
1089 		uov_perms = 0;
1090 		break;
1091 	}
1092 
1093 	if (pov_perms & ~POE_RXW)
1094 		pov_perms = POE_NONE;
1095 
1096 	if (wi->poe && wr->pov) {
1097 		wr->pr &= pov_perms & POE_R;
1098 		wr->px &= pov_perms & POE_X;
1099 		wr->pw &= pov_perms & POE_W;
1100 	}
1101 
1102 	if (uov_perms & ~POE_RXW)
1103 		uov_perms = POE_NONE;
1104 
1105 	if (wi->e0poe && wr->uov) {
1106 		wr->ur &= uov_perms & POE_R;
1107 		wr->ux &= uov_perms & POE_X;
1108 		wr->uw &= uov_perms & POE_W;
1109 	}
1110 }
1111 
1112 static void compute_s1_permissions(struct kvm_vcpu *vcpu,
1113 				   struct s1_walk_info *wi,
1114 				   struct s1_walk_result *wr)
1115 {
1116 	bool pan;
1117 
1118 	if (!s1pie_enabled(vcpu, wi->regime))
1119 		compute_s1_direct_permissions(vcpu, wi, wr);
1120 	else
1121 		compute_s1_indirect_permissions(vcpu, wi, wr);
1122 
1123 	if (!wi->hpd)
1124 		compute_s1_hierarchical_permissions(vcpu, wi, wr);
1125 
1126 	if (wi->poe || wi->e0poe)
1127 		compute_s1_overlay_permissions(vcpu, wi, wr);
1128 
1129 	/* R_QXXPC */
1130 	if (wr->pwxn) {
1131 		if (!wr->pov && wr->pw)
1132 			wr->px = false;
1133 		if (wr->pov && wr->px)
1134 			wr->pw = false;
1135 	}
1136 
1137 	/* R_NPBXC */
1138 	if (wr->uwxn) {
1139 		if (!wr->uov && wr->uw)
1140 			wr->ux = false;
1141 		if (wr->uov && wr->ux)
1142 			wr->uw = false;
1143 	}
1144 
1145 	pan = wi->pan && (wr->ur || wr->uw ||
1146 			  (pan3_enabled(vcpu, wi->regime) && wr->ux));
1147 	wr->pw &= !pan;
1148 	wr->pr &= !pan;
1149 }
1150 
1151 static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
1152 {
1153 	struct s1_walk_result wr = {};
1154 	struct s1_walk_info wi = {};
1155 	bool perm_fail = false;
1156 	int ret, idx;
1157 
1158 	ret = setup_s1_walk(vcpu, op, &wi, &wr, vaddr);
1159 	if (ret)
1160 		goto compute_par;
1161 
1162 	if (wr.level == S1_MMU_DISABLED)
1163 		goto compute_par;
1164 
1165 	idx = srcu_read_lock(&vcpu->kvm->srcu);
1166 
1167 	ret = walk_s1(vcpu, &wi, &wr, vaddr);
1168 
1169 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
1170 
1171 	if (ret)
1172 		goto compute_par;
1173 
1174 	compute_s1_permissions(vcpu, &wi, &wr);
1175 
1176 	switch (op) {
1177 	case OP_AT_S1E1RP:
1178 	case OP_AT_S1E1R:
1179 	case OP_AT_S1E2R:
1180 		perm_fail = !wr.pr;
1181 		break;
1182 	case OP_AT_S1E1WP:
1183 	case OP_AT_S1E1W:
1184 	case OP_AT_S1E2W:
1185 		perm_fail = !wr.pw;
1186 		break;
1187 	case OP_AT_S1E0R:
1188 		perm_fail = !wr.ur;
1189 		break;
1190 	case OP_AT_S1E0W:
1191 		perm_fail = !wr.uw;
1192 		break;
1193 	case OP_AT_S1E1A:
1194 	case OP_AT_S1E2A:
1195 		break;
1196 	default:
1197 		BUG();
1198 	}
1199 
1200 	if (perm_fail)
1201 		fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false, false);
1202 
1203 compute_par:
1204 	return compute_par_s1(vcpu, &wr, wi.regime);
1205 }
1206 
1207 /*
1208  * Return the PAR_EL1 value as the result of a valid translation.
1209  *
1210  * If the translation is unsuccessful, the value may only contain
1211  * PAR_EL1.F, and cannot be taken at face value. It isn't an
1212  * indication of the translation having failed, only that the fast
1213  * path did not succeed, *unless* it indicates a S1 permission fault.
1214  */
1215 static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
1216 {
1217 	struct mmu_config config;
1218 	struct kvm_s2_mmu *mmu;
1219 	bool fail;
1220 	u64 par;
1221 
1222 	par = SYS_PAR_EL1_F;
1223 
1224 	/*
1225 	 * We've trapped, so everything is live on the CPU. As we will
1226 	 * be switching contexts behind everybody's back, disable
1227 	 * interrupts while holding the mmu lock.
1228 	 */
1229 	guard(write_lock_irqsave)(&vcpu->kvm->mmu_lock);
1230 
1231 	/*
1232 	 * If HCR_EL2.{E2H,TGE} == {1,1}, the MMU context is already
1233 	 * the right one (as we trapped from vEL2). If not, save the
1234 	 * full MMU context.
1235 	 */
1236 	if (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu))
1237 		goto skip_mmu_switch;
1238 
1239 	/*
1240 	 * Obtaining the S2 MMU for a L2 is horribly racy, and we may not
1241 	 * find it (recycled by another vcpu, for example). When this
1242 	 * happens, admit defeat immediately and use the SW (slow) path.
1243 	 */
1244 	mmu = lookup_s2_mmu(vcpu);
1245 	if (!mmu)
1246 		return par;
1247 
1248 	__mmu_config_save(&config);
1249 
1250 	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR0_EL1),	SYS_TTBR0);
1251 	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR1_EL1),	SYS_TTBR1);
1252 	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR_EL1),	SYS_TCR);
1253 	write_sysreg_el1(vcpu_read_sys_reg(vcpu, MAIR_EL1),	SYS_MAIR);
1254 	if (kvm_has_tcr2(vcpu->kvm)) {
1255 		write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR2_EL1), SYS_TCR2);
1256 		if (kvm_has_s1pie(vcpu->kvm)) {
1257 			write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIR_EL1), SYS_PIR);
1258 			write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIRE0_EL1), SYS_PIRE0);
1259 		}
1260 		if (kvm_has_s1poe(vcpu->kvm)) {
1261 			write_sysreg_el1(vcpu_read_sys_reg(vcpu, POR_EL1), SYS_POR);
1262 			write_sysreg_s(vcpu_read_sys_reg(vcpu, POR_EL0), SYS_POR_EL0);
1263 		}
1264 	}
1265 	write_sysreg_el1(vcpu_read_sys_reg(vcpu, SCTLR_EL1),	SYS_SCTLR);
1266 	__load_stage2(mmu, mmu->arch);
1267 
1268 skip_mmu_switch:
1269 	/* Clear TGE, enable S2 translation, we're rolling */
1270 	write_sysreg((config.hcr & ~HCR_TGE) | HCR_VM,	hcr_el2);
1271 	isb();
1272 
1273 	switch (op) {
1274 	case OP_AT_S1E1RP:
1275 	case OP_AT_S1E1WP:
1276 		fail = at_s1e1p_fast(vcpu, op, vaddr);
1277 		break;
1278 	case OP_AT_S1E1R:
1279 		fail = __kvm_at(OP_AT_S1E1R, vaddr);
1280 		break;
1281 	case OP_AT_S1E1W:
1282 		fail = __kvm_at(OP_AT_S1E1W, vaddr);
1283 		break;
1284 	case OP_AT_S1E0R:
1285 		fail = __kvm_at(OP_AT_S1E0R, vaddr);
1286 		break;
1287 	case OP_AT_S1E0W:
1288 		fail = __kvm_at(OP_AT_S1E0W, vaddr);
1289 		break;
1290 	case OP_AT_S1E1A:
1291 		fail = __kvm_at(OP_AT_S1E1A, vaddr);
1292 		break;
1293 	default:
1294 		WARN_ON_ONCE(1);
1295 		fail = true;
1296 		break;
1297 	}
1298 
1299 	if (!fail)
1300 		par = read_sysreg_par();
1301 
1302 	if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)))
1303 		__mmu_config_restore(&config);
1304 
1305 	return par;
1306 }
1307 
1308 static bool par_check_s1_perm_fault(u64 par)
1309 {
1310 	u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par);
1311 
1312 	return  ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM &&
1313 		 !(par & SYS_PAR_EL1_S));
1314 }
1315 
1316 void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
1317 {
1318 	u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr);
1319 
1320 	/*
1321 	 * If PAR_EL1 reports that AT failed on a S1 permission fault, we
1322 	 * know for sure that the PTW was able to walk the S1 tables and
1323 	 * there's nothing else to do.
1324 	 *
1325 	 * If AT failed for any other reason, then we must walk the guest S1
1326 	 * to emulate the instruction.
1327 	 */
1328 	if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
1329 		par = handle_at_slow(vcpu, op, vaddr);
1330 
1331 	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
1332 }
1333 
1334 void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
1335 {
1336 	u64 par;
1337 
1338 	/*
1339 	 * We've trapped, so everything is live on the CPU. As we will be
1340 	 * switching context behind everybody's back, disable interrupts...
1341 	 */
1342 	scoped_guard(write_lock_irqsave, &vcpu->kvm->mmu_lock) {
1343 		u64 val, hcr;
1344 		bool fail;
1345 
1346 		val = hcr = read_sysreg(hcr_el2);
1347 		val &= ~HCR_TGE;
1348 		val |= HCR_VM;
1349 
1350 		if (!vcpu_el2_e2h_is_set(vcpu))
1351 			val |= HCR_NV | HCR_NV1;
1352 
1353 		write_sysreg(val, hcr_el2);
1354 		isb();
1355 
1356 		par = SYS_PAR_EL1_F;
1357 
1358 		switch (op) {
1359 		case OP_AT_S1E2R:
1360 			fail = __kvm_at(OP_AT_S1E1R, vaddr);
1361 			break;
1362 		case OP_AT_S1E2W:
1363 			fail = __kvm_at(OP_AT_S1E1W, vaddr);
1364 			break;
1365 		case OP_AT_S1E2A:
1366 			fail = __kvm_at(OP_AT_S1E1A, vaddr);
1367 			break;
1368 		default:
1369 			WARN_ON_ONCE(1);
1370 			fail = true;
1371 		}
1372 
1373 		isb();
1374 
1375 		if (!fail)
1376 			par = read_sysreg_par();
1377 
1378 		write_sysreg(hcr, hcr_el2);
1379 		isb();
1380 	}
1381 
1382 	/* We failed the translation, let's replay it in slow motion */
1383 	if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
1384 		par = handle_at_slow(vcpu, op, vaddr);
1385 
1386 	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
1387 }
1388 
1389 void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
1390 {
1391 	struct kvm_s2_trans out = {};
1392 	u64 ipa, par;
1393 	bool write;
1394 	int ret;
1395 
1396 	/* Do the stage-1 translation */
1397 	switch (op) {
1398 	case OP_AT_S12E1R:
1399 		op = OP_AT_S1E1R;
1400 		write = false;
1401 		break;
1402 	case OP_AT_S12E1W:
1403 		op = OP_AT_S1E1W;
1404 		write = true;
1405 		break;
1406 	case OP_AT_S12E0R:
1407 		op = OP_AT_S1E0R;
1408 		write = false;
1409 		break;
1410 	case OP_AT_S12E0W:
1411 		op = OP_AT_S1E0W;
1412 		write = true;
1413 		break;
1414 	default:
1415 		WARN_ON_ONCE(1);
1416 		return;
1417 	}
1418 
1419 	__kvm_at_s1e01(vcpu, op, vaddr);
1420 	par = vcpu_read_sys_reg(vcpu, PAR_EL1);
1421 	if (par & SYS_PAR_EL1_F)
1422 		return;
1423 
1424 	/*
1425 	 * If we only have a single stage of translation (E2H=0 or
1426 	 * TGE=1), exit early. Same thing if {VM,DC}=={0,0}.
1427 	 */
1428 	if (!vcpu_el2_e2h_is_set(vcpu) || vcpu_el2_tge_is_set(vcpu) ||
1429 	    !(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC)))
1430 		return;
1431 
1432 	/* Do the stage-2 translation */
1433 	ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0));
1434 	out.esr = 0;
1435 	ret = kvm_walk_nested_s2(vcpu, ipa, &out);
1436 	if (ret < 0)
1437 		return;
1438 
1439 	/* Check the access permission */
1440 	if (!out.esr &&
1441 	    ((!write && !out.readable) || (write && !out.writable)))
1442 		out.esr = ESR_ELx_FSC_PERM_L(out.level & 0x3);
1443 
1444 	par = compute_par_s12(vcpu, par, &out);
1445 	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
1446 }
1447