xref: /linux/arch/arm64/kvm/at.c (revision 7f71507851fc7764b36a3221839607d3a45c2025)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2017 - Linaro Ltd
4  * Author: Jintack Lim <jintack.lim@linaro.org>
5  */
6 
7 #include <linux/kvm_host.h>
8 
9 #include <asm/esr.h>
10 #include <asm/kvm_hyp.h>
11 #include <asm/kvm_mmu.h>
12 
13 enum trans_regime {
14 	TR_EL10,
15 	TR_EL20,
16 	TR_EL2,
17 };
18 
19 struct s1_walk_info {
20 	u64	     		baddr;
21 	enum trans_regime	regime;
22 	unsigned int		max_oa_bits;
23 	unsigned int		pgshift;
24 	unsigned int		txsz;
25 	int 	     		sl;
26 	bool	     		hpd;
27 	bool			e0poe;
28 	bool			poe;
29 	bool			pan;
30 	bool	     		be;
31 	bool	     		s2;
32 };
33 
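/*
 * Result of a stage-1 walk. On success, the first half of the union holds
 * the final descriptor, output address, level and the accumulated
 * permissions; on failure, the second half holds the syndrome information
 * that compute_par_s1() turns into a faulting PAR_EL1 value.
 */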
34 struct s1_walk_result {
35 	union {
36 		struct {
37 			u64	desc;
38 			u64	pa;
39 			s8	level;
40 			u8	APTable;
41 			bool	UXNTable;
42 			bool	PXNTable;
43 			bool	uwxn;
44 			bool	uov;
45 			bool	ur;
46 			bool	uw;
47 			bool	ux;
48 			bool	pwxn;
49 			bool	pov;
50 			bool	pr;
51 			bool	pw;
52 			bool	px;
53 		};
54 		struct {
55 			u8	fst;
56 			bool	ptw;
57 			bool	s2;
58 		};
59 	};
60 	bool	failed;
61 };
62 
63 static void fail_s1_walk(struct s1_walk_result *wr, u8 fst, bool ptw, bool s2)
64 {
65 	wr->fst		= fst;
66 	wr->ptw		= ptw;
67 	wr->s2		= s2;
68 	wr->failed	= true;
69 }
70 
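/*
 * Sentinel stored in s1_walk_result.level when no S1 walk takes place; it
 * sits well outside the architectural range of levels (-1 to 3) and still
 * fits in the s8 field.
 */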
71 #define S1_MMU_DISABLED		(-127)
72 
73 static int get_ia_size(struct s1_walk_info *wi)
74 {
75 	return 64 - wi->txsz;
76 }
77 
78 /* Return true if the IPA is out of the OA range */
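/* e.g. with max_oa_bits == 40, any bit set in IPA[47:40] is out of range */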
79 static bool check_output_size(u64 ipa, struct s1_walk_info *wi)
80 {
81 	return wi->max_oa_bits < 48 && (ipa & GENMASK_ULL(47, wi->max_oa_bits));
82 }
83 
84 /* Return the translation regime that applies to an AT instruction */
85 static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 op)
86 {
87 	/*
88 	 * We only get here from guest EL2, so the translation
89 	 * regime AT applies to is solely defined by {E2H,TGE}.
90 	 */
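	/*
	 * S1E2* ops: E2H=0 selects the EL2 regime, E2H=1 the EL2&0 regime.
	 * Every other op: {E2H,TGE}={1,1} selects EL2&0, anything else EL1&0.
	 */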
91 	switch (op) {
92 	case OP_AT_S1E2R:
93 	case OP_AT_S1E2W:
94 	case OP_AT_S1E2A:
95 		return vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2;
96 		break;
97 	default:
98 		return (vcpu_el2_e2h_is_set(vcpu) &&
99 			vcpu_el2_tge_is_set(vcpu)) ? TR_EL20 : TR_EL10;
100 	}
101 }
102 
103 static bool s1pie_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
104 {
105 	if (!kvm_has_s1pie(vcpu->kvm))
106 		return false;
107 
108 	switch (regime) {
109 	case TR_EL2:
110 	case TR_EL20:
111 		return vcpu_read_sys_reg(vcpu, TCR2_EL2) & TCR2_EL2_PIE;
112 	case TR_EL10:
113 		return  (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En) &&
114 			(__vcpu_sys_reg(vcpu, TCR2_EL1) & TCR2_EL1x_PIE);
115 	default:
116 		BUG();
117 	}
118 }
119 
120 static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi)
121 {
122 	u64 val;
123 
124 	if (!kvm_has_s1poe(vcpu->kvm)) {
125 		wi->poe = wi->e0poe = false;
126 		return;
127 	}
128 
129 	switch (wi->regime) {
130 	case TR_EL2:
131 	case TR_EL20:
132 		val = vcpu_read_sys_reg(vcpu, TCR2_EL2);
133 		wi->poe = val & TCR2_EL2_POE;
134 		wi->e0poe = (wi->regime == TR_EL20) && (val & TCR2_EL2_E0POE);
135 		break;
136 	case TR_EL10:
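		/*
		 * With HCRX_EL2.TCR2En clear, TCR2_EL1 is treated as 0 for
		 * the EL1&0 regime, so POE is off (mirroring the check in
		 * s1pie_enabled() above).
		 */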
137 		if (!(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En)) {
138 			wi->poe = wi->e0poe = false;
139 			return;
140 		}
141 
142 		val = __vcpu_sys_reg(vcpu, TCR2_EL1);
143 		wi->poe = val & TCR2_EL1x_POE;
144 		wi->e0poe = val & TCR2_EL1x_E0POE;
145 	}
146 }
147 
148 static int setup_s1_walk(struct kvm_vcpu *vcpu, u32 op, struct s1_walk_info *wi,
149 			 struct s1_walk_result *wr, u64 va)
150 {
151 	u64 hcr, sctlr, tcr, tg, ps, ia_bits, ttbr;
152 	unsigned int stride, x;
153 	bool va55, tbi, lva, as_el0;
154 
155 	hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
156 
157 	wi->regime = compute_translation_regime(vcpu, op);
158 	as_el0 = (op == OP_AT_S1E0R || op == OP_AT_S1E0W);
159 	wi->pan = (op == OP_AT_S1E1RP || op == OP_AT_S1E1WP) &&
160 		  (*vcpu_cpsr(vcpu) & PSR_PAN_BIT);
161 
162 	va55 = va & BIT(55);
163 
164 	if (wi->regime == TR_EL2 && va55)
165 		goto addrsz;
166 
167 	wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC));
168 
169 	switch (wi->regime) {
170 	case TR_EL10:
171 		sctlr	= vcpu_read_sys_reg(vcpu, SCTLR_EL1);
172 		tcr	= vcpu_read_sys_reg(vcpu, TCR_EL1);
173 		ttbr	= (va55 ?
174 			   vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
175 			   vcpu_read_sys_reg(vcpu, TTBR0_EL1));
176 		break;
177 	case TR_EL2:
178 	case TR_EL20:
179 		sctlr	= vcpu_read_sys_reg(vcpu, SCTLR_EL2);
180 		tcr	= vcpu_read_sys_reg(vcpu, TCR_EL2);
181 		ttbr	= (va55 ?
182 			   vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
183 			   vcpu_read_sys_reg(vcpu, TTBR0_EL2));
184 		break;
185 	default:
186 		BUG();
187 	}
188 
189 	tbi = (wi->regime == TR_EL2 ?
190 	       FIELD_GET(TCR_EL2_TBI, tcr) :
191 	       (va55 ?
192 		FIELD_GET(TCR_TBI1, tcr) :
193 		FIELD_GET(TCR_TBI0, tcr)));
194 
195 	if (!tbi && (u64)sign_extend64(va, 55) != va)
196 		goto addrsz;
197 
198 	va = (u64)sign_extend64(va, 55);
199 
200 	/* Let's put the MMU disabled case aside immediately */
201 	switch (wi->regime) {
202 	case TR_EL10:
203 		/*
204 		 * If dealing with the EL1&0 translation regime, 3 things
205 		 * can disable the S1 translation:
206 		 *
207 		 * - HCR_EL2.DC = 1
208 		 * - HCR_EL2.{E2H,TGE} = {0,1}
209 		 * - SCTLR_EL1.M = 0
210 		 *
211 		 * The TGE part is interesting. If we have decided that this
212 		 * is EL1&0, then it means that either {E2H,TGE} == {1,0} or
213 		 * {0,x}, and we only need to test for TGE == 1.
214 		 */
215 		if (hcr & (HCR_DC | HCR_TGE)) {
216 			wr->level = S1_MMU_DISABLED;
217 			break;
218 		}
219 		fallthrough;
220 	case TR_EL2:
221 	case TR_EL20:
222 		if (!(sctlr & SCTLR_ELx_M))
223 			wr->level = S1_MMU_DISABLED;
224 		break;
225 	}
226 
227 	if (wr->level == S1_MMU_DISABLED) {
228 		if (va >= BIT(kvm_get_pa_bits(vcpu->kvm)))
229 			goto addrsz;
230 
231 		wr->pa = va;
232 		return 0;
233 	}
234 
235 	wi->be = sctlr & SCTLR_ELx_EE;
236 
237 	wi->hpd  = kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, HPDS, IMP);
238 	wi->hpd &= (wi->regime == TR_EL2 ?
239 		    FIELD_GET(TCR_EL2_HPD, tcr) :
240 		    (va55 ?
241 		     FIELD_GET(TCR_HPD1, tcr) :
242 		     FIELD_GET(TCR_HPD0, tcr)));
243 	/* R_JHSVW */
244 	wi->hpd |= s1pie_enabled(vcpu, wi->regime);
245 
246 	/* Do we have POE? */
247 	compute_s1poe(vcpu, wi);
248 
249 	/* R_BVXDG */
250 	wi->hpd |= (wi->poe || wi->e0poe);
251 
252 	/* Someone was silly enough to encode TG0/TG1 differently */
253 	if (va55) {
254 		wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
255 		tg = FIELD_GET(TCR_TG1_MASK, tcr);
256 
257 		switch (tg << TCR_TG1_SHIFT) {
258 		case TCR_TG1_4K:
259 			wi->pgshift = 12;	 break;
260 		case TCR_TG1_16K:
261 			wi->pgshift = 14;	 break;
262 		case TCR_TG1_64K:
263 		default:	    /* IMPDEF: treat any other value as 64k */
264 			wi->pgshift = 16;	 break;
265 		}
266 	} else {
267 		wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
268 		tg = FIELD_GET(TCR_TG0_MASK, tcr);
269 
270 		switch (tg << TCR_TG0_SHIFT) {
271 		case TCR_TG0_4K:
272 			wi->pgshift = 12;	 break;
273 		case TCR_TG0_16K:
274 			wi->pgshift = 14;	 break;
275 		case TCR_TG0_64K:
276 		default:	    /* IMPDEF: treat any other value as 64k */
277 			wi->pgshift = 16;	 break;
278 		}
279 	}
280 
281 	/* R_PLCGL, R_YXNYW */
282 	if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR2_EL1, ST, 48_47)) {
283 		if (wi->txsz > 39)
284 			goto transfault_l0;
285 	} else {
286 		if (wi->txsz > 48 || (BIT(wi->pgshift) == SZ_64K && wi->txsz > 47))
287 			goto transfault_l0;
288 	}
289 
290 	/* R_GTJBY, R_SXWGM */
291 	switch (BIT(wi->pgshift)) {
292 	case SZ_4K:
293 		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT);
294 		lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
295 		break;
296 	case SZ_16K:
297 		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT);
298 		lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
299 		break;
300 	case SZ_64K:
301 		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, VARange, 52);
302 		break;
303 	}
304 
305 	if ((lva && wi->txsz < 12) || (!lva && wi->txsz < 16))
306 		goto transfault_l0;
307 
308 	ia_bits = get_ia_size(wi);
309 
310 	/* R_YYVYV, I_THCZK */
311 	if ((!va55 && va > GENMASK(ia_bits - 1, 0)) ||
312 	    (va55 && va < GENMASK(63, ia_bits)))
313 		goto transfault_l0;
314 
315 	/* I_ZFSYQ */
316 	if (wi->regime != TR_EL2 &&
317 	    (tcr & (va55 ? TCR_EPD1_MASK : TCR_EPD0_MASK)))
318 		goto transfault_l0;
319 
320 	/* R_BNDVG and following statements */
321 	if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, E0PD, IMP) &&
322 	    as_el0 && (tcr & (va55 ? TCR_E0PD1 : TCR_E0PD0)))
323 		goto transfault_l0;
324 
325 	/* AArch64.S1StartLevel() */
326 	stride = wi->pgshift - 3;
327 	wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride);
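	/*
	 * e.g. a 4K granule (pgshift = 12, stride = 9) with a 48-bit IA gives
	 * sl = 3 - ((47 - 12) / 9) = 0, i.e. the walk starts at level 0.
	 */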
328 
329 	ps = (wi->regime == TR_EL2 ?
330 	      FIELD_GET(TCR_EL2_PS_MASK, tcr) : FIELD_GET(TCR_IPS_MASK, tcr));
331 
332 	wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps));
333 
334 	/* Compute minimal alignment */
335 	x = 3 + ia_bits - ((3 - wi->sl) * stride + wi->pgshift);
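	/*
	 * x is log2 of the start-level table size in bytes: with the example
	 * above, 2^(48 - 39) = 512 entries of 8 bytes each, so x = 12 and the
	 * table base must be 4kB aligned.
	 */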
336 
337 	wi->baddr = ttbr & TTBRx_EL1_BADDR;
338 
339 	/* R_VPBBF */
340 	if (check_output_size(wi->baddr, wi))
341 		goto addrsz;
342 
343 	wi->baddr &= GENMASK_ULL(wi->max_oa_bits - 1, x);
344 
345 	return 0;
346 
347 addrsz:				/* Address Size Fault level 0 */
348 	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false, false);
349 	return -EFAULT;
350 
351 transfault_l0:			/* Translation Fault level 0 */
352 	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(0), false, false);
353 	return -EFAULT;
354 }
355 
356 static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
357 		   struct s1_walk_result *wr, u64 va)
358 {
359 	u64 va_top, va_bottom, baddr, desc;
360 	int level, stride, ret;
361 
362 	level = wi->sl;
363 	stride = wi->pgshift - 3;
364 	baddr = wi->baddr;
365 
366 	va_top = get_ia_size(wi) - 1;
367 
368 	while (1) {
369 		u64 index, ipa;
370 
371 		va_bottom = (3 - level) * stride + wi->pgshift;
372 		index = (va & GENMASK_ULL(va_top, va_bottom)) >> (va_bottom - 3);
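		/*
		 * Shifting by (va_bottom - 3) keeps the index scaled by the
		 * 8-byte descriptor size, so (baddr | index) below is the IPA
		 * of the descriptor itself.
		 */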
373 
374 		ipa = baddr | index;
375 
376 		if (wi->s2) {
377 			struct kvm_s2_trans s2_trans = {};
378 
379 			ret = kvm_walk_nested_s2(vcpu, ipa, &s2_trans);
380 			if (ret) {
381 				fail_s1_walk(wr,
382 					     (s2_trans.esr & ~ESR_ELx_FSC_LEVEL) | level,
383 					     true, true);
384 				return ret;
385 			}
386 
387 			if (!kvm_s2_trans_readable(&s2_trans)) {
388 				fail_s1_walk(wr, ESR_ELx_FSC_PERM_L(level),
389 					     true, true);
390 
391 				return -EPERM;
392 			}
393 
394 			ipa = kvm_s2_trans_output(&s2_trans);
395 		}
396 
397 		ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc));
398 		if (ret) {
399 			fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level),
400 				     true, false);
401 			return ret;
402 		}
403 
404 		if (wi->be)
405 			desc = be64_to_cpu((__force __be64)desc);
406 		else
407 			desc = le64_to_cpu((__force __le64)desc);
408 
409 		/* Invalid descriptor */
410 		if (!(desc & BIT(0)))
411 			goto transfault;
412 
413 		/* Block mapping, check validity down the line */
414 		if (!(desc & BIT(1)))
415 			break;
416 
417 		/* Page mapping */
418 		if (level == 3)
419 			break;
420 
421 		/* Table handling */
422 		if (!wi->hpd) {
423 			wr->APTable  |= FIELD_GET(S1_TABLE_AP, desc);
424 			wr->UXNTable |= FIELD_GET(PMD_TABLE_UXN, desc);
425 			wr->PXNTable |= FIELD_GET(PMD_TABLE_PXN, desc);
426 		}
427 
428 		baddr = desc & GENMASK_ULL(47, wi->pgshift);
429 
430 		/* Check for out-of-range OA */
431 		if (check_output_size(baddr, wi))
432 			goto addrsz;
433 
434 		/* Prepare for next round */
435 		va_top = va_bottom - 1;
436 		level++;
437 	}
438 
439 	/* Block mapping, check the validity of the level */
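	/*
	 * Blocks are only accepted at level 1 (1GB) or 2 (2MB) for 4K pages,
	 * and at level 2 only for 16K (32MB) and 64K (512MB) pages.
	 */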
440 	if (!(desc & BIT(1))) {
441 		bool valid_block = false;
442 
443 		switch (BIT(wi->pgshift)) {
444 		case SZ_4K:
445 			valid_block = level == 1 || level == 2;
446 			break;
447 		case SZ_16K:
448 		case SZ_64K:
449 			valid_block = level == 2;
450 			break;
451 		}
452 
453 		if (!valid_block)
454 			goto transfault;
455 	}
456 
457 	if (check_output_size(desc & GENMASK(47, va_bottom), wi))
458 		goto addrsz;
459 
460 	va_bottom += contiguous_bit_shift(desc, wi, level);
461 
462 	wr->failed = false;
463 	wr->level = level;
464 	wr->desc = desc;
465 	wr->pa = desc & GENMASK(47, va_bottom);
466 	wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0);
467 
468 	return 0;
469 
470 addrsz:
471 	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(level), true, false);
472 	return -EINVAL;
473 transfault:
474 	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(level), true, false);
475 	return -ENOENT;
476 }
477 
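/*
 * Host MMU state that the fast AT path clobbers: saved before loading the
 * guest's EL1 translation context and restored once the AT instruction has
 * been issued.
 */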
478 struct mmu_config {
479 	u64	ttbr0;
480 	u64	ttbr1;
481 	u64	tcr;
482 	u64	mair;
483 	u64	tcr2;
484 	u64	pir;
485 	u64	pire0;
486 	u64	por_el0;
487 	u64	por_el1;
488 	u64	sctlr;
489 	u64	vttbr;
490 	u64	vtcr;
491 	u64	hcr;
492 };
493 
494 static void __mmu_config_save(struct mmu_config *config)
495 {
496 	config->ttbr0	= read_sysreg_el1(SYS_TTBR0);
497 	config->ttbr1	= read_sysreg_el1(SYS_TTBR1);
498 	config->tcr	= read_sysreg_el1(SYS_TCR);
499 	config->mair	= read_sysreg_el1(SYS_MAIR);
500 	if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
501 		config->tcr2	= read_sysreg_el1(SYS_TCR2);
502 		if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
503 			config->pir	= read_sysreg_el1(SYS_PIR);
504 			config->pire0	= read_sysreg_el1(SYS_PIRE0);
505 		}
506 		if (system_supports_poe()) {
507 			config->por_el1	= read_sysreg_el1(SYS_POR);
508 			config->por_el0	= read_sysreg_s(SYS_POR_EL0);
509 		}
510 	}
511 	config->sctlr	= read_sysreg_el1(SYS_SCTLR);
512 	config->vttbr	= read_sysreg(vttbr_el2);
513 	config->vtcr	= read_sysreg(vtcr_el2);
514 	config->hcr	= read_sysreg(hcr_el2);
515 }
516 
517 static void __mmu_config_restore(struct mmu_config *config)
518 {
519 	write_sysreg(config->hcr,	hcr_el2);
520 
521 	/*
522 	 * ARM errata 1165522 and 1530923 require TGE to be 1 before
523 	 * we update the guest state.
524 	 */
525 	asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
526 
527 	write_sysreg_el1(config->ttbr0,	SYS_TTBR0);
528 	write_sysreg_el1(config->ttbr1,	SYS_TTBR1);
529 	write_sysreg_el1(config->tcr,	SYS_TCR);
530 	write_sysreg_el1(config->mair,	SYS_MAIR);
531 	if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
532 		write_sysreg_el1(config->tcr2, SYS_TCR2);
533 		if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
534 			write_sysreg_el1(config->pir, SYS_PIR);
535 			write_sysreg_el1(config->pire0, SYS_PIRE0);
536 		}
537 		if (system_supports_poe()) {
538 			write_sysreg_el1(config->por_el1, SYS_POR);
539 			write_sysreg_s(config->por_el0, SYS_POR_EL0);
540 		}
541 	}
542 	write_sysreg_el1(config->sctlr,	SYS_SCTLR);
543 	write_sysreg(config->vttbr,	vttbr_el2);
544 	write_sysreg(config->vtcr,	vtcr_el2);
545 }
546 
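/*
 * Issue AT S1E1{R,W}P with the guest's PSTATE.PAN value temporarily
 * mirrored into the host's PSTATE.PAN, so that the hardware walk honours
 * the guest's PAN setting.
 */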
547 static bool at_s1e1p_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
548 {
549 	u64 host_pan;
550 	bool fail;
551 
552 	host_pan = read_sysreg_s(SYS_PSTATE_PAN);
553 	write_sysreg_s(*vcpu_cpsr(vcpu) & PSTATE_PAN, SYS_PSTATE_PAN);
554 
555 	switch (op) {
556 	case OP_AT_S1E1RP:
557 		fail = __kvm_at(OP_AT_S1E1RP, vaddr);
558 		break;
559 	case OP_AT_S1E1WP:
560 		fail = __kvm_at(OP_AT_S1E1WP, vaddr);
561 		break;
562 	}
563 
564 	write_sysreg_s(host_pan, SYS_PSTATE_PAN);
565 
566 	return fail;
567 }
568 
569 #define MEMATTR(ic, oc)		(MEMATTR_##oc << 4 | MEMATTR_##ic)
570 #define MEMATTR_NC		0b0100
571 #define MEMATTR_Wt		0b1000
572 #define MEMATTR_Wb		0b1100
573 #define MEMATTR_WbRaWa		0b1111
574 
575 #define MEMATTR_IS_DEVICE(m)	(((m) & GENMASK(7, 4)) == 0)
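/*
 * MEMATTR() builds a MAIR-style attribute byte, outer attribute in [7:4]
 * and inner in [3:0]: e.g. MEMATTR(NC, Wt) is 0x84, Outer Write-Through +
 * Inner Non-cacheable. All Device types have the outer nibble clear.
 */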
576 
577 static u8 s2_memattr_to_attr(u8 memattr)
578 {
579 	memattr &= 0b1111;
580 
581 	switch (memattr) {
582 	case 0b0000:
583 	case 0b0001:
584 	case 0b0010:
585 	case 0b0011:
586 		return memattr << 2;
587 	case 0b0100:
588 		return MEMATTR(Wb, Wb);
589 	case 0b0101:
590 		return MEMATTR(NC, NC);
591 	case 0b0110:
592 		return MEMATTR(Wt, NC);
593 	case 0b0111:
594 		return MEMATTR(Wb, NC);
595 	case 0b1000:
596 		/* Reserved, assume NC */
597 		return MEMATTR(NC, NC);
598 	case 0b1001:
599 		return MEMATTR(NC, Wt);
600 	case 0b1010:
601 		return MEMATTR(Wt, Wt);
602 	case 0b1011:
603 		return MEMATTR(Wb, Wt);
604 	case 0b1100:
605 		/* Reserved, assume NC */
606 		return MEMATTR(NC, NC);
607 	case 0b1101:
608 		return MEMATTR(NC, Wb);
609 	case 0b1110:
610 		return MEMATTR(Wt, Wb);
611 	case 0b1111:
612 		return MEMATTR(Wb, Wb);
613 	default:
614 		unreachable();
615 	}
616 }
617 
618 static u8 combine_s1_s2_attr(u8 s1, u8 s2)
619 {
620 	bool transient;
621 	u8 final = 0;
622 
623 	/* Upgrade transient s1 to non-transient to simplify things */
624 	switch (s1) {
625 	case 0b0001 ... 0b0011:	/* Normal, Write-Through Transient */
626 		transient = true;
627 		s1 = MEMATTR_Wt | (s1 & GENMASK(1,0));
628 		break;
629 	case 0b0101 ... 0b0111:	/* Normal, Write-Back Transient */
630 		transient = true;
631 		s1 = MEMATTR_Wb | (s1 & GENMASK(1,0));
632 		break;
633 	default:
634 		transient = false;
635 	}
636 
637 	/* S2CombineS1AttrHints() */
638 	if ((s1 & GENMASK(3, 2)) == MEMATTR_NC ||
639 	    (s2 & GENMASK(3, 2)) == MEMATTR_NC)
640 		final = MEMATTR_NC;
641 	else if ((s1 & GENMASK(3, 2)) == MEMATTR_Wt ||
642 		 (s2 & GENMASK(3, 2)) == MEMATTR_Wt)
643 		final = MEMATTR_Wt;
644 	else
645 		final = MEMATTR_Wb;
646 
647 	if (final != MEMATTR_NC) {
648 		/* Inherit RaWa hints from S1 */
649 		if (transient) {
650 			switch (s1 & GENMASK(3, 2)) {
651 			case MEMATTR_Wt:
652 				final = 0;
653 				break;
654 			case MEMATTR_Wb:
655 				final = MEMATTR_NC;
656 				break;
657 			}
658 		}
659 
660 		final |= s1 & GENMASK(1, 0);
661 	}
662 
663 	return final;
664 }
665 
666 #define ATTR_NSH	0b00
667 #define ATTR_RSV	0b01
668 #define ATTR_OSH	0b10
669 #define ATTR_ISH	0b11
670 
671 static u8 compute_sh(u8 attr, u64 desc)
672 {
673 	u8 sh;
674 
675 	/* Any form of Device memory, as well as NC, has SH[1:0]=0b10 */
676 	if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC))
677 		return ATTR_OSH;
678 
679 	sh = FIELD_GET(PTE_SHARED, desc);
680 	if (sh == ATTR_RSV)		/* Reserved, mapped to NSH */
681 		sh = ATTR_NSH;
682 
683 	return sh;
684 }
685 
686 static u8 combine_sh(u8 s1_sh, u8 s2_sh)
687 {
688 	if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH)
689 		return ATTR_OSH;
690 	if (s1_sh == ATTR_ISH || s2_sh == ATTR_ISH)
691 		return ATTR_ISH;
692 
693 	return ATTR_NSH;
694 }
695 
696 static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
697 			   struct kvm_s2_trans *tr)
698 {
699 	u8 s1_parattr, s2_memattr, final_attr;
700 	u64 par;
701 
702 	/* If S2 has failed to translate, report the damage */
703 	if (tr->esr) {
704 		par = SYS_PAR_EL1_RES1;
705 		par |= SYS_PAR_EL1_F;
706 		par |= SYS_PAR_EL1_S;
707 		par |= FIELD_PREP(SYS_PAR_EL1_FST, tr->esr);
708 		return par;
709 	}
710 
711 	s1_parattr = FIELD_GET(SYS_PAR_EL1_ATTR, s1_par);
712 	s2_memattr = FIELD_GET(GENMASK(5, 2), tr->desc);
713 
714 	if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_FWB) {
715 		if (!kvm_has_feat(vcpu->kvm, ID_AA64PFR2_EL1, MTEPERM, IMP))
716 			s2_memattr &= ~BIT(3);
717 
718 		/* Combination of R_VRJSW and R_RHWZM */
719 		switch (s2_memattr) {
720 		case 0b0101:
721 			if (MEMATTR_IS_DEVICE(s1_parattr))
722 				final_attr = s1_parattr;
723 			else
724 				final_attr = MEMATTR(NC, NC);
725 			break;
726 		case 0b0110:
727 		case 0b1110:
728 			final_attr = MEMATTR(WbRaWa, WbRaWa);
729 			break;
730 		case 0b0111:
731 		case 0b1111:
732 			/* Preserve S1 attribute */
733 			final_attr = s1_parattr;
734 			break;
735 		case 0b0100:
736 		case 0b1100:
737 		case 0b1101:
738 			/* Reserved, do something non-silly */
739 			final_attr = s1_parattr;
740 			break;
741 		default:
742 			/* MemAttr[2]=0, Device from S2 */
743 			final_attr = (s2_memattr & GENMASK(1, 0)) << 2;
744 		}
745 	} else {
746 		/* Combination of R_HMNDG, R_TNHFM and R_GQFSF */
747 		u8 s2_parattr = s2_memattr_to_attr(s2_memattr);
748 
749 		if (MEMATTR_IS_DEVICE(s1_parattr) ||
750 		    MEMATTR_IS_DEVICE(s2_parattr)) {
751 			final_attr = min(s1_parattr, s2_parattr);
752 		} else {
753 			/* At this stage, this is memory vs memory */
754 			final_attr  = combine_s1_s2_attr(s1_parattr & 0xf,
755 							 s2_parattr & 0xf);
756 			final_attr |= combine_s1_s2_attr(s1_parattr >> 4,
757 							 s2_parattr >> 4) << 4;
758 		}
759 	}
760 
761 	if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_CD) &&
762 	    !MEMATTR_IS_DEVICE(final_attr))
763 		final_attr = MEMATTR(NC, NC);
764 
765 	par  = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr);
766 	par |= tr->output & GENMASK(47, 12);
767 	par |= FIELD_PREP(SYS_PAR_EL1_SH,
768 			  combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par),
769 				     compute_sh(final_attr, tr->desc)));
770 
771 	return par;
772 }
773 
774 static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr,
775 			  enum trans_regime regime)
776 {
777 	u64 par;
778 
779 	if (wr->failed) {
780 		par = SYS_PAR_EL1_RES1;
781 		par |= SYS_PAR_EL1_F;
782 		par |= FIELD_PREP(SYS_PAR_EL1_FST, wr->fst);
783 		par |= wr->ptw ? SYS_PAR_EL1_PTW : 0;
784 		par |= wr->s2 ? SYS_PAR_EL1_S : 0;
785 	} else if (wr->level == S1_MMU_DISABLED) {
786 		/* MMU off or HCR_EL2.DC == 1 */
787 		par  = SYS_PAR_EL1_NSE;
788 		par |= wr->pa & GENMASK_ULL(47, 12);
789 
790 		if (regime == TR_EL10 &&
791 		    (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_DC)) {
792 			par |= FIELD_PREP(SYS_PAR_EL1_ATTR,
793 					  MEMATTR(WbRaWa, WbRaWa));
794 			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_NSH);
795 		} else {
796 			par |= FIELD_PREP(SYS_PAR_EL1_ATTR, 0); /* nGnRnE */
797 			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_OSH);
798 		}
799 	} else {
800 		u64 mair, sctlr;
801 		u8 sh;
802 
803 		par  = SYS_PAR_EL1_NSE;
804 
805 		mair = (regime == TR_EL10 ?
806 			vcpu_read_sys_reg(vcpu, MAIR_EL1) :
807 			vcpu_read_sys_reg(vcpu, MAIR_EL2));
808 
809 		mair >>= FIELD_GET(PTE_ATTRINDX_MASK, wr->desc) * 8;
810 		mair &= 0xff;
811 
812 		sctlr = (regime == TR_EL10 ?
813 			 vcpu_read_sys_reg(vcpu, SCTLR_EL1) :
814 			 vcpu_read_sys_reg(vcpu, SCTLR_EL2));
815 
816 		/* Force NC for memory if SCTLR_ELx.C is clear */
817 		if (!(sctlr & SCTLR_EL1_C) && !MEMATTR_IS_DEVICE(mair))
818 			mair = MEMATTR(NC, NC);
819 
820 		par |= FIELD_PREP(SYS_PAR_EL1_ATTR, mair);
821 		par |= wr->pa & GENMASK_ULL(47, 12);
822 
823 		sh = compute_sh(mair, wr->desc);
824 		par |= FIELD_PREP(SYS_PAR_EL1_SH, sh);
825 	}
826 
827 	return par;
828 }
829 
830 static bool pan3_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
831 {
832 	u64 sctlr;
833 
834 	if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, PAN, PAN3))
835 		return false;
836 
837 	if (s1pie_enabled(vcpu, regime))
838 		return true;
839 
840 	if (regime == TR_EL10)
841 		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
842 	else
843 		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);
844 
845 	return sctlr & SCTLR_EL1_EPAN;
846 }
847 
848 static void compute_s1_direct_permissions(struct kvm_vcpu *vcpu,
849 					  struct s1_walk_info *wi,
850 					  struct s1_walk_result *wr)
851 {
852 	bool wxn;
853 
854 	/* Non-hierarchical part of AArch64.S1DirectBasePermissions() */
855 	if (wi->regime != TR_EL2) {
856 		switch (FIELD_GET(PTE_USER | PTE_RDONLY, wr->desc)) {
857 		case 0b00:
858 			wr->pr = wr->pw = true;
859 			wr->ur = wr->uw = false;
860 			break;
861 		case 0b01:
862 			wr->pr = wr->pw = wr->ur = wr->uw = true;
863 			break;
864 		case 0b10:
865 			wr->pr = true;
866 			wr->pw = wr->ur = wr->uw = false;
867 			break;
868 		case 0b11:
869 			wr->pr = wr->ur = true;
870 			wr->pw = wr->uw = false;
871 			break;
872 		}
873 
874 		/* We don't use px for anything yet, but hey... */
875 		wr->px = !((wr->desc & PTE_PXN) || wr->uw);
876 		wr->ux = !(wr->desc & PTE_UXN);
877 	} else {
878 		wr->ur = wr->uw = wr->ux = false;
879 
880 		if (!(wr->desc & PTE_RDONLY)) {
881 			wr->pr = wr->pw = true;
882 		} else {
883 			wr->pr = true;
884 			wr->pw = false;
885 		}
886 
887 		/* XN maps to UXN */
888 		wr->px = !(wr->desc & PTE_UXN);
889 	}
890 
891 	switch (wi->regime) {
892 	case TR_EL2:
893 	case TR_EL20:
894 		wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_WXN);
895 		break;
896 	case TR_EL10:
897 		wxn = (__vcpu_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN);
898 		break;
899 	}
900 
901 	wr->pwxn = wr->uwxn = wxn;
902 	wr->pov = wi->poe;
903 	wr->uov = wi->e0poe;
904 }
905 
906 static void compute_s1_hierarchical_permissions(struct kvm_vcpu *vcpu,
907 						struct s1_walk_info *wi,
908 						struct s1_walk_result *wr)
909 {
910 	/* Hierarchical part of AArch64.S1DirectBasePermissions() */
911 	if (wi->regime != TR_EL2) {
912 		switch (wr->APTable) {
913 		case 0b00:
914 			break;
915 		case 0b01:
916 			wr->ur = wr->uw = false;
917 			break;
918 		case 0b10:
919 			wr->pw = wr->uw = false;
920 			break;
921 		case 0b11:
922 			wr->pw = wr->ur = wr->uw = false;
923 			break;
924 		}
925 
926 		wr->px &= !wr->PXNTable;
927 		wr->ux &= !wr->UXNTable;
928 	} else {
929 		if (wr->APTable & BIT(1))
930 			wr->pw = false;
931 
932 		/* XN maps to UXN */
933 		wr->px &= !wr->UXNTable;
934 	}
935 }
936 
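/*
 * PIR/POR registers pack 16 4-bit permission fields; perm_idx() extracts
 * the one selected by the descriptor's permission index.
 */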
937 #define perm_idx(v, r, i)	((vcpu_read_sys_reg((v), (r)) >> ((i) * 4)) & 0xf)
938 
939 #define set_priv_perms(wr, r, w, x)	\
940 	do {				\
941 		(wr)->pr = (r);		\
942 		(wr)->pw = (w);		\
943 		(wr)->px = (x);		\
944 	} while (0)
945 
946 #define set_unpriv_perms(wr, r, w, x)	\
947 	do {				\
948 		(wr)->ur = (r);		\
949 		(wr)->uw = (w);		\
950 		(wr)->ux = (x);		\
951 	} while (0)
952 
953 #define set_priv_wxn(wr, v)		\
954 	do {				\
955 		(wr)->pwxn = (v);	\
956 	} while (0)
957 
958 #define set_unpriv_wxn(wr, v)		\
959 	do {				\
960 		(wr)->uwxn = (v);	\
961 	} while (0)
962 
963 /* Similar to AArch64.S1IndirectBasePermissions(), without GCS  */
964 #define set_perms(w, wr, ip)						\
965 	do {								\
966 		/* R_LLZDZ */						\
967 		switch ((ip)) {						\
968 		case 0b0000:						\
969 			set_ ## w ## _perms((wr), false, false, false);	\
970 			break;						\
971 		case 0b0001:						\
972 			set_ ## w ## _perms((wr), true , false, false);	\
973 			break;						\
974 		case 0b0010:						\
975 			set_ ## w ## _perms((wr), false, false, true );	\
976 			break;						\
977 		case 0b0011:						\
978 			set_ ## w ## _perms((wr), true , false, true );	\
979 			break;						\
980 		case 0b0100:						\
981 			set_ ## w ## _perms((wr), false, false, false);	\
982 			break;						\
983 		case 0b0101:						\
984 			set_ ## w ## _perms((wr), true , true , false);	\
985 			break;						\
986 		case 0b0110:						\
987 			set_ ## w ## _perms((wr), true , true , true );	\
988 			break;						\
989 		case 0b0111:						\
990 			set_ ## w ## _perms((wr), true , true , true );	\
991 			break;						\
992 		case 0b1000:						\
993 			set_ ## w ## _perms((wr), true , false, false);	\
994 			break;						\
995 		case 0b1001:						\
996 			set_ ## w ## _perms((wr), true , false, false);	\
997 			break;						\
998 		case 0b1010:						\
999 			set_ ## w ## _perms((wr), true , false, true );	\
1000 			break;						\
1001 		case 0b1011:						\
1002 			set_ ## w ## _perms((wr), false, false, false);	\
1003 			break;						\
1004 		case 0b1100:						\
1005 			set_ ## w ## _perms((wr), true , true , false);	\
1006 			break;						\
1007 		case 0b1101:						\
1008 			set_ ## w ## _perms((wr), false, false, false);	\
1009 			break;						\
1010 		case 0b1110:						\
1011 			set_ ## w ## _perms((wr), true , true , true );	\
1012 			break;						\
1013 		case 0b1111:						\
1014 			set_ ## w ## _perms((wr), false, false, false);	\
1015 			break;						\
1016 		}							\
1017 									\
1018 		/* R_HJYGR */						\
1019 		set_ ## w ## _wxn((wr), ((ip) == 0b0110));		\
1020 									\
1021 	} while (0)
1022 
1023 static void compute_s1_indirect_permissions(struct kvm_vcpu *vcpu,
1024 					    struct s1_walk_info *wi,
1025 					    struct s1_walk_result *wr)
1026 {
1027 	u8 up, pp, idx;
1028 
1029 	idx = pte_pi_index(wr->desc);
1030 
1031 	switch (wi->regime) {
1032 	case TR_EL10:
1033 		pp = perm_idx(vcpu, PIR_EL1, idx);
1034 		up = perm_idx(vcpu, PIRE0_EL1, idx);
1035 		break;
1036 	case TR_EL20:
1037 		pp = perm_idx(vcpu, PIR_EL2, idx);
1038 		up = perm_idx(vcpu, PIRE0_EL2, idx);
1039 		break;
1040 	case TR_EL2:
1041 		pp = perm_idx(vcpu, PIR_EL2, idx);
1042 		up = 0;
1043 		break;
1044 	}
1045 
1046 	set_perms(priv, wr, pp);
1047 
1048 	if (wi->regime != TR_EL2)
1049 		set_perms(unpriv, wr, up);
1050 	else
1051 		set_unpriv_perms(wr, false, false, false);
1052 
1053 	wr->pov = wi->poe && !(pp & BIT(3));
1054 	wr->uov = wi->e0poe && !(up & BIT(3));
1055 
1056 	/* R_VFPJF */
1057 	if (wr->px && wr->uw) {
1058 		set_priv_perms(wr, false, false, false);
1059 		set_unpriv_perms(wr, false, false, false);
1060 	}
1061 }
1062 
1063 static void compute_s1_overlay_permissions(struct kvm_vcpu *vcpu,
1064 					   struct s1_walk_info *wi,
1065 					   struct s1_walk_result *wr)
1066 {
1067 	u8 idx, pov_perms, uov_perms;
1068 
1069 	idx = FIELD_GET(PTE_PO_IDX_MASK, wr->desc);
1070 
1071 	switch (wi->regime) {
1072 	case TR_EL10:
1073 		pov_perms = perm_idx(vcpu, POR_EL1, idx);
1074 		uov_perms = perm_idx(vcpu, POR_EL0, idx);
1075 		break;
1076 	case TR_EL20:
1077 		pov_perms = perm_idx(vcpu, POR_EL2, idx);
1078 		uov_perms = perm_idx(vcpu, POR_EL0, idx);
1079 		break;
1080 	case TR_EL2:
1081 		pov_perms = perm_idx(vcpu, POR_EL2, idx);
1082 		uov_perms = 0;
1083 		break;
1084 	}
1085 
1086 	if (pov_perms & ~POE_RXW)
1087 		pov_perms = POE_NONE;
1088 
1089 	if (wi->poe && wr->pov) {
1090 		wr->pr &= pov_perms & POE_R;
1091 		wr->px &= pov_perms & POE_X;
1092 		wr->pw &= pov_perms & POE_W;
1093 	}
1094 
1095 	if (uov_perms & ~POE_RXW)
1096 		uov_perms = POE_NONE;
1097 
1098 	if (wi->e0poe && wr->uov) {
1099 		wr->ur &= uov_perms & POE_R;
1100 		wr->ux &= uov_perms & POE_X;
1101 		wr->uw &= uov_perms & POE_W;
1102 	}
1103 }
1104 
1105 static void compute_s1_permissions(struct kvm_vcpu *vcpu,
1106 				   struct s1_walk_info *wi,
1107 				   struct s1_walk_result *wr)
1108 {
1109 	bool pan;
1110 
1111 	if (!s1pie_enabled(vcpu, wi->regime))
1112 		compute_s1_direct_permissions(vcpu, wi, wr);
1113 	else
1114 		compute_s1_indirect_permissions(vcpu, wi, wr);
1115 
1116 	if (!wi->hpd)
1117 		compute_s1_hierarchical_permissions(vcpu, wi, wr);
1118 
1119 	if (wi->poe || wi->e0poe)
1120 		compute_s1_overlay_permissions(vcpu, wi, wr);
1121 
1122 	/* R_QXXPC */
1123 	if (wr->pwxn) {
1124 		if (!wr->pov && wr->pw)
1125 			wr->px = false;
1126 		if (wr->pov && wr->px)
1127 			wr->pw = false;
1128 	}
1129 
1130 	/* R_NPBXC */
1131 	if (wr->uwxn) {
1132 		if (!wr->uov && wr->uw)
1133 			wr->ux = false;
1134 		if (wr->uov && wr->ux)
1135 			wr->uw = false;
1136 	}
1137 
1138 	pan = wi->pan && (wr->ur || wr->uw ||
1139 			  (pan3_enabled(vcpu, wi->regime) && wr->ux));
1140 	wr->pw &= !pan;
1141 	wr->pr &= !pan;
1142 }
1143 
1144 static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
1145 {
1146 	struct s1_walk_result wr = {};
1147 	struct s1_walk_info wi = {};
1148 	bool perm_fail = false;
1149 	int ret, idx;
1150 
1151 	ret = setup_s1_walk(vcpu, op, &wi, &wr, vaddr);
1152 	if (ret)
1153 		goto compute_par;
1154 
1155 	if (wr.level == S1_MMU_DISABLED)
1156 		goto compute_par;
1157 
1158 	idx = srcu_read_lock(&vcpu->kvm->srcu);
1159 
1160 	ret = walk_s1(vcpu, &wi, &wr, vaddr);
1161 
1162 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
1163 
1164 	if (ret)
1165 		goto compute_par;
1166 
1167 	compute_s1_permissions(vcpu, &wi, &wr);
1168 
1169 	switch (op) {
1170 	case OP_AT_S1E1RP:
1171 	case OP_AT_S1E1R:
1172 	case OP_AT_S1E2R:
1173 		perm_fail = !wr.pr;
1174 		break;
1175 	case OP_AT_S1E1WP:
1176 	case OP_AT_S1E1W:
1177 	case OP_AT_S1E2W:
1178 		perm_fail = !wr.pw;
1179 		break;
1180 	case OP_AT_S1E0R:
1181 		perm_fail = !wr.ur;
1182 		break;
1183 	case OP_AT_S1E0W:
1184 		perm_fail = !wr.uw;
1185 		break;
1186 	case OP_AT_S1E1A:
1187 	case OP_AT_S1E2A:
1188 		break;
1189 	default:
1190 		BUG();
1191 	}
1192 
1193 	if (perm_fail)
1194 		fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false, false);
1195 
1196 compute_par:
1197 	return compute_par_s1(vcpu, &wr, wi.regime);
1198 }
1199 
1200 /*
1201  * Return the PAR_EL1 value resulting from the fast-path translation.
1202  *
1203  * On failure, the returned value may contain nothing but PAR_EL1.F and
1204  * cannot be taken at face value: it only means that the fast path did
1205  * not succeed, not that the translation itself failed, *unless* it
1206  * reports a S1 permission fault.
1207  */
1208 static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
1209 {
1210 	struct mmu_config config;
1211 	struct kvm_s2_mmu *mmu;
1212 	bool fail;
1213 	u64 par;
1214 
1215 	par = SYS_PAR_EL1_F;
1216 
1217 	/*
1218 	 * We've trapped, so everything is live on the CPU. As we will
1219 	 * be switching contexts behind everybody's back, disable
1220 	 * interrupts while holding the mmu lock.
1221 	 */
1222 	guard(write_lock_irqsave)(&vcpu->kvm->mmu_lock);
1223 
1224 	/*
1225 	 * If HCR_EL2.{E2H,TGE} == {1,1}, the MMU context is already
1226 	 * the right one (as we trapped from vEL2). If not, save the
1227 	 * full MMU context.
1228 	 */
1229 	if (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu))
1230 		goto skip_mmu_switch;
1231 
1232 	/*
1233 	 * Obtaining the S2 MMU for an L2 guest is horribly racy, and we may not
1234 	 * find it (recycled by another vcpu, for example). When this
1235 	 * happens, admit defeat immediately and use the SW (slow) path.
1236 	 */
1237 	mmu = lookup_s2_mmu(vcpu);
1238 	if (!mmu)
1239 		return par;
1240 
1241 	__mmu_config_save(&config);
1242 
1243 	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR0_EL1),	SYS_TTBR0);
1244 	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR1_EL1),	SYS_TTBR1);
1245 	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR_EL1),	SYS_TCR);
1246 	write_sysreg_el1(vcpu_read_sys_reg(vcpu, MAIR_EL1),	SYS_MAIR);
1247 	if (kvm_has_tcr2(vcpu->kvm)) {
1248 		write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR2_EL1), SYS_TCR2);
1249 		if (kvm_has_s1pie(vcpu->kvm)) {
1250 			write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIR_EL1), SYS_PIR);
1251 			write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIRE0_EL1), SYS_PIRE0);
1252 		}
1253 		if (kvm_has_s1poe(vcpu->kvm)) {
1254 			write_sysreg_el1(vcpu_read_sys_reg(vcpu, POR_EL1), SYS_POR);
1255 			write_sysreg_s(vcpu_read_sys_reg(vcpu, POR_EL0), SYS_POR_EL0);
1256 		}
1257 	}
1258 	write_sysreg_el1(vcpu_read_sys_reg(vcpu, SCTLR_EL1),	SYS_SCTLR);
1259 	__load_stage2(mmu, mmu->arch);
1260 
1261 skip_mmu_switch:
1262 	/* Clear TGE, enable S2 translation, we're rolling */
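	/*
	 * Use the live HCR_EL2 value here: 'config' is only populated on the
	 * context-switching path above, not when arriving via
	 * skip_mmu_switch.
	 */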
1263 	write_sysreg((read_sysreg(hcr_el2) & ~HCR_TGE) | HCR_VM, hcr_el2);
1264 	isb();
1265 
1266 	switch (op) {
1267 	case OP_AT_S1E1RP:
1268 	case OP_AT_S1E1WP:
1269 		fail = at_s1e1p_fast(vcpu, op, vaddr);
1270 		break;
1271 	case OP_AT_S1E1R:
1272 		fail = __kvm_at(OP_AT_S1E1R, vaddr);
1273 		break;
1274 	case OP_AT_S1E1W:
1275 		fail = __kvm_at(OP_AT_S1E1W, vaddr);
1276 		break;
1277 	case OP_AT_S1E0R:
1278 		fail = __kvm_at(OP_AT_S1E0R, vaddr);
1279 		break;
1280 	case OP_AT_S1E0W:
1281 		fail = __kvm_at(OP_AT_S1E0W, vaddr);
1282 		break;
1283 	case OP_AT_S1E1A:
1284 		fail = __kvm_at(OP_AT_S1E1A, vaddr);
1285 		break;
1286 	default:
1287 		WARN_ON_ONCE(1);
1288 		fail = true;
1289 		break;
1290 	}
1291 
1292 	if (!fail)
1293 		par = read_sysreg_par();
1294 
1295 	if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)))
1296 		__mmu_config_restore(&config);
1297 
1298 	return par;
1299 }
1300 
1301 static bool par_check_s1_perm_fault(u64 par)
1302 {
1303 	u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par);
1304 
1305 	return  ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM &&
1306 		 !(par & SYS_PAR_EL1_S));
1307 }
1308 
1309 void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
1310 {
1311 	u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr);
1312 
1313 	/*
1314 	 * If PAR_EL1 reports that AT failed on a S1 permission fault, we
1315 	 * know for sure that the PTW was able to walk the S1 tables and
1316 	 * there's nothing else to do.
1317 	 *
1318 	 * If AT failed for any other reason, then we must walk the guest S1
1319 	 * to emulate the instruction.
1320 	 */
1321 	if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
1322 		par = handle_at_slow(vcpu, op, vaddr);
1323 
1324 	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
1325 }
1326 
1327 void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
1328 {
1329 	u64 par;
1330 
1331 	/*
1332 	 * We've trapped, so everything is live on the CPU. As we will be
1333 	 * switching context behind everybody's back, disable interrupts...
1334 	 */
1335 	scoped_guard(write_lock_irqsave, &vcpu->kvm->mmu_lock) {
1336 		u64 val, hcr;
1337 		bool fail;
1338 
1339 		val = hcr = read_sysreg(hcr_el2);
1340 		val &= ~HCR_TGE;
1341 		val |= HCR_VM;
1342 
1343 		if (!vcpu_el2_e2h_is_set(vcpu))
1344 			val |= HCR_NV | HCR_NV1;
1345 
1346 		write_sysreg(val, hcr_el2);
1347 		isb();
1348 
1349 		par = SYS_PAR_EL1_F;
1350 
1351 		switch (op) {
1352 		case OP_AT_S1E2R:
1353 			fail = __kvm_at(OP_AT_S1E1R, vaddr);
1354 			break;
1355 		case OP_AT_S1E2W:
1356 			fail = __kvm_at(OP_AT_S1E1W, vaddr);
1357 			break;
1358 		case OP_AT_S1E2A:
1359 			fail = __kvm_at(OP_AT_S1E1A, vaddr);
1360 			break;
1361 		default:
1362 			WARN_ON_ONCE(1);
1363 			fail = true;
1364 		}
1365 
1366 		isb();
1367 
1368 		if (!fail)
1369 			par = read_sysreg_par();
1370 
1371 		write_sysreg(hcr, hcr_el2);
1372 		isb();
1373 	}
1374 
1375 	/* We failed the translation, let's replay it in slow motion */
1376 	if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
1377 		par = handle_at_slow(vcpu, op, vaddr);
1378 
1379 	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
1380 }
1381 
1382 void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
1383 {
1384 	struct kvm_s2_trans out = {};
1385 	u64 ipa, par;
1386 	bool write;
1387 	int ret;
1388 
1389 	/* Do the stage-1 translation */
1390 	switch (op) {
1391 	case OP_AT_S12E1R:
1392 		op = OP_AT_S1E1R;
1393 		write = false;
1394 		break;
1395 	case OP_AT_S12E1W:
1396 		op = OP_AT_S1E1W;
1397 		write = true;
1398 		break;
1399 	case OP_AT_S12E0R:
1400 		op = OP_AT_S1E0R;
1401 		write = false;
1402 		break;
1403 	case OP_AT_S12E0W:
1404 		op = OP_AT_S1E0W;
1405 		write = true;
1406 		break;
1407 	default:
1408 		WARN_ON_ONCE(1);
1409 		return;
1410 	}
1411 
1412 	__kvm_at_s1e01(vcpu, op, vaddr);
1413 	par = vcpu_read_sys_reg(vcpu, PAR_EL1);
1414 	if (par & SYS_PAR_EL1_F)
1415 		return;
1416 
1417 	/*
1418 	 * If we only have a single stage of translation (E2H=0 or
1419 	 * TGE=1), exit early. Same thing if {VM,DC}=={0,0}.
1420 	 */
1421 	if (!vcpu_el2_e2h_is_set(vcpu) || vcpu_el2_tge_is_set(vcpu) ||
1422 	    !(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC)))
1423 		return;
1424 
1425 	/* Do the stage-2 translation */
1426 	ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0));
1427 	out.esr = 0;
1428 	ret = kvm_walk_nested_s2(vcpu, ipa, &out);
1429 	if (ret < 0)
1430 		return;
1431 
1432 	/* Check the access permission */
1433 	if (!out.esr &&
1434 	    ((!write && !out.readable) || (write && !out.writable)))
1435 		out.esr = ESR_ELx_FSC_PERM_L(out.level & 0x3);
1436 
1437 	par = compute_par_s12(vcpu, par, &out);
1438 	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
1439 }
1440