xref: /linux/arch/arm64/kvm/at.c (revision 43db1111073049220381944af4a3b8a5400eda71)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2017 - Linaro Ltd
4  * Author: Jintack Lim <jintack.lim@linaro.org>
5  */
6 
7 #include <linux/kvm_host.h>
8 
9 #include <asm/esr.h>
10 #include <asm/kvm_hyp.h>
11 #include <asm/kvm_mmu.h>
12 
13 static void fail_s1_walk(struct s1_walk_result *wr, u8 fst, bool s1ptw)
14 {
15 	wr->fst		= fst;
16 	wr->ptw		= s1ptw;
17 	wr->s2		= s1ptw;
18 	wr->failed	= true;
19 }
20 
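/* Sentinel "level" value: S1 translation is disabled and the VA maps flat to the PA */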
21 #define S1_MMU_DISABLED		(-127)
22 
23 static int get_ia_size(struct s1_walk_info *wi)
24 {
25 	return 64 - wi->txsz;
26 }
27 
28 /* Return true if the IPA is out of the OA range */
29 static bool check_output_size(u64 ipa, struct s1_walk_info *wi)
30 {
31 	return wi->max_oa_bits < 48 && (ipa & GENMASK_ULL(47, wi->max_oa_bits));
32 }
33 
34 /* Return the translation regime that applies to an AT instruction */
35 static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 op)
36 {
37 	/*
38 	 * We only get here from guest EL2, so the translation
39 	 * regime AT applies to is solely defined by {E2H,TGE}.
40 	 */
41 	switch (op) {
42 	case OP_AT_S1E2R:
43 	case OP_AT_S1E2W:
44 	case OP_AT_S1E2A:
45 		return vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2;
46 		break;
47 	default:
48 		return (vcpu_el2_e2h_is_set(vcpu) &&
49 			vcpu_el2_tge_is_set(vcpu)) ? TR_EL20 : TR_EL10;
50 	}
51 }
52 
53 static bool s1pie_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
54 {
55 	if (!kvm_has_s1pie(vcpu->kvm))
56 		return false;
57 
58 	switch (regime) {
59 	case TR_EL2:
60 	case TR_EL20:
61 		return vcpu_read_sys_reg(vcpu, TCR2_EL2) & TCR2_EL2_PIE;
62 	case TR_EL10:
63 		return  (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En) &&
64 			(__vcpu_sys_reg(vcpu, TCR2_EL1) & TCR2_EL1_PIE);
65 	default:
66 		BUG();
67 	}
68 }
69 
70 static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi)
71 {
72 	u64 val;
73 
74 	if (!kvm_has_s1poe(vcpu->kvm)) {
75 		wi->poe = wi->e0poe = false;
76 		return;
77 	}
78 
79 	switch (wi->regime) {
80 	case TR_EL2:
81 	case TR_EL20:
82 		val = vcpu_read_sys_reg(vcpu, TCR2_EL2);
83 		wi->poe = val & TCR2_EL2_POE;
84 		wi->e0poe = (wi->regime == TR_EL20) && (val & TCR2_EL2_E0POE);
85 		break;
86 	case TR_EL10:
87 		if (!(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En)) {
88 			wi->poe = wi->e0poe = false;
89 			return;
90 		}
91 
92 		val = __vcpu_sys_reg(vcpu, TCR2_EL1);
93 		wi->poe = val & TCR2_EL1_POE;
94 		wi->e0poe = val & TCR2_EL1_E0POE;
95 	}
96 }
97 
98 static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
99 			 struct s1_walk_result *wr, u64 va)
100 {
101 	u64 hcr, sctlr, tcr, tg, ps, ia_bits, ttbr;
102 	unsigned int stride, x;
103 	bool va55, tbi, lva;
104 
105 	hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
106 
107 	va55 = va & BIT(55);
108 
109 	if (wi->regime == TR_EL2 && va55)
110 		goto addrsz;
111 
112 	wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC));
113 
114 	switch (wi->regime) {
115 	case TR_EL10:
116 		sctlr	= vcpu_read_sys_reg(vcpu, SCTLR_EL1);
117 		tcr	= vcpu_read_sys_reg(vcpu, TCR_EL1);
118 		ttbr	= (va55 ?
119 			   vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
120 			   vcpu_read_sys_reg(vcpu, TTBR0_EL1));
121 		break;
122 	case TR_EL2:
123 	case TR_EL20:
124 		sctlr	= vcpu_read_sys_reg(vcpu, SCTLR_EL2);
125 		tcr	= vcpu_read_sys_reg(vcpu, TCR_EL2);
126 		ttbr	= (va55 ?
127 			   vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
128 			   vcpu_read_sys_reg(vcpu, TTBR0_EL2));
129 		break;
130 	default:
131 		BUG();
132 	}
133 
134 	tbi = (wi->regime == TR_EL2 ?
135 	       FIELD_GET(TCR_EL2_TBI, tcr) :
136 	       (va55 ?
137 		FIELD_GET(TCR_TBI1, tcr) :
138 		FIELD_GET(TCR_TBI0, tcr)));
139 
140 	if (!tbi && (u64)sign_extend64(va, 55) != va)
141 		goto addrsz;
142 
143 	va = (u64)sign_extend64(va, 55);
144 
145 	/* Let's put the MMU disabled case aside immediately */
146 	switch (wi->regime) {
147 	case TR_EL10:
148 		/*
149 		 * If dealing with the EL1&0 translation regime, 3 things
150 		 * can disable the S1 translation:
151 		 *
152 		 * - HCR_EL2.DC = 1
153 		 * - HCR_EL2.{E2H,TGE} = {0,1}
154 		 * - SCTLR_EL1.M = 0
155 		 *
156 		 * The TGE part is interesting. If we have decided that this
157 		 * is EL1&0, then it means that either {E2H,TGE} == {1,0} or
158 		 * {0,x}, and we only need to test for TGE == 1.
159 		 */
160 		if (hcr & (HCR_DC | HCR_TGE)) {
161 			wr->level = S1_MMU_DISABLED;
162 			break;
163 		}
164 		fallthrough;
165 	case TR_EL2:
166 	case TR_EL20:
167 		if (!(sctlr & SCTLR_ELx_M))
168 			wr->level = S1_MMU_DISABLED;
169 		break;
170 	}
171 
172 	if (wr->level == S1_MMU_DISABLED) {
173 		if (va >= BIT(kvm_get_pa_bits(vcpu->kvm)))
174 			goto addrsz;
175 
176 		wr->pa = va;
177 		return 0;
178 	}
179 
180 	wi->be = sctlr & SCTLR_ELx_EE;
181 
182 	wi->hpd  = kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, HPDS, IMP);
183 	wi->hpd &= (wi->regime == TR_EL2 ?
184 		    FIELD_GET(TCR_EL2_HPD, tcr) :
185 		    (va55 ?
186 		     FIELD_GET(TCR_HPD1, tcr) :
187 		     FIELD_GET(TCR_HPD0, tcr)));
188 	/* R_JHSVW */
189 	wi->hpd |= s1pie_enabled(vcpu, wi->regime);
190 
191 	/* Do we have POE? */
192 	compute_s1poe(vcpu, wi);
193 
194 	/* R_BVXDG */
195 	wi->hpd |= (wi->poe || wi->e0poe);
196 
197 	/* Someone was silly enough to encode TG0/TG1 differently */
198 	if (va55) {
199 		wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
200 		tg = FIELD_GET(TCR_TG1_MASK, tcr);
201 
202 		switch (tg << TCR_TG1_SHIFT) {
203 		case TCR_TG1_4K:
204 			wi->pgshift = 12;	 break;
205 		case TCR_TG1_16K:
206 			wi->pgshift = 14;	 break;
207 		case TCR_TG1_64K:
208 		default:	    /* IMPDEF: treat any other value as 64k */
209 			wi->pgshift = 16;	 break;
210 		}
211 	} else {
212 		wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
213 		tg = FIELD_GET(TCR_TG0_MASK, tcr);
214 
215 		switch (tg << TCR_TG0_SHIFT) {
216 		case TCR_TG0_4K:
217 			wi->pgshift = 12;	 break;
218 		case TCR_TG0_16K:
219 			wi->pgshift = 14;	 break;
220 		case TCR_TG0_64K:
221 		default:	    /* IMPDEF: treat any other value as 64k */
222 			wi->pgshift = 16;	 break;
223 		}
224 	}
225 
226 	/* R_PLCGL, R_YXNYW */
227 	if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR2_EL1, ST, 48_47)) {
228 		if (wi->txsz > 39)
229 			goto transfault_l0;
230 	} else {
231 		if (wi->txsz > 48 || (BIT(wi->pgshift) == SZ_64K && wi->txsz > 47))
232 			goto transfault_l0;
233 	}
234 
235 	/* R_GTJBY, R_SXWGM */
236 	switch (BIT(wi->pgshift)) {
237 	case SZ_4K:
238 		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT);
239 		lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
240 		break;
241 	case SZ_16K:
242 		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT);
243 		lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
244 		break;
245 	case SZ_64K:
246 		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, VARange, 52);
247 		break;
248 	}
249 
250 	if ((lva && wi->txsz < 12) || (!lva && wi->txsz < 16))
251 		goto transfault_l0;
252 
253 	ia_bits = get_ia_size(wi);
254 
255 	/* R_YYVYV, I_THCZK */
256 	if ((!va55 && va > GENMASK(ia_bits - 1, 0)) ||
257 	    (va55 && va < GENMASK(63, ia_bits)))
258 		goto transfault_l0;
259 
260 	/* I_ZFSYQ */
261 	if (wi->regime != TR_EL2 &&
262 	    (tcr & (va55 ? TCR_EPD1_MASK : TCR_EPD0_MASK)))
263 		goto transfault_l0;
264 
265 	/* R_BNDVG and following statements */
266 	if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, E0PD, IMP) &&
267 	    wi->as_el0 && (tcr & (va55 ? TCR_E0PD1 : TCR_E0PD0)))
268 		goto transfault_l0;
269 
270 	/* AArch64.S1StartLevel() */
271 	stride = wi->pgshift - 3;
272 	wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride);
273 
274 	ps = (wi->regime == TR_EL2 ?
275 	      FIELD_GET(TCR_EL2_PS_MASK, tcr) : FIELD_GET(TCR_IPS_MASK, tcr));
276 
277 	wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps));
278 
279 	/* Compute minimal alignment */
280 	x = 3 + ia_bits - ((3 - wi->sl) * stride + wi->pgshift);
281 
282 	wi->baddr = ttbr & TTBRx_EL1_BADDR;
283 
284 	/* R_VPBBF */
285 	if (check_output_size(wi->baddr, wi))
286 		goto addrsz;
287 
288 	wi->baddr &= GENMASK_ULL(wi->max_oa_bits - 1, x);
289 
290 	return 0;
291 
292 addrsz:				/* Address Size Fault level 0 */
293 	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false);
294 	return -EFAULT;
295 
296 transfault_l0:			/* Translation Fault level 0 */
297 	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(0), false);
298 	return -EFAULT;
299 }
300 
301 static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
302 		   struct s1_walk_result *wr, u64 va)
303 {
304 	u64 va_top, va_bottom, baddr, desc;
305 	int level, stride, ret;
306 
307 	level = wi->sl;
308 	stride = wi->pgshift - 3;
309 	baddr = wi->baddr;
310 
311 	va_top = get_ia_size(wi) - 1;
312 
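	/* Descend from the start level, stopping at a block or page descriptor */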
313 	while (1) {
314 		u64 index, ipa;
315 
316 		va_bottom = (3 - level) * stride + wi->pgshift;
317 		index = (va & GENMASK_ULL(va_top, va_bottom)) >> (va_bottom - 3);
318 
319 		ipa = baddr | index;
320 
321 		if (wi->s2) {
322 			struct kvm_s2_trans s2_trans = {};
323 
324 			ret = kvm_walk_nested_s2(vcpu, ipa, &s2_trans);
325 			if (ret) {
326 				fail_s1_walk(wr,
327 					     (s2_trans.esr & ~ESR_ELx_FSC_LEVEL) | level,
328 					     true);
329 				return ret;
330 			}
331 
332 			if (!kvm_s2_trans_readable(&s2_trans)) {
333 				fail_s1_walk(wr, ESR_ELx_FSC_PERM_L(level),
334 					     true);
335 
336 				return -EPERM;
337 			}
338 
339 			ipa = kvm_s2_trans_output(&s2_trans);
340 		}
341 
342 		ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc));
343 		if (ret) {
344 			fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), false);
345 			return ret;
346 		}
347 
348 		if (wi->be)
349 			desc = be64_to_cpu((__force __be64)desc);
350 		else
351 			desc = le64_to_cpu((__force __le64)desc);
352 
353 		/* Invalid descriptor */
354 		if (!(desc & BIT(0)))
355 			goto transfault;
356 
357 		/* Block mapping, check validity down the line */
358 		if (!(desc & BIT(1)))
359 			break;
360 
361 		/* Page mapping */
362 		if (level == 3)
363 			break;
364 
365 		/* Table handling */
366 		if (!wi->hpd) {
367 			wr->APTable  |= FIELD_GET(S1_TABLE_AP, desc);
368 			wr->UXNTable |= FIELD_GET(PMD_TABLE_UXN, desc);
369 			wr->PXNTable |= FIELD_GET(PMD_TABLE_PXN, desc);
370 		}
371 
372 		baddr = desc & GENMASK_ULL(47, wi->pgshift);
373 
374 		/* Check for out-of-range OA */
375 		if (check_output_size(baddr, wi))
376 			goto addrsz;
377 
378 		/* Prepare for next round */
379 		va_top = va_bottom - 1;
380 		level++;
381 	}
382 
383 	/* Block mapping, check the validity of the level */
384 	if (!(desc & BIT(1))) {
385 		bool valid_block = false;
386 
387 		switch (BIT(wi->pgshift)) {
388 		case SZ_4K:
389 			valid_block = level == 1 || level == 2;
390 			break;
391 		case SZ_16K:
392 		case SZ_64K:
393 			valid_block = level == 2;
394 			break;
395 		}
396 
397 		if (!valid_block)
398 			goto transfault;
399 	}
400 
401 	if (check_output_size(desc & GENMASK(47, va_bottom), wi))
402 		goto addrsz;
403 
404 	if (!(desc & PTE_AF)) {
405 		fail_s1_walk(wr, ESR_ELx_FSC_ACCESS_L(level), false);
406 		return -EACCES;
407 	}
408 
409 	va_bottom += contiguous_bit_shift(desc, wi, level);
410 
411 	wr->failed = false;
412 	wr->level = level;
413 	wr->desc = desc;
414 	wr->pa = desc & GENMASK(47, va_bottom);
415 	wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0);
416 
417 	wr->nG = (wi->regime != TR_EL2) && (desc & PTE_NG);
418 	if (wr->nG) {
419 		u64 asid_ttbr, tcr;
420 
421 		switch (wi->regime) {
422 		case TR_EL10:
423 			tcr = vcpu_read_sys_reg(vcpu, TCR_EL1);
424 			asid_ttbr = ((tcr & TCR_A1) ?
425 				     vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
426 				     vcpu_read_sys_reg(vcpu, TTBR0_EL1));
427 			break;
428 		case TR_EL20:
429 			tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
430 			asid_ttbr = ((tcr & TCR_A1) ?
431 				     vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
432 				     vcpu_read_sys_reg(vcpu, TTBR0_EL2));
433 			break;
434 		default:
435 			BUG();
436 		}
437 
438 		wr->asid = FIELD_GET(TTBR_ASID_MASK, asid_ttbr);
439 		if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) ||
440 		    !(tcr & TCR_ASID16))
441 			wr->asid &= GENMASK(7, 0);
442 	}
443 
444 	return 0;
445 
446 addrsz:
447 	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(level), false);
448 	return -EINVAL;
449 transfault:
450 	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(level), false);
451 	return -ENOENT;
452 }
453 
454 struct mmu_config {
455 	u64	ttbr0;
456 	u64	ttbr1;
457 	u64	tcr;
458 	u64	mair;
459 	u64	tcr2;
460 	u64	pir;
461 	u64	pire0;
462 	u64	por_el0;
463 	u64	por_el1;
464 	u64	sctlr;
465 	u64	vttbr;
466 	u64	vtcr;
467 };
468 
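/* Snapshot the live S1/S2 MMU registers so they can be restored after the AT */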
469 static void __mmu_config_save(struct mmu_config *config)
470 {
471 	config->ttbr0	= read_sysreg_el1(SYS_TTBR0);
472 	config->ttbr1	= read_sysreg_el1(SYS_TTBR1);
473 	config->tcr	= read_sysreg_el1(SYS_TCR);
474 	config->mair	= read_sysreg_el1(SYS_MAIR);
475 	if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
476 		config->tcr2	= read_sysreg_el1(SYS_TCR2);
477 		if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
478 			config->pir	= read_sysreg_el1(SYS_PIR);
479 			config->pire0	= read_sysreg_el1(SYS_PIRE0);
480 		}
481 		if (system_supports_poe()) {
482 			config->por_el1	= read_sysreg_el1(SYS_POR);
483 			config->por_el0	= read_sysreg_s(SYS_POR_EL0);
484 		}
485 	}
486 	config->sctlr	= read_sysreg_el1(SYS_SCTLR);
487 	config->vttbr	= read_sysreg(vttbr_el2);
488 	config->vtcr	= read_sysreg(vtcr_el2);
489 }
490 
491 static void __mmu_config_restore(struct mmu_config *config)
492 {
493 	/*
494 	 * ARM errata 1165522 and 1530923 require TGE to be 1 before
495 	 * we update the guest state.
496 	 */
497 	asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
498 
499 	write_sysreg_el1(config->ttbr0,	SYS_TTBR0);
500 	write_sysreg_el1(config->ttbr1,	SYS_TTBR1);
501 	write_sysreg_el1(config->tcr,	SYS_TCR);
502 	write_sysreg_el1(config->mair,	SYS_MAIR);
503 	if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
504 		write_sysreg_el1(config->tcr2, SYS_TCR2);
505 		if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
506 			write_sysreg_el1(config->pir, SYS_PIR);
507 			write_sysreg_el1(config->pire0, SYS_PIRE0);
508 		}
509 		if (system_supports_poe()) {
510 			write_sysreg_el1(config->por_el1, SYS_POR);
511 			write_sysreg_s(config->por_el0, SYS_POR_EL0);
512 		}
513 	}
514 	write_sysreg_el1(config->sctlr,	SYS_SCTLR);
515 	write_sysreg(config->vttbr,	vttbr_el2);
516 	write_sysreg(config->vtcr,	vtcr_el2);
517 }
518 
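/* Issue AT S1E1{R,W}P with PSTATE.PAN temporarily mirroring the guest's PAN bit */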
519 static bool at_s1e1p_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
520 {
521 	u64 host_pan;
522 	bool fail;
523 
524 	host_pan = read_sysreg_s(SYS_PSTATE_PAN);
525 	write_sysreg_s(*vcpu_cpsr(vcpu) & PSTATE_PAN, SYS_PSTATE_PAN);
526 
527 	switch (op) {
528 	case OP_AT_S1E1RP:
529 		fail = __kvm_at(OP_AT_S1E1RP, vaddr);
530 		break;
531 	case OP_AT_S1E1WP:
532 		fail = __kvm_at(OP_AT_S1E1WP, vaddr);
533 		break;
534 	}
535 
536 	write_sysreg_s(host_pan, SYS_PSTATE_PAN);
537 
538 	return fail;
539 }
540 
541 #define MEMATTR(ic, oc)		(MEMATTR_##oc << 4 | MEMATTR_##ic)
542 #define MEMATTR_NC		0b0100
543 #define MEMATTR_Wt		0b1000
544 #define MEMATTR_Wb		0b1100
545 #define MEMATTR_WbRaWa		0b1111
546 
547 #define MEMATTR_IS_DEVICE(m)	(((m) & GENMASK(7, 4)) == 0)
548 
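/* Convert a stage-2 MemAttr[3:0] field into a MAIR-style attribute byte */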
549 static u8 s2_memattr_to_attr(u8 memattr)
550 {
551 	memattr &= 0b1111;
552 
553 	switch (memattr) {
554 	case 0b0000:
555 	case 0b0001:
556 	case 0b0010:
557 	case 0b0011:
558 		return memattr << 2;
559 	case 0b0100:
560 		return MEMATTR(Wb, Wb);
561 	case 0b0101:
562 		return MEMATTR(NC, NC);
563 	case 0b0110:
564 		return MEMATTR(Wt, NC);
565 	case 0b0111:
566 		return MEMATTR(Wb, NC);
567 	case 0b1000:
568 		/* Reserved, assume NC */
569 		return MEMATTR(NC, NC);
570 	case 0b1001:
571 		return MEMATTR(NC, Wt);
572 	case 0b1010:
573 		return MEMATTR(Wt, Wt);
574 	case 0b1011:
575 		return MEMATTR(Wb, Wt);
576 	case 0b1100:
577 		/* Reserved, assume NC */
578 		return MEMATTR(NC, NC);
579 	case 0b1101:
580 		return MEMATTR(NC, Wb);
581 	case 0b1110:
582 		return MEMATTR(Wt, Wb);
583 	case 0b1111:
584 		return MEMATTR(Wb, Wb);
585 	default:
586 		unreachable();
587 	}
588 }
589 
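/* Combine one 4-bit (inner or outer) S1 attribute with its S2 counterpart */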
590 static u8 combine_s1_s2_attr(u8 s1, u8 s2)
591 {
592 	bool transient;
593 	u8 final = 0;
594 
595 	/* Upgrade transient s1 to non-transient to simplify things */
596 	switch (s1) {
597 	case 0b0001 ... 0b0011:	/* Normal, Write-Through Transient */
598 		transient = true;
599 		s1 = MEMATTR_Wt | (s1 & GENMASK(1,0));
600 		break;
601 	case 0b0101 ... 0b0111:	/* Normal, Write-Back Transient */
602 		transient = true;
603 		s1 = MEMATTR_Wb | (s1 & GENMASK(1,0));
604 		break;
605 	default:
606 		transient = false;
607 	}
608 
609 	/* S2CombineS1AttrHints() */
610 	if ((s1 & GENMASK(3, 2)) == MEMATTR_NC ||
611 	    (s2 & GENMASK(3, 2)) == MEMATTR_NC)
612 		final = MEMATTR_NC;
613 	else if ((s1 & GENMASK(3, 2)) == MEMATTR_Wt ||
614 		 (s2 & GENMASK(3, 2)) == MEMATTR_Wt)
615 		final = MEMATTR_Wt;
616 	else
617 		final = MEMATTR_Wb;
618 
619 	if (final != MEMATTR_NC) {
620 		/* Inherit RaWa hints from S1 */
621 		if (transient) {
622 			switch (s1 & GENMASK(3, 2)) {
623 			case MEMATTR_Wt:
624 				final = 0;
625 				break;
626 			case MEMATTR_Wb:
627 				final = MEMATTR_NC;
628 				break;
629 			}
630 		}
631 
632 		final |= s1 & GENMASK(1, 0);
633 	}
634 
635 	return final;
636 }
637 
638 #define ATTR_NSH	0b00
639 #define ATTR_RSV	0b01
640 #define ATTR_OSH	0b10
641 #define ATTR_ISH	0b11
642 
643 static u8 compute_sh(u8 attr, u64 desc)
644 {
645 	u8 sh;
646 
647 	/* Any form of device, as well as NC has SH[1:0]=0b10 */
648 	if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC))
649 		return ATTR_OSH;
650 
651 	sh = FIELD_GET(PTE_SHARED, desc);
652 	if (sh == ATTR_RSV)		/* Reserved, mapped to NSH */
653 		sh = ATTR_NSH;
654 
655 	return sh;
656 }
657 
658 static u8 combine_sh(u8 s1_sh, u8 s2_sh)
659 {
660 	if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH)
661 		return ATTR_OSH;
662 	if (s1_sh == ATTR_ISH || s2_sh == ATTR_ISH)
663 		return ATTR_ISH;
664 
665 	return ATTR_NSH;
666 }
667 
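/* Fold the S2 translation result into the S1 PAR_EL1 value (attributes, SH, OA) */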
668 static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
669 			   struct kvm_s2_trans *tr)
670 {
671 	u8 s1_parattr, s2_memattr, final_attr;
672 	u64 par;
673 
674 	/* If S2 has failed to translate, report the damage */
675 	if (tr->esr) {
676 		par = SYS_PAR_EL1_RES1;
677 		par |= SYS_PAR_EL1_F;
678 		par |= SYS_PAR_EL1_S;
679 		par |= FIELD_PREP(SYS_PAR_EL1_FST, tr->esr);
680 		return par;
681 	}
682 
683 	s1_parattr = FIELD_GET(SYS_PAR_EL1_ATTR, s1_par);
684 	s2_memattr = FIELD_GET(GENMASK(5, 2), tr->desc);
685 
686 	if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_FWB) {
687 		if (!kvm_has_feat(vcpu->kvm, ID_AA64PFR2_EL1, MTEPERM, IMP))
688 			s2_memattr &= ~BIT(3);
689 
690 		/* Combination of R_VRJSW and R_RHWZM */
691 		switch (s2_memattr) {
692 		case 0b0101:
693 			if (MEMATTR_IS_DEVICE(s1_parattr))
694 				final_attr = s1_parattr;
695 			else
696 				final_attr = MEMATTR(NC, NC);
697 			break;
698 		case 0b0110:
699 		case 0b1110:
700 			final_attr = MEMATTR(WbRaWa, WbRaWa);
701 			break;
702 		case 0b0111:
703 		case 0b1111:
704 			/* Preserve S1 attribute */
705 			final_attr = s1_parattr;
706 			break;
707 		case 0b0100:
708 		case 0b1100:
709 		case 0b1101:
710 			/* Reserved, do something non-silly */
711 			final_attr = s1_parattr;
712 			break;
713 		default:
714 			/*
715 			 * MemAttr[2]=0, Device from S2.
716 			 *
717 			 * FWB does not influence the way that stage 1
718 			 * memory types and attributes are combined
719 			 * with stage 2 Device type and attributes.
720 			 */
721 			final_attr = min(s2_memattr_to_attr(s2_memattr),
722 					 s1_parattr);
723 		}
724 	} else {
725 		/* Combination of R_HMNDG, R_TNHFM and R_GQFSF */
726 		u8 s2_parattr = s2_memattr_to_attr(s2_memattr);
727 
728 		if (MEMATTR_IS_DEVICE(s1_parattr) ||
729 		    MEMATTR_IS_DEVICE(s2_parattr)) {
730 			final_attr = min(s1_parattr, s2_parattr);
731 		} else {
732 			/* At this stage, this is memory vs memory */
733 			final_attr  = combine_s1_s2_attr(s1_parattr & 0xf,
734 							 s2_parattr & 0xf);
735 			final_attr |= combine_s1_s2_attr(s1_parattr >> 4,
736 							 s2_parattr >> 4) << 4;
737 		}
738 	}
739 
740 	if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_CD) &&
741 	    !MEMATTR_IS_DEVICE(final_attr))
742 		final_attr = MEMATTR(NC, NC);
743 
744 	par  = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr);
745 	par |= tr->output & GENMASK(47, 12);
746 	par |= FIELD_PREP(SYS_PAR_EL1_SH,
747 			  combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par),
748 				     compute_sh(final_attr, tr->desc)));
749 
750 	return par;
751 }
752 
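/* Build PAR_EL1 for an S1-only translation: fault, MMU off, or a successful walk */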
753 static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr,
754 			  enum trans_regime regime)
755 {
756 	u64 par;
757 
758 	if (wr->failed) {
759 		par = SYS_PAR_EL1_RES1;
760 		par |= SYS_PAR_EL1_F;
761 		par |= FIELD_PREP(SYS_PAR_EL1_FST, wr->fst);
762 		par |= wr->ptw ? SYS_PAR_EL1_PTW : 0;
763 		par |= wr->s2 ? SYS_PAR_EL1_S : 0;
764 	} else if (wr->level == S1_MMU_DISABLED) {
765 		/* MMU off or HCR_EL2.DC == 1 */
766 		par  = SYS_PAR_EL1_NSE;
767 		par |= wr->pa & GENMASK_ULL(47, 12);
768 
769 		if (regime == TR_EL10 &&
770 		    (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_DC)) {
771 			par |= FIELD_PREP(SYS_PAR_EL1_ATTR,
772 					  MEMATTR(WbRaWa, WbRaWa));
773 			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_NSH);
774 		} else {
775 			par |= FIELD_PREP(SYS_PAR_EL1_ATTR, 0); /* nGnRnE */
776 			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_OSH);
777 		}
778 	} else {
779 		u64 mair, sctlr;
780 		u8 sh;
781 
782 		par  = SYS_PAR_EL1_NSE;
783 
784 		mair = (regime == TR_EL10 ?
785 			vcpu_read_sys_reg(vcpu, MAIR_EL1) :
786 			vcpu_read_sys_reg(vcpu, MAIR_EL2));
787 
788 		mair >>= FIELD_GET(PTE_ATTRINDX_MASK, wr->desc) * 8;
789 		mair &= 0xff;
790 
791 		sctlr = (regime == TR_EL10 ?
792 			 vcpu_read_sys_reg(vcpu, SCTLR_EL1) :
793 			 vcpu_read_sys_reg(vcpu, SCTLR_EL2));
794 
795 		/* Force NC for memory if SCTLR_ELx.C is clear */
796 		if (!(sctlr & SCTLR_EL1_C) && !MEMATTR_IS_DEVICE(mair))
797 			mair = MEMATTR(NC, NC);
798 
799 		par |= FIELD_PREP(SYS_PAR_EL1_ATTR, mair);
800 		par |= wr->pa & GENMASK_ULL(47, 12);
801 
802 		sh = compute_sh(mair, wr->desc);
803 		par |= FIELD_PREP(SYS_PAR_EL1_SH, sh);
804 	}
805 
806 	return par;
807 }
808 
809 static bool pan3_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
810 {
811 	u64 sctlr;
812 
813 	if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, PAN, PAN3))
814 		return false;
815 
816 	if (s1pie_enabled(vcpu, regime))
817 		return true;
818 
819 	if (regime == TR_EL10)
820 		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
821 	else
822 		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);
823 
824 	return sctlr & SCTLR_EL1_EPAN;
825 }
826 
827 static void compute_s1_direct_permissions(struct kvm_vcpu *vcpu,
828 					  struct s1_walk_info *wi,
829 					  struct s1_walk_result *wr)
830 {
831 	bool wxn;
832 
833 	/* Non-hierarchical part of AArch64.S1DirectBasePermissions() */
834 	if (wi->regime != TR_EL2) {
835 		switch (FIELD_GET(PTE_USER | PTE_RDONLY, wr->desc)) {
836 		case 0b00:
837 			wr->pr = wr->pw = true;
838 			wr->ur = wr->uw = false;
839 			break;
840 		case 0b01:
841 			wr->pr = wr->pw = wr->ur = wr->uw = true;
842 			break;
843 		case 0b10:
844 			wr->pr = true;
845 			wr->pw = wr->ur = wr->uw = false;
846 			break;
847 		case 0b11:
848 			wr->pr = wr->ur = true;
849 			wr->pw = wr->uw = false;
850 			break;
851 		}
852 
853 		/* We don't use px for anything yet, but hey... */
854 		wr->px = !((wr->desc & PTE_PXN) || wr->uw);
855 		wr->ux = !(wr->desc & PTE_UXN);
856 	} else {
857 		wr->ur = wr->uw = wr->ux = false;
858 
859 		if (!(wr->desc & PTE_RDONLY)) {
860 			wr->pr = wr->pw = true;
861 		} else {
862 			wr->pr = true;
863 			wr->pw = false;
864 		}
865 
866 		/* XN maps to UXN */
867 		wr->px = !(wr->desc & PTE_UXN);
868 	}
869 
870 	switch (wi->regime) {
871 	case TR_EL2:
872 	case TR_EL20:
873 		wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_WXN);
874 		break;
875 	case TR_EL10:
876 		wxn = (__vcpu_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN);
877 		break;
878 	}
879 
880 	wr->pwxn = wr->uwxn = wxn;
881 	wr->pov = wi->poe;
882 	wr->uov = wi->e0poe;
883 }
884 
885 static void compute_s1_hierarchical_permissions(struct kvm_vcpu *vcpu,
886 						struct s1_walk_info *wi,
887 						struct s1_walk_result *wr)
888 {
889 	/* Hierarchical part of AArch64.S1DirectBasePermissions() */
890 	if (wi->regime != TR_EL2) {
891 		switch (wr->APTable) {
892 		case 0b00:
893 			break;
894 		case 0b01:
895 			wr->ur = wr->uw = false;
896 			break;
897 		case 0b10:
898 			wr->pw = wr->uw = false;
899 			break;
900 		case 0b11:
901 			wr->pw = wr->ur = wr->uw = false;
902 			break;
903 		}
904 
905 		wr->px &= !wr->PXNTable;
906 		wr->ux &= !wr->UXNTable;
907 	} else {
908 		if (wr->APTable & BIT(1))
909 			wr->pw = false;
910 
911 		/* XN maps to UXN */
912 		wr->px &= !wr->UXNTable;
913 	}
914 }
915 
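/* Extract the 4-bit permission field at index 'i' from a PIR/POR-style register */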
916 #define perm_idx(v, r, i)	((vcpu_read_sys_reg((v), (r)) >> ((i) * 4)) & 0xf)
917 
918 #define set_priv_perms(wr, r, w, x)	\
919 	do {				\
920 		(wr)->pr = (r);		\
921 		(wr)->pw = (w);		\
922 		(wr)->px = (x);		\
923 	} while (0)
924 
925 #define set_unpriv_perms(wr, r, w, x)	\
926 	do {				\
927 		(wr)->ur = (r);		\
928 		(wr)->uw = (w);		\
929 		(wr)->ux = (x);		\
930 	} while (0)
931 
932 #define set_priv_wxn(wr, v)		\
933 	do {				\
934 		(wr)->pwxn = (v);	\
935 	} while (0)
936 
937 #define set_unpriv_wxn(wr, v)		\
938 	do {				\
939 		(wr)->uwxn = (v);	\
940 	} while (0)
941 
942 /* Similar to AArch64.S1IndirectBasePermissions(), without GCS  */
943 #define set_perms(w, wr, ip)						\
944 	do {								\
945 		/* R_LLZDZ */						\
946 		switch ((ip)) {						\
947 		case 0b0000:						\
948 			set_ ## w ## _perms((wr), false, false, false);	\
949 			break;						\
950 		case 0b0001:						\
951 			set_ ## w ## _perms((wr), true , false, false);	\
952 			break;						\
953 		case 0b0010:						\
954 			set_ ## w ## _perms((wr), false, false, true );	\
955 			break;						\
956 		case 0b0011:						\
957 			set_ ## w ## _perms((wr), true , false, true );	\
958 			break;						\
959 		case 0b0100:						\
960 			set_ ## w ## _perms((wr), false, false, false);	\
961 			break;						\
962 		case 0b0101:						\
963 			set_ ## w ## _perms((wr), true , true , false);	\
964 			break;						\
965 		case 0b0110:						\
966 			set_ ## w ## _perms((wr), true , true , true );	\
967 			break;						\
968 		case 0b0111:						\
969 			set_ ## w ## _perms((wr), true , true , true );	\
970 			break;						\
971 		case 0b1000:						\
972 			set_ ## w ## _perms((wr), true , false, false);	\
973 			break;						\
974 		case 0b1001:						\
975 			set_ ## w ## _perms((wr), true , false, false);	\
976 			break;						\
977 		case 0b1010:						\
978 			set_ ## w ## _perms((wr), true , false, true );	\
979 			break;						\
980 		case 0b1011:						\
981 			set_ ## w ## _perms((wr), false, false, false);	\
982 			break;						\
983 		case 0b1100:						\
984 			set_ ## w ## _perms((wr), true , true , false);	\
985 			break;						\
986 		case 0b1101:						\
987 			set_ ## w ## _perms((wr), false, false, false);	\
988 			break;						\
989 		case 0b1110:						\
990 			set_ ## w ## _perms((wr), true , true , true );	\
991 			break;						\
992 		case 0b1111:						\
993 			set_ ## w ## _perms((wr), false, false, false);	\
994 			break;						\
995 		}							\
996 									\
997 		/* R_HJYGR */						\
998 		set_ ## w ## _wxn((wr), ((ip) == 0b0110));		\
999 									\
1000 	} while (0)
1001 
1002 static void compute_s1_indirect_permissions(struct kvm_vcpu *vcpu,
1003 					    struct s1_walk_info *wi,
1004 					    struct s1_walk_result *wr)
1005 {
1006 	u8 up, pp, idx;
1007 
1008 	idx = pte_pi_index(wr->desc);
1009 
1010 	switch (wi->regime) {
1011 	case TR_EL10:
1012 		pp = perm_idx(vcpu, PIR_EL1, idx);
1013 		up = perm_idx(vcpu, PIRE0_EL1, idx);
1014 		break;
1015 	case TR_EL20:
1016 		pp = perm_idx(vcpu, PIR_EL2, idx);
1017 		up = perm_idx(vcpu, PIRE0_EL2, idx);
1018 		break;
1019 	case TR_EL2:
1020 		pp = perm_idx(vcpu, PIR_EL2, idx);
1021 		up = 0;
1022 		break;
1023 	}
1024 
1025 	set_perms(priv, wr, pp);
1026 
1027 	if (wi->regime != TR_EL2)
1028 		set_perms(unpriv, wr, up);
1029 	else
1030 		set_unpriv_perms(wr, false, false, false);
1031 
1032 	wr->pov = wi->poe && !(pp & BIT(3));
1033 	wr->uov = wi->e0poe && !(up & BIT(3));
1034 
1035 	/* R_VFPJF */
1036 	if (wr->px && wr->uw) {
1037 		set_priv_perms(wr, false, false, false);
1038 		set_unpriv_perms(wr, false, false, false);
1039 	}
1040 }
1041 
1042 static void compute_s1_overlay_permissions(struct kvm_vcpu *vcpu,
1043 					   struct s1_walk_info *wi,
1044 					   struct s1_walk_result *wr)
1045 {
1046 	u8 idx, pov_perms, uov_perms;
1047 
1048 	idx = FIELD_GET(PTE_PO_IDX_MASK, wr->desc);
1049 
1050 	switch (wi->regime) {
1051 	case TR_EL10:
1052 		pov_perms = perm_idx(vcpu, POR_EL1, idx);
1053 		uov_perms = perm_idx(vcpu, POR_EL0, idx);
1054 		break;
1055 	case TR_EL20:
1056 		pov_perms = perm_idx(vcpu, POR_EL2, idx);
1057 		uov_perms = perm_idx(vcpu, POR_EL0, idx);
1058 		break;
1059 	case TR_EL2:
1060 		pov_perms = perm_idx(vcpu, POR_EL2, idx);
1061 		uov_perms = 0;
1062 		break;
1063 	}
1064 
1065 	if (pov_perms & ~POE_RWX)
1066 		pov_perms = POE_NONE;
1067 
1068 	if (wi->poe && wr->pov) {
1069 		wr->pr &= pov_perms & POE_R;
1070 		wr->pw &= pov_perms & POE_W;
1071 		wr->px &= pov_perms & POE_X;
1072 	}
1073 
1074 	if (uov_perms & ~POE_RWX)
1075 		uov_perms = POE_NONE;
1076 
1077 	if (wi->e0poe && wr->uov) {
1078 		wr->ur &= uov_perms & POE_R;
1079 		wr->uw &= uov_perms & POE_W;
1080 		wr->ux &= uov_perms & POE_X;
1081 	}
1082 }
1083 
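/* Full S1 permission model: direct/indirect, hierarchical, overlay, WXN and PAN */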
1084 static void compute_s1_permissions(struct kvm_vcpu *vcpu,
1085 				   struct s1_walk_info *wi,
1086 				   struct s1_walk_result *wr)
1087 {
1088 	bool pan;
1089 
1090 	if (!s1pie_enabled(vcpu, wi->regime))
1091 		compute_s1_direct_permissions(vcpu, wi, wr);
1092 	else
1093 		compute_s1_indirect_permissions(vcpu, wi, wr);
1094 
1095 	if (!wi->hpd)
1096 		compute_s1_hierarchical_permissions(vcpu, wi, wr);
1097 
1098 	if (wi->poe || wi->e0poe)
1099 		compute_s1_overlay_permissions(vcpu, wi, wr);
1100 
1101 	/* R_QXXPC */
1102 	if (wr->pwxn) {
1103 		if (!wr->pov && wr->pw)
1104 			wr->px = false;
1105 		if (wr->pov && wr->px)
1106 			wr->pw = false;
1107 	}
1108 
1109 	/* R_NPBXC */
1110 	if (wr->uwxn) {
1111 		if (!wr->uov && wr->uw)
1112 			wr->ux = false;
1113 		if (wr->uov && wr->ux)
1114 			wr->uw = false;
1115 	}
1116 
1117 	pan = wi->pan && (wr->ur || wr->uw ||
1118 			  (pan3_enabled(vcpu, wi->regime) && wr->ux));
1119 	wr->pw &= !pan;
1120 	wr->pr &= !pan;
1121 }
1122 
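/* Emulate AT in software: walk the guest's S1 tables and synthesize PAR_EL1 */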
1123 static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
1124 {
1125 	struct s1_walk_result wr = {};
1126 	struct s1_walk_info wi = {};
1127 	bool perm_fail = false;
1128 	int ret, idx;
1129 
1130 	wi.regime = compute_translation_regime(vcpu, op);
1131 	wi.as_el0 = (op == OP_AT_S1E0R || op == OP_AT_S1E0W);
1132 	wi.pan = (op == OP_AT_S1E1RP || op == OP_AT_S1E1WP) &&
1133 		 (*vcpu_cpsr(vcpu) & PSR_PAN_BIT);
1134 
1135 	ret = setup_s1_walk(vcpu, &wi, &wr, vaddr);
1136 	if (ret)
1137 		goto compute_par;
1138 
1139 	if (wr.level == S1_MMU_DISABLED)
1140 		goto compute_par;
1141 
1142 	idx = srcu_read_lock(&vcpu->kvm->srcu);
1143 
1144 	ret = walk_s1(vcpu, &wi, &wr, vaddr);
1145 
1146 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
1147 
1148 	if (ret)
1149 		goto compute_par;
1150 
1151 	compute_s1_permissions(vcpu, &wi, &wr);
1152 
1153 	switch (op) {
1154 	case OP_AT_S1E1RP:
1155 	case OP_AT_S1E1R:
1156 	case OP_AT_S1E2R:
1157 		perm_fail = !wr.pr;
1158 		break;
1159 	case OP_AT_S1E1WP:
1160 	case OP_AT_S1E1W:
1161 	case OP_AT_S1E2W:
1162 		perm_fail = !wr.pw;
1163 		break;
1164 	case OP_AT_S1E0R:
1165 		perm_fail = !wr.ur;
1166 		break;
1167 	case OP_AT_S1E0W:
1168 		perm_fail = !wr.uw;
1169 		break;
1170 	case OP_AT_S1E1A:
1171 	case OP_AT_S1E2A:
1172 		break;
1173 	default:
1174 		BUG();
1175 	}
1176 
1177 	if (perm_fail)
1178 		fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false);
1179 
1180 compute_par:
1181 	return compute_par_s1(vcpu, &wr, wi.regime);
1182 }
1183 
1184 /*
1185  * Return the PAR_EL1 value as the result of a valid translation.
1186  *
1187  * If the translation is unsuccessful, the value may only contain
1188  * PAR_EL1.F, and cannot be taken at face value. It isn't an
1189  * indication of the translation having failed, only that the fast
1190  * path did not succeed, *unless* it indicates an S1 permission or
1191  * access fault.
1192  */
1193 static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
1194 {
1195 	struct mmu_config config;
1196 	struct kvm_s2_mmu *mmu;
1197 	bool fail;
1198 	u64 par;
1199 
1200 	par = SYS_PAR_EL1_F;
1201 
1202 	/*
1203 	 * We've trapped, so everything is live on the CPU. As we will
1204 	 * be switching contexts behind everybody's back, disable
1205 	 * interrupts while holding the mmu lock.
1206 	 */
1207 	guard(write_lock_irqsave)(&vcpu->kvm->mmu_lock);
1208 
1209 	/*
1210 	 * If HCR_EL2.{E2H,TGE} == {1,1}, the MMU context is already
1211 	 * the right one (as we trapped from vEL2). If not, save the
1212 	 * full MMU context.
1213 	 */
1214 	if (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu))
1215 		goto skip_mmu_switch;
1216 
1217 	/*
1218 	 * Obtaining the S2 MMU for an L2 is horribly racy, and we may not
1219 	 * find it (recycled by another vcpu, for example). When this
1220 	 * happens, admit defeat immediately and use the SW (slow) path.
1221 	 */
1222 	mmu = lookup_s2_mmu(vcpu);
1223 	if (!mmu)
1224 		return par;
1225 
1226 	__mmu_config_save(&config);
1227 
1228 	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR0_EL1),	SYS_TTBR0);
1229 	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR1_EL1),	SYS_TTBR1);
1230 	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR_EL1),	SYS_TCR);
1231 	write_sysreg_el1(vcpu_read_sys_reg(vcpu, MAIR_EL1),	SYS_MAIR);
1232 	if (kvm_has_tcr2(vcpu->kvm)) {
1233 		write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR2_EL1), SYS_TCR2);
1234 		if (kvm_has_s1pie(vcpu->kvm)) {
1235 			write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIR_EL1), SYS_PIR);
1236 			write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIRE0_EL1), SYS_PIRE0);
1237 		}
1238 		if (kvm_has_s1poe(vcpu->kvm)) {
1239 			write_sysreg_el1(vcpu_read_sys_reg(vcpu, POR_EL1), SYS_POR);
1240 			write_sysreg_s(vcpu_read_sys_reg(vcpu, POR_EL0), SYS_POR_EL0);
1241 		}
1242 	}
1243 	write_sysreg_el1(vcpu_read_sys_reg(vcpu, SCTLR_EL1),	SYS_SCTLR);
1244 	__load_stage2(mmu, mmu->arch);
1245 
1246 skip_mmu_switch:
1247 	/* Temporarily switch back to guest context */
1248 	write_sysreg_hcr(vcpu->arch.hcr_el2);
1249 	isb();
1250 
1251 	switch (op) {
1252 	case OP_AT_S1E1RP:
1253 	case OP_AT_S1E1WP:
1254 		fail = at_s1e1p_fast(vcpu, op, vaddr);
1255 		break;
1256 	case OP_AT_S1E1R:
1257 		fail = __kvm_at(OP_AT_S1E1R, vaddr);
1258 		break;
1259 	case OP_AT_S1E1W:
1260 		fail = __kvm_at(OP_AT_S1E1W, vaddr);
1261 		break;
1262 	case OP_AT_S1E0R:
1263 		fail = __kvm_at(OP_AT_S1E0R, vaddr);
1264 		break;
1265 	case OP_AT_S1E0W:
1266 		fail = __kvm_at(OP_AT_S1E0W, vaddr);
1267 		break;
1268 	case OP_AT_S1E1A:
1269 		fail = __kvm_at(OP_AT_S1E1A, vaddr);
1270 		break;
1271 	default:
1272 		WARN_ON_ONCE(1);
1273 		fail = true;
1274 		break;
1275 	}
1276 
1277 	if (!fail)
1278 		par = read_sysreg_par();
1279 
1280 	write_sysreg_hcr(HCR_HOST_VHE_FLAGS);
1281 
1282 	if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)))
1283 		__mmu_config_restore(&config);
1284 
1285 	return par;
1286 }
1287 
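/* Check whether PAR_EL1 reports a stage-1 (not stage-2) permission fault */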
1288 static bool par_check_s1_perm_fault(u64 par)
1289 {
1290 	u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par);
1291 
1292 	return  ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM &&
1293 		 !(par & SYS_PAR_EL1_S));
1294 }
1295 
1296 static bool par_check_s1_access_fault(u64 par)
1297 {
1298 	u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par);
1299 
1300 	return  ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_ACCESS &&
1301 		 !(par & SYS_PAR_EL1_S));
1302 }
1303 
1304 void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
1305 {
1306 	u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr);
1307 
1308 	/*
1309 	 * If PAR_EL1 reports that AT failed on an S1 permission or access
1310 	 * fault, we know for sure that the PTW was able to walk the S1
1311 	 * tables and there's nothing else to do.
1312 	 *
1313 	 * If AT failed for any other reason, then we must walk the guest S1
1314 	 * to emulate the instruction.
1315 	 */
1316 	if ((par & SYS_PAR_EL1_F) &&
1317 	    !par_check_s1_perm_fault(par) &&
1318 	    !par_check_s1_access_fault(par))
1319 		par = handle_at_slow(vcpu, op, vaddr);
1320 
1321 	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
1322 }
1323 
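/* Emulate AT S1E2{R,W,A} by issuing the EL1 variant with a tweaked HCR_EL2 */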
1324 void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
1325 {
1326 	u64 par;
1327 
1328 	/*
1329 	 * We've trapped, so everything is live on the CPU. As we will be
1330 	 * switching context behind everybody's back, disable interrupts...
1331 	 */
1332 	scoped_guard(write_lock_irqsave, &vcpu->kvm->mmu_lock) {
1333 		u64 val, hcr;
1334 		bool fail;
1335 
1336 		val = hcr = read_sysreg(hcr_el2);
1337 		val &= ~HCR_TGE;
1338 		val |= HCR_VM;
1339 
1340 		if (!vcpu_el2_e2h_is_set(vcpu))
1341 			val |= HCR_NV | HCR_NV1;
1342 
1343 		write_sysreg_hcr(val);
1344 		isb();
1345 
1346 		par = SYS_PAR_EL1_F;
1347 
1348 		switch (op) {
1349 		case OP_AT_S1E2R:
1350 			fail = __kvm_at(OP_AT_S1E1R, vaddr);
1351 			break;
1352 		case OP_AT_S1E2W:
1353 			fail = __kvm_at(OP_AT_S1E1W, vaddr);
1354 			break;
1355 		case OP_AT_S1E2A:
1356 			fail = __kvm_at(OP_AT_S1E1A, vaddr);
1357 			break;
1358 		default:
1359 			WARN_ON_ONCE(1);
1360 			fail = true;
1361 		}
1362 
1363 		isb();
1364 
1365 		if (!fail)
1366 			par = read_sysreg_par();
1367 
1368 		write_sysreg_hcr(hcr);
1369 		isb();
1370 	}
1371 
1372 	/* We failed the translation, let's replay it in slow motion */
1373 	if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
1374 		par = handle_at_slow(vcpu, op, vaddr);
1375 
1376 	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
1377 }
1378 
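/* AT S12E{0,1}{R,W}: stage-1 via __kvm_at_s1e01(), then walk the guest's stage-2 */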
1379 void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
1380 {
1381 	struct kvm_s2_trans out = {};
1382 	u64 ipa, par;
1383 	bool write;
1384 	int ret;
1385 
1386 	/* Do the stage-1 translation */
1387 	switch (op) {
1388 	case OP_AT_S12E1R:
1389 		op = OP_AT_S1E1R;
1390 		write = false;
1391 		break;
1392 	case OP_AT_S12E1W:
1393 		op = OP_AT_S1E1W;
1394 		write = true;
1395 		break;
1396 	case OP_AT_S12E0R:
1397 		op = OP_AT_S1E0R;
1398 		write = false;
1399 		break;
1400 	case OP_AT_S12E0W:
1401 		op = OP_AT_S1E0W;
1402 		write = true;
1403 		break;
1404 	default:
1405 		WARN_ON_ONCE(1);
1406 		return;
1407 	}
1408 
1409 	__kvm_at_s1e01(vcpu, op, vaddr);
1410 	par = vcpu_read_sys_reg(vcpu, PAR_EL1);
1411 	if (par & SYS_PAR_EL1_F)
1412 		return;
1413 
1414 	/*
1415 	 * If we only have a single stage of translation (E2H=0 or
1416 	 * TGE=1), exit early. Same thing if {VM,DC}=={0,0}.
1417 	 */
1418 	if (!vcpu_el2_e2h_is_set(vcpu) || vcpu_el2_tge_is_set(vcpu) ||
1419 	    !(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC)))
1420 		return;
1421 
1422 	/* Do the stage-2 translation */
1423 	ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0));
1424 	out.esr = 0;
1425 	ret = kvm_walk_nested_s2(vcpu, ipa, &out);
1426 	if (ret < 0)
1427 		return;
1428 
1429 	/* Check the access permission */
1430 	if (!out.esr &&
1431 	    ((!write && !out.readable) || (write && !out.writable)))
1432 		out.esr = ESR_ELx_FSC_PERM_L(out.level & 0x3);
1433 
1434 	par = compute_par_s12(vcpu, par, &out);
1435 	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
1436 }
1437 
1438 /*
1439  * Translate a VA for a given EL in a given translation regime, with
1440  * or without PAN. This requires wi->{regime, as_el0, pan} to be
1441  * set. The rest of the wi and wr should be 0-initialised.
1442  */
1443 int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
1444 		       struct s1_walk_result *wr, u64 va)
1445 {
1446 	int ret;
1447 
1448 	ret = setup_s1_walk(vcpu, wi, wr, va);
1449 	if (ret)
1450 		return ret;
1451 
1452 	if (wr->level == S1_MMU_DISABLED) {
1453 		wr->ur = wr->uw = wr->ux = true;
1454 		wr->pr = wr->pw = wr->px = true;
1455 	} else {
1456 		ret = walk_s1(vcpu, wi, wr, va);
1457 		if (ret)
1458 			return ret;
1459 
1460 		compute_s1_permissions(vcpu, wi, wr);
1461 	}
1462 
1463 	return 0;
1464 }
1465