xref: /linux/arch/arm64/kvm/at.c (revision bba2c3615bd6cfee7456d1130f2e6b01b3f4e9ba)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2017 - Linaro Ltd
4  * Author: Jintack Lim <jintack.lim@linaro.org>
5  */
6 
7 #include <linux/kvm_host.h>
8 
9 #include <asm/esr.h>
10 #include <asm/kvm_hyp.h>
11 #include <asm/kvm_mmu.h>
12 #include <asm/lsui.h>
13 
14 static void fail_s1_walk(struct s1_walk_result *wr, u8 fst, bool s1ptw)
15 {
16 	wr->fst		= fst;
17 	wr->ptw		= s1ptw;
18 	wr->s2		= s1ptw;
19 	wr->failed	= true;
20 }
21 
22 #define S1_MMU_DISABLED		(-127)
23 
24 static int get_ia_size(struct s1_walk_info *wi)
25 {
26 	return 64 - wi->txsz;
27 }
28 
29 /* Return true if the IPA is out of the OA range */
30 static bool check_output_size(u64 ipa, struct s1_walk_info *wi)
31 {
32 	if (wi->pa52bit)
33 		return wi->max_oa_bits < 52 && (ipa & GENMASK_ULL(51, wi->max_oa_bits));
34 	return wi->max_oa_bits < 48 && (ipa & GENMASK_ULL(47, wi->max_oa_bits));
35 }
36 
37 static bool has_52bit_pa(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, u64 tcr)
38 {
39 	switch (BIT(wi->pgshift)) {
40 	case SZ_64K:
41 	default:		/* IMPDEF: treat any other value as 64k */
42 		if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, PARANGE, 52))
43 			return false;
44 		return ((wi->regime == TR_EL2 ?
45 			 FIELD_GET(TCR_EL2_PS_MASK, tcr) :
46 			 FIELD_GET(TCR_IPS_MASK, tcr)) == 0b0110);
47 	case SZ_16K:
48 		if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT))
49 			return false;
50 		break;
51 	case SZ_4K:
52 		if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT))
53 			return false;
54 		break;
55 	}
56 
57 	return (tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS));
58 }
59 
60 static u64 desc_to_oa(struct s1_walk_info *wi, u64 desc)
61 {
62 	u64 addr;
63 
64 	if (!wi->pa52bit)
65 		return desc & GENMASK_ULL(47, wi->pgshift);
66 
67 	switch (BIT(wi->pgshift)) {
68 	case SZ_4K:
69 	case SZ_16K:
70 		addr = desc & GENMASK_ULL(49, wi->pgshift);
71 		addr |= FIELD_GET(KVM_PTE_ADDR_51_50_LPA2, desc) << 50;
72 		break;
73 	case SZ_64K:
74 	default:	    /* IMPDEF: treat any other value as 64k */
75 		addr = desc & GENMASK_ULL(47, wi->pgshift);
76 		addr |= FIELD_GET(KVM_PTE_ADDR_51_48, desc) << 48;
77 		break;
78 	}
79 
80 	return addr;
81 }
82 
83 /* Return the translation regime that applies to an AT instruction */
84 static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 op)
85 {
86 	/*
87 	 * We only get here from guest EL2, so the translation
88 	 * regime AT applies to is solely defined by {E2H,TGE}.
89 	 */
90 	switch (op) {
91 	case OP_AT_S1E2R:
92 	case OP_AT_S1E2W:
93 	case OP_AT_S1E2A:
94 		return vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2;
95 	default:
96 		return (vcpu_el2_e2h_is_set(vcpu) &&
97 			vcpu_el2_tge_is_set(vcpu)) ? TR_EL20 : TR_EL10;
98 	}
99 }
100 
101 static u64 effective_tcr2(struct kvm_vcpu *vcpu, enum trans_regime regime)
102 {
103 	if (regime == TR_EL10) {
104 		if (vcpu_has_nv(vcpu) &&
105 		    !(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En))
106 			return 0;
107 
108 		return vcpu_read_sys_reg(vcpu, TCR2_EL1);
109 	}
110 
111 	return vcpu_read_sys_reg(vcpu, TCR2_EL2);
112 }
113 
114 static bool s1pie_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
115 {
116 	if (!kvm_has_s1pie(vcpu->kvm))
117 		return false;
118 
119 	/* Abuse TCR2_EL1_PIE and use it for EL2 as well */
120 	return effective_tcr2(vcpu, regime) & TCR2_EL1_PIE;
121 }
122 
123 static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi)
124 {
125 	u64 val;
126 
127 	if (!kvm_has_s1poe(vcpu->kvm)) {
128 		wi->poe = wi->e0poe = false;
129 		return;
130 	}
131 
132 	val = effective_tcr2(vcpu, wi->regime);
133 
134 	/* Abuse TCR2_EL1_* for EL2 */
135 	wi->poe = val & TCR2_EL1_POE;
136 	wi->e0poe = (wi->regime != TR_EL2) && (val & TCR2_EL1_E0POE);
137 }
138 
139 #define _has_tgran(__r, __sz)					\
140 	({							\
141 		u64 _s1, _mmfr0 = __r;				\
142 								\
143 		_s1 = SYS_FIELD_GET(ID_AA64MMFR0_EL1,		\
144 				    TGRAN##__sz, _mmfr0);	\
145 								\
146 		_s1 != ID_AA64MMFR0_EL1_TGRAN##__sz##_NI;	\
147 	})
148 
149 static bool has_tgran(u64 mmfr0, unsigned int shift)
150 {
151 	switch (shift) {
152 	case 12:
153 		return _has_tgran(mmfr0, 4);
154 	case 14:
155 		return _has_tgran(mmfr0, 16);
156 	case 16:
157 		return _has_tgran(mmfr0, 64);
158 	default:
159 		BUG();
160 	}
161 }
162 
163 static unsigned int tcr_to_tg0_pgshift(u64 tcr)
164 {
165 	u64 tg0 = tcr & TCR_TG0_MASK;
166 
167 	switch (tg0) {
168 	case TCR_TG0_4K:
169 		return 12;
170 	case TCR_TG0_16K:
171 		return 14;
172 	case TCR_TG0_64K:
173 	default:	/* IMPDEF: treat any other value as 64k */
174 		return 16;
175 	}
176 }
177 
178 static unsigned int tcr_to_tg1_pgshift(u64 tcr)
179 {
180 	u64 tg1 = tcr & TCR_TG1_MASK;
181 
182 	switch (tg1) {
183 	case TCR_TG1_4K:
184 		return 12;
185 	case TCR_TG1_16K:
186 		return 14;
187 	case TCR_TG1_64K:
188 	default:	/* IMPDEF: treat any other value as 64k */
189 		return 16;
190 	}
191 }
192 
193 static unsigned int fallback_tgran_shift(u64 mmfr0)
194 {
195 	if (has_tgran(mmfr0, PAGE_SHIFT))
196 		return PAGE_SHIFT;
197 	else if (has_tgran(mmfr0, 12))
198 		return 12;
199 	else if (has_tgran(mmfr0, 14))
200 		return 14;
201 	else if (has_tgran(mmfr0, 16))
202 		return 16;
203 	else			/* Should be unreacheable */
204 		return PAGE_SHIFT;
205 }
206 
207 static unsigned int tcr_tg_pgshift(struct kvm *kvm, u64 tcr, bool upper_range)
208 {
209 	u64 mmfr0 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64MMFR0_EL1);
210 	unsigned int shift;
211 
212 	/* Someone was silly enough to encode TG0/TG1 differently */
213 	if (upper_range)
214 		shift = tcr_to_tg1_pgshift(tcr);
215 	else
216 		shift = tcr_to_tg0_pgshift(tcr);
217 
218 	/*
219 	 * If TGx is programmed to an unimplemented value (not advertised in
220 	 * ID_AA64MMFR0_EL1), we should treat it as if an implemented value is
221 	 * written, as per the architecture. Choose an available one while
222 	 * prioritizing PAGE_SIZE.
223 	 */
224 	if (!has_tgran(mmfr0, shift))
225 		return fallback_tgran_shift(mmfr0);
226 
227 	return shift;
228 }
229 
230 static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
231 			 struct s1_walk_result *wr, u64 va)
232 {
233 	u64 hcr, sctlr, tcr, ps, ia_bits, ttbr;
234 	unsigned int stride, x;
235 	bool va55, tbi, lva, upper_range;
236 
237 	va55 = va & BIT(55);
238 	upper_range = va55 && wi->regime != TR_EL2;
239 
240 	if (vcpu_has_nv(vcpu)) {
241 		hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
242 		wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC));
243 	} else {
244 		WARN_ON_ONCE(wi->regime != TR_EL10);
245 		wi->s2 = false;
246 		hcr = 0;
247 	}
248 
249 	switch (wi->regime) {
250 	case TR_EL10:
251 		sctlr	= vcpu_read_sys_reg(vcpu, SCTLR_EL1);
252 		tcr	= vcpu_read_sys_reg(vcpu, TCR_EL1);
253 		ttbr	= (va55 ?
254 			   vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
255 			   vcpu_read_sys_reg(vcpu, TTBR0_EL1));
256 		break;
257 	case TR_EL2:
258 	case TR_EL20:
259 		sctlr	= vcpu_read_sys_reg(vcpu, SCTLR_EL2);
260 		tcr	= vcpu_read_sys_reg(vcpu, TCR_EL2);
261 		ttbr	= (va55 ?
262 			   vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
263 			   vcpu_read_sys_reg(vcpu, TTBR0_EL2));
264 		break;
265 	default:
266 		BUG();
267 	}
268 
269 	if (upper_range)
270 		wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
271 	else
272 		wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
273 
274 	wi->pgshift = tcr_tg_pgshift(vcpu->kvm, tcr, upper_range);
275 	wi->pa52bit = has_52bit_pa(vcpu, wi, tcr);
276 
277 	ia_bits = get_ia_size(wi);
278 
279 	/* AArch64.S1StartLevel() */
280 	stride = wi->pgshift - 3;
281 	wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride);
282 
283 	if (wi->regime == TR_EL2 && va55)
284 		goto addrsz;
285 
286 	tbi = (wi->regime == TR_EL2 ?
287 	       FIELD_GET(TCR_EL2_TBI, tcr) :
288 	       (va55 ?
289 		FIELD_GET(TCR_TBI1, tcr) :
290 		FIELD_GET(TCR_TBI0, tcr)));
291 
292 	if (!tbi && (u64)sign_extend64(va, 55) != va)
293 		goto addrsz;
294 
295 	wi->sh = (wi->regime == TR_EL2 ?
296 		  FIELD_GET(TCR_EL2_SH0_MASK, tcr) :
297 		  (va55 ?
298 		   FIELD_GET(TCR_SH1_MASK, tcr) :
299 		   FIELD_GET(TCR_SH0_MASK, tcr)));
300 
301 	va = (u64)sign_extend64(va, 55);
302 
303 	/* Let's put the MMU disabled case aside immediately */
304 	switch (wi->regime) {
305 	case TR_EL10:
306 		/*
307 		 * If dealing with the EL1&0 translation regime, 3 things
308 		 * can disable the S1 translation:
309 		 *
310 		 * - HCR_EL2.DC = 1
311 		 * - HCR_EL2.{E2H,TGE} = {0,1}
312 		 * - SCTLR_EL1.M = 0
313 		 *
314 		 * The TGE part is interesting. If we have decided that this
315 		 * is EL1&0, then it means that either {E2H,TGE} == {1,0} or
316 		 * {0,x}, and we only need to test for TGE == 1.
317 		 */
318 		if (hcr & (HCR_DC | HCR_TGE)) {
319 			wr->level = S1_MMU_DISABLED;
320 			break;
321 		}
322 		fallthrough;
323 	case TR_EL2:
324 	case TR_EL20:
325 		if (!(sctlr & SCTLR_ELx_M))
326 			wr->level = S1_MMU_DISABLED;
327 		break;
328 	}
329 
330 	if (wr->level == S1_MMU_DISABLED) {
331 		if (va >= BIT(kvm_get_pa_bits(vcpu->kvm)))
332 			goto addrsz;
333 
334 		wr->pa = va;
335 		return 0;
336 	}
337 
338 	wi->be = sctlr & SCTLR_ELx_EE;
339 
340 	wi->hpd  = kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, HPDS, IMP);
341 	wi->hpd &= (wi->regime == TR_EL2 ?
342 		    FIELD_GET(TCR_EL2_HPD, tcr) :
343 		    (va55 ?
344 		     FIELD_GET(TCR_HPD1, tcr) :
345 		     FIELD_GET(TCR_HPD0, tcr)));
346 	/* R_JHSVW */
347 	wi->hpd |= s1pie_enabled(vcpu, wi->regime);
348 
349 	/* Do we have POE? */
350 	compute_s1poe(vcpu, wi);
351 
352 	/* R_BVXDG */
353 	wi->hpd |= (wi->poe || wi->e0poe);
354 
355 	/* R_PLCGL, R_YXNYW */
356 	if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR2_EL1, ST, 48_47)) {
357 		if (wi->txsz > 39)
358 			goto transfault;
359 	} else {
360 		if (wi->txsz > 48 || (BIT(wi->pgshift) == SZ_64K && wi->txsz > 47))
361 			goto transfault;
362 	}
363 
364 	/* R_GTJBY, R_SXWGM */
365 	switch (BIT(wi->pgshift)) {
366 	case SZ_4K:
367 	case SZ_16K:
368 		lva = wi->pa52bit;
369 		break;
370 	case SZ_64K:
371 		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, VARange, 52);
372 		break;
373 	}
374 
375 	if ((lva && wi->txsz < 12) || (!lva && wi->txsz < 16))
376 		goto transfault;
377 
378 	/* R_YYVYV, I_THCZK */
379 	if ((!va55 && va > GENMASK(ia_bits - 1, 0)) ||
380 	    (va55 && va < GENMASK(63, ia_bits)))
381 		goto transfault;
382 
383 	/* I_ZFSYQ */
384 	if (wi->regime != TR_EL2 &&
385 	    (tcr & (va55 ? TCR_EPD1_MASK : TCR_EPD0_MASK)))
386 		goto transfault;
387 
388 	/* R_BNDVG and following statements */
389 	if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, E0PD, IMP) &&
390 	    wi->as_el0 && (tcr & (va55 ? TCR_E0PD1 : TCR_E0PD0)))
391 		goto transfault;
392 
393 	ps = (wi->regime == TR_EL2 ?
394 	      FIELD_GET(TCR_EL2_PS_MASK, tcr) : FIELD_GET(TCR_IPS_MASK, tcr));
395 
396 	wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps, wi->pa52bit));
397 
398 	/* Compute minimal alignment */
399 	x = 3 + ia_bits - ((3 - wi->sl) * stride + wi->pgshift);
400 
401 	wi->baddr = ttbr & TTBRx_EL1_BADDR;
402 	if (wi->pa52bit) {
403 		/*
404 		 * Force the alignment on 64 bytes for top-level tables
405 		 * smaller than 8 entries, since TTBR.BADDR[5:2] are used to
406 		 * store bits [51:48] of the first level of lookup.
407 		 */
408 		x = max(x, 6);
409 
410 		wi->baddr |= FIELD_GET(GENMASK_ULL(5, 2), ttbr) << 48;
411 	}
412 
413 	/* R_VPBBF */
414 	if (check_output_size(wi->baddr, wi))
415 		goto addrsz;
416 
417 	wi->baddr &= GENMASK_ULL(wi->max_oa_bits - 1, x);
418 
419 	wi->ha  = kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, HAFDBS, AF);
420 	wi->ha &= (wi->regime == TR_EL2 ?
421 		  FIELD_GET(TCR_EL2_HA, tcr) :
422 		  FIELD_GET(TCR_HA, tcr));
423 
424 	return 0;
425 
426 addrsz:
427 	/*
428 	 * Address Size Fault level 0 to indicate it comes from TTBR.
429 	 * yes, this is an oddity.
430 	 */
431 	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false);
432 	return -EFAULT;
433 
434 transfault:
435 	/* Translation Fault on start level */
436 	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(wi->sl), false);
437 	return -EFAULT;
438 }
439 
440 static int kvm_read_s1_desc(struct kvm_vcpu *vcpu, u64 pa, u64 *desc,
441 			    struct s1_walk_info *wi)
442 {
443 	u64 val;
444 	int r;
445 
446 	r = kvm_read_guest(vcpu->kvm, pa, &val, sizeof(val));
447 	if (r)
448 		return r;
449 
450 	if (wi->be)
451 		*desc = be64_to_cpu((__force __be64)val);
452 	else
453 		*desc = le64_to_cpu((__force __le64)val);
454 
455 	return 0;
456 }
457 
458 static int kvm_swap_s1_desc(struct kvm_vcpu *vcpu, u64 pa, u64 old, u64 new,
459 			    struct s1_walk_info *wi)
460 {
461 	if (wi->be) {
462 		old = (__force u64)cpu_to_be64(old);
463 		new = (__force u64)cpu_to_be64(new);
464 	} else {
465 		old = (__force u64)cpu_to_le64(old);
466 		new = (__force u64)cpu_to_le64(new);
467 	}
468 
469 	return __kvm_at_swap_desc(vcpu->kvm, pa, old, new);
470 }
471 
472 static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
473 		   struct s1_walk_result *wr, u64 va)
474 {
475 	u64 va_top, va_bottom, baddr, desc, new_desc, ipa;
476 	struct kvm_s2_trans s2_trans = {};
477 	int level, stride, ret;
478 
479 	level = wi->sl;
480 	stride = wi->pgshift - 3;
481 	baddr = wi->baddr;
482 
483 	va_top = get_ia_size(wi) - 1;
484 
485 	while (1) {
486 		u64 index;
487 
488 		va_bottom = (3 - level) * stride + wi->pgshift;
489 		index = (va & GENMASK_ULL(va_top, va_bottom)) >> (va_bottom - 3);
490 
491 		ipa = baddr | index;
492 
493 		if (wi->s2) {
494 			ret = kvm_walk_nested_s2(vcpu, ipa, &s2_trans);
495 			if (ret == -EAGAIN)
496 				return ret;
497 
498 			if (ret) {
499 				fail_s1_walk(wr,
500 					     (s2_trans.esr & ~ESR_ELx_FSC_LEVEL) | level,
501 					     true);
502 				return ret;
503 			}
504 
505 			if (!kvm_s2_trans_readable(&s2_trans)) {
506 				fail_s1_walk(wr, ESR_ELx_FSC_PERM_L(level),
507 					     true);
508 
509 				return -EPERM;
510 			}
511 
512 			ipa = kvm_s2_trans_output(&s2_trans);
513 		}
514 
515 		if (wi->filter) {
516 			ret = wi->filter->fn(&(struct s1_walk_context)
517 					     {
518 						     .wi	= wi,
519 						     .table_ipa	= baddr,
520 						     .level	= level,
521 					     }, wi->filter->priv);
522 			if (ret)
523 				return ret;
524 		}
525 
526 		ret = kvm_read_s1_desc(vcpu, ipa, &desc, wi);
527 		if (ret) {
528 			fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), false);
529 			return ret;
530 		}
531 
532 		new_desc = desc;
533 
534 		/* Invalid descriptor */
535 		if (!(desc & BIT(0)))
536 			goto transfault;
537 
538 		/* Block mapping, check validity down the line */
539 		if (!(desc & BIT(1)))
540 			break;
541 
542 		/* Page mapping */
543 		if (level == 3)
544 			break;
545 
546 		/* Table handling */
547 		if (!wi->hpd) {
548 			wr->APTable  |= FIELD_GET(S1_TABLE_AP, desc);
549 			wr->UXNTable |= FIELD_GET(PMD_TABLE_UXN, desc);
550 			wr->PXNTable |= FIELD_GET(PMD_TABLE_PXN, desc);
551 		}
552 
553 		baddr = desc_to_oa(wi, desc);
554 
555 		/* Check for out-of-range OA */
556 		if (check_output_size(baddr, wi))
557 			goto addrsz;
558 
559 		/* Prepare for next round */
560 		va_top = va_bottom - 1;
561 		level++;
562 	}
563 
564 	/* Block mapping, check the validity of the level */
565 	if (!(desc & BIT(1))) {
566 		bool valid_block = false;
567 		bool lpa = kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, PARANGE, 52);
568 
569 		switch (BIT(wi->pgshift)) {
570 		case SZ_4K:
571 			valid_block = level == 1 || level == 2 || (wi->pa52bit && level == 0);
572 			break;
573 		case SZ_16K:
574 			valid_block = level == 2 || (wi->pa52bit && level == 1);
575 			break;
576 		case SZ_64K:
577 			valid_block = level == 2 || (lpa && level == 1);
578 			break;
579 		}
580 
581 		if (!valid_block)
582 			goto transfault;
583 	}
584 
585 	baddr = desc_to_oa(wi, desc);
586 	if (check_output_size(baddr & GENMASK(52, va_bottom), wi))
587 		goto addrsz;
588 
589 	if (wi->ha)
590 		new_desc |= PTE_AF;
591 
592 	if (new_desc != desc) {
593 		if (wi->s2 && !kvm_s2_trans_writable(&s2_trans)) {
594 			fail_s1_walk(wr, ESR_ELx_FSC_PERM_L(level), true);
595 			return -EPERM;
596 		}
597 
598 		ret = kvm_swap_s1_desc(vcpu, ipa, desc, new_desc, wi);
599 		if (ret == -EAGAIN)
600 			return ret;
601 		if (ret) {
602 			fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), false);
603 			return ret;
604 		}
605 
606 		desc = new_desc;
607 	}
608 
609 	if (!(desc & PTE_AF)) {
610 		fail_s1_walk(wr, ESR_ELx_FSC_ACCESS_L(level), false);
611 		return -EACCES;
612 	}
613 
614 	va_bottom += contiguous_bit_shift(desc, wi, level);
615 
616 	wr->failed = false;
617 	wr->level = level;
618 	wr->desc = desc;
619 	wr->pa = baddr & GENMASK(52, va_bottom);
620 	wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0);
621 
622 	wr->nG = (wi->regime != TR_EL2) && (desc & PTE_NG);
623 	if (wr->nG)
624 		wr->asid = get_asid_by_regime(vcpu, wi->regime);
625 
626 	return 0;
627 
628 addrsz:
629 	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(level), false);
630 	return -EINVAL;
631 transfault:
632 	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(level), false);
633 	return -ENOENT;
634 }
635 
636 struct mmu_config {
637 	u64	ttbr0;
638 	u64	ttbr1;
639 	u64	tcr;
640 	u64	mair;
641 	u64	tcr2;
642 	u64	pir;
643 	u64	pire0;
644 	u64	por_el0;
645 	u64	por_el1;
646 	u64	sctlr;
647 	u64	vttbr;
648 	u64	vtcr;
649 };
650 
651 static void __mmu_config_save(struct mmu_config *config)
652 {
653 	config->ttbr0	= read_sysreg_el1(SYS_TTBR0);
654 	config->ttbr1	= read_sysreg_el1(SYS_TTBR1);
655 	config->tcr	= read_sysreg_el1(SYS_TCR);
656 	config->mair	= read_sysreg_el1(SYS_MAIR);
657 	if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
658 		config->tcr2	= read_sysreg_el1(SYS_TCR2);
659 		if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
660 			config->pir	= read_sysreg_el1(SYS_PIR);
661 			config->pire0	= read_sysreg_el1(SYS_PIRE0);
662 		}
663 		if (system_supports_poe()) {
664 			config->por_el1	= read_sysreg_el1(SYS_POR);
665 			config->por_el0	= read_sysreg_s(SYS_POR_EL0);
666 		}
667 	}
668 	config->sctlr	= read_sysreg_el1(SYS_SCTLR);
669 	config->vttbr	= read_sysreg(vttbr_el2);
670 	config->vtcr	= read_sysreg(vtcr_el2);
671 }
672 
673 static void __mmu_config_restore(struct mmu_config *config)
674 {
675 	/*
676 	 * ARM errata 1165522 and 1530923 require TGE to be 1 before
677 	 * we update the guest state.
678 	 */
679 	asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
680 
681 	write_sysreg_el1(config->ttbr0,	SYS_TTBR0);
682 	write_sysreg_el1(config->ttbr1,	SYS_TTBR1);
683 	write_sysreg_el1(config->tcr,	SYS_TCR);
684 	write_sysreg_el1(config->mair,	SYS_MAIR);
685 	if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
686 		write_sysreg_el1(config->tcr2, SYS_TCR2);
687 		if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
688 			write_sysreg_el1(config->pir, SYS_PIR);
689 			write_sysreg_el1(config->pire0, SYS_PIRE0);
690 		}
691 		if (system_supports_poe()) {
692 			write_sysreg_el1(config->por_el1, SYS_POR);
693 			write_sysreg_s(config->por_el0, SYS_POR_EL0);
694 		}
695 	}
696 	write_sysreg_el1(config->sctlr,	SYS_SCTLR);
697 	write_sysreg(config->vttbr,	vttbr_el2);
698 	write_sysreg(config->vtcr,	vtcr_el2);
699 }
700 
701 static bool at_s1e1p_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
702 {
703 	u64 host_pan;
704 	bool fail;
705 
706 	host_pan = read_sysreg_s(SYS_PSTATE_PAN);
707 	write_sysreg_s(*vcpu_cpsr(vcpu) & PSTATE_PAN, SYS_PSTATE_PAN);
708 
709 	switch (op) {
710 	case OP_AT_S1E1RP:
711 		fail = __kvm_at(OP_AT_S1E1RP, vaddr);
712 		break;
713 	case OP_AT_S1E1WP:
714 		fail = __kvm_at(OP_AT_S1E1WP, vaddr);
715 		break;
716 	}
717 
718 	write_sysreg_s(host_pan, SYS_PSTATE_PAN);
719 
720 	return fail;
721 }
722 
723 #define MEMATTR(ic, oc)		(MEMATTR_##oc << 4 | MEMATTR_##ic)
724 #define MEMATTR_NC		0b0100
725 #define MEMATTR_Wt		0b1000
726 #define MEMATTR_Wb		0b1100
727 #define MEMATTR_WbRaWa		0b1111
728 
729 #define MEMATTR_IS_DEVICE(m)	(((m) & GENMASK(7, 4)) == 0)
730 
731 static u8 s2_memattr_to_attr(u8 memattr)
732 {
733 	memattr &= 0b1111;
734 
735 	switch (memattr) {
736 	case 0b0000:
737 	case 0b0001:
738 	case 0b0010:
739 	case 0b0011:
740 		return memattr << 2;
741 	case 0b0100:
742 		return MEMATTR(Wb, Wb);
743 	case 0b0101:
744 		return MEMATTR(NC, NC);
745 	case 0b0110:
746 		return MEMATTR(Wt, NC);
747 	case 0b0111:
748 		return MEMATTR(Wb, NC);
749 	case 0b1000:
750 		/* Reserved, assume NC */
751 		return MEMATTR(NC, NC);
752 	case 0b1001:
753 		return MEMATTR(NC, Wt);
754 	case 0b1010:
755 		return MEMATTR(Wt, Wt);
756 	case 0b1011:
757 		return MEMATTR(Wb, Wt);
758 	case 0b1100:
759 		/* Reserved, assume NC */
760 		return MEMATTR(NC, NC);
761 	case 0b1101:
762 		return MEMATTR(NC, Wb);
763 	case 0b1110:
764 		return MEMATTR(Wt, Wb);
765 	case 0b1111:
766 		return MEMATTR(Wb, Wb);
767 	default:
768 		unreachable();
769 	}
770 }
771 
772 static u8 combine_s1_s2_attr(u8 s1, u8 s2)
773 {
774 	bool transient;
775 	u8 final = 0;
776 
777 	/* Upgrade transient s1 to non-transient to simplify things */
778 	switch (s1) {
779 	case 0b0001 ... 0b0011:	/* Normal, Write-Through Transient */
780 		transient = true;
781 		s1 = MEMATTR_Wt | (s1 & GENMASK(1,0));
782 		break;
783 	case 0b0101 ... 0b0111:	/* Normal, Write-Back Transient */
784 		transient = true;
785 		s1 = MEMATTR_Wb | (s1 & GENMASK(1,0));
786 		break;
787 	default:
788 		transient = false;
789 	}
790 
791 	/* S2CombineS1AttrHints() */
792 	if ((s1 & GENMASK(3, 2)) == MEMATTR_NC ||
793 	    (s2 & GENMASK(3, 2)) == MEMATTR_NC)
794 		final = MEMATTR_NC;
795 	else if ((s1 & GENMASK(3, 2)) == MEMATTR_Wt ||
796 		 (s2 & GENMASK(3, 2)) == MEMATTR_Wt)
797 		final = MEMATTR_Wt;
798 	else
799 		final = MEMATTR_Wb;
800 
801 	if (final != MEMATTR_NC) {
802 		/* Inherit RaWa hints form S1 */
803 		if (transient) {
804 			switch (s1 & GENMASK(3, 2)) {
805 			case MEMATTR_Wt:
806 				final = 0;
807 				break;
808 			case MEMATTR_Wb:
809 				final = MEMATTR_NC;
810 				break;
811 			}
812 		}
813 
814 		final |= s1 & GENMASK(1, 0);
815 	}
816 
817 	return final;
818 }
819 
820 #define ATTR_NSH	0b00
821 #define ATTR_RSV	0b01
822 #define ATTR_OSH	0b10
823 #define ATTR_ISH	0b11
824 
825 static u8 compute_final_sh(u8 attr, u8 sh)
826 {
827 	/* Any form of device, as well as NC has SH[1:0]=0b10 */
828 	if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC))
829 		return ATTR_OSH;
830 
831 	if (sh == ATTR_RSV)		/* Reserved, mapped to NSH */
832 		sh = ATTR_NSH;
833 
834 	return sh;
835 }
836 
837 static u8 compute_s1_sh(struct s1_walk_info *wi, struct s1_walk_result *wr,
838 			u8 attr)
839 {
840 	u8 sh;
841 
842 	/*
843 	 * non-52bit and LPA have their basic shareability described in the
844 	 * descriptor. LPA2 gets it from the corresponding field in TCR,
845 	 * conveniently recorded in the walk info.
846 	 */
847 	if (!wi->pa52bit || BIT(wi->pgshift) == SZ_64K)
848 		sh = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S1_SH, wr->desc);
849 	else
850 		sh = wi->sh;
851 
852 	return compute_final_sh(attr, sh);
853 }
854 
855 static u8 combine_sh(u8 s1_sh, u8 s2_sh)
856 {
857 	if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH)
858 		return ATTR_OSH;
859 	if (s1_sh == ATTR_ISH || s2_sh == ATTR_ISH)
860 		return ATTR_ISH;
861 
862 	return ATTR_NSH;
863 }
864 
865 static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
866 			   struct kvm_s2_trans *tr)
867 {
868 	u8 s1_parattr, s2_memattr, final_attr, s2_sh;
869 	u64 par;
870 
871 	/* If S2 has failed to translate, report the damage */
872 	if (tr->esr) {
873 		par = SYS_PAR_EL1_RES1;
874 		par |= SYS_PAR_EL1_F;
875 		par |= SYS_PAR_EL1_S;
876 		par |= FIELD_PREP(SYS_PAR_EL1_FST, tr->esr);
877 		return par;
878 	}
879 
880 	s1_parattr = FIELD_GET(SYS_PAR_EL1_ATTR, s1_par);
881 	s2_memattr = FIELD_GET(GENMASK(5, 2), tr->desc);
882 
883 	if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_FWB) {
884 		if (!kvm_has_feat(vcpu->kvm, ID_AA64PFR2_EL1, MTEPERM, IMP))
885 			s2_memattr &= ~BIT(3);
886 
887 		/* Combination of R_VRJSW and R_RHWZM */
888 		switch (s2_memattr) {
889 		case 0b0101:
890 			if (MEMATTR_IS_DEVICE(s1_parattr))
891 				final_attr = s1_parattr;
892 			else
893 				final_attr = MEMATTR(NC, NC);
894 			break;
895 		case 0b0110:
896 		case 0b1110:
897 			final_attr = MEMATTR(WbRaWa, WbRaWa);
898 			break;
899 		case 0b0111:
900 		case 0b1111:
901 			/* Preserve S1 attribute */
902 			final_attr = s1_parattr;
903 			break;
904 		case 0b0100:
905 		case 0b1100:
906 		case 0b1101:
907 			/* Reserved, do something non-silly */
908 			final_attr = s1_parattr;
909 			break;
910 		default:
911 			/*
912 			 * MemAttr[2]=0, Device from S2.
913 			 *
914 			 * FWB does not influence the way that stage 1
915 			 * memory types and attributes are combined
916 			 * with stage 2 Device type and attributes.
917 			 */
918 			final_attr = min(s2_memattr_to_attr(s2_memattr),
919 					 s1_parattr);
920 		}
921 	} else {
922 		/* Combination of R_HMNDG, R_TNHFM and R_GQFSF */
923 		u8 s2_parattr = s2_memattr_to_attr(s2_memattr);
924 
925 		if (MEMATTR_IS_DEVICE(s1_parattr) ||
926 		    MEMATTR_IS_DEVICE(s2_parattr)) {
927 			final_attr = min(s1_parattr, s2_parattr);
928 		} else {
929 			/* At this stage, this is memory vs memory */
930 			final_attr  = combine_s1_s2_attr(s1_parattr & 0xf,
931 							 s2_parattr & 0xf);
932 			final_attr |= combine_s1_s2_attr(s1_parattr >> 4,
933 							 s2_parattr >> 4) << 4;
934 		}
935 	}
936 
937 	if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_CD) &&
938 	    !MEMATTR_IS_DEVICE(final_attr))
939 		final_attr = MEMATTR(NC, NC);
940 
941 	s2_sh = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S2_SH, tr->desc);
942 
943 	par  = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr);
944 	par |= tr->output & GENMASK(47, 12);
945 	par |= FIELD_PREP(SYS_PAR_EL1_SH,
946 			  combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par),
947 				     compute_final_sh(final_attr, s2_sh)));
948 
949 	return par;
950 }
951 
952 static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
953 			  struct s1_walk_result *wr)
954 {
955 	u64 par;
956 
957 	if (wr->failed) {
958 		par = SYS_PAR_EL1_RES1;
959 		par |= SYS_PAR_EL1_F;
960 		par |= FIELD_PREP(SYS_PAR_EL1_FST, wr->fst);
961 		par |= wr->ptw ? SYS_PAR_EL1_PTW : 0;
962 		par |= wr->s2 ? SYS_PAR_EL1_S : 0;
963 	} else if (wr->level == S1_MMU_DISABLED) {
964 		/* MMU off or HCR_EL2.DC == 1 */
965 		par  = SYS_PAR_EL1_NSE;
966 		par |= wr->pa & SYS_PAR_EL1_PA;
967 
968 		if (wi->regime == TR_EL10 && vcpu_has_nv(vcpu) &&
969 		    (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_DC)) {
970 			par |= FIELD_PREP(SYS_PAR_EL1_ATTR,
971 					  MEMATTR(WbRaWa, WbRaWa));
972 			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_NSH);
973 		} else {
974 			par |= FIELD_PREP(SYS_PAR_EL1_ATTR, 0); /* nGnRnE */
975 			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_OSH);
976 		}
977 	} else {
978 		u64 mair, sctlr;
979 		u8 sh;
980 
981 		par  = SYS_PAR_EL1_NSE;
982 
983 		mair = (wi->regime == TR_EL10 ?
984 			vcpu_read_sys_reg(vcpu, MAIR_EL1) :
985 			vcpu_read_sys_reg(vcpu, MAIR_EL2));
986 
987 		mair >>= FIELD_GET(PTE_ATTRINDX_MASK, wr->desc) * 8;
988 		mair &= 0xff;
989 
990 		sctlr = (wi->regime == TR_EL10 ?
991 			 vcpu_read_sys_reg(vcpu, SCTLR_EL1) :
992 			 vcpu_read_sys_reg(vcpu, SCTLR_EL2));
993 
994 		/* Force NC for memory if SCTLR_ELx.C is clear */
995 		if (!(sctlr & SCTLR_EL1_C) && !MEMATTR_IS_DEVICE(mair))
996 			mair = MEMATTR(NC, NC);
997 
998 		par |= FIELD_PREP(SYS_PAR_EL1_ATTR, mair);
999 		par |= wr->pa & SYS_PAR_EL1_PA;
1000 
1001 		sh = compute_s1_sh(wi, wr, mair);
1002 		par |= FIELD_PREP(SYS_PAR_EL1_SH, sh);
1003 	}
1004 
1005 	return par;
1006 }
1007 
1008 static bool pan3_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
1009 {
1010 	u64 sctlr;
1011 
1012 	if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, PAN, PAN3))
1013 		return false;
1014 
1015 	if (s1pie_enabled(vcpu, regime))
1016 		return true;
1017 
1018 	if (regime == TR_EL10)
1019 		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
1020 	else
1021 		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);
1022 
1023 	return sctlr & SCTLR_EL1_EPAN;
1024 }
1025 
1026 static void compute_s1_direct_permissions(struct kvm_vcpu *vcpu,
1027 					  struct s1_walk_info *wi,
1028 					  struct s1_walk_result *wr)
1029 {
1030 	bool wxn;
1031 
1032 	/* Non-hierarchical part of AArch64.S1DirectBasePermissions() */
1033 	if (wi->regime != TR_EL2) {
1034 		switch (FIELD_GET(PTE_USER | PTE_RDONLY, wr->desc)) {
1035 		case 0b00:
1036 			wr->pr = wr->pw = true;
1037 			wr->ur = wr->uw = false;
1038 			break;
1039 		case 0b01:
1040 			wr->pr = wr->pw = wr->ur = wr->uw = true;
1041 			break;
1042 		case 0b10:
1043 			wr->pr = true;
1044 			wr->pw = wr->ur = wr->uw = false;
1045 			break;
1046 		case 0b11:
1047 			wr->pr = wr->ur = true;
1048 			wr->pw = wr->uw = false;
1049 			break;
1050 		}
1051 
1052 		/* We don't use px for anything yet, but hey... */
1053 		wr->px = !((wr->desc & PTE_PXN) || wr->uw);
1054 		wr->ux = !(wr->desc & PTE_UXN);
1055 	} else {
1056 		wr->ur = wr->uw = wr->ux = false;
1057 
1058 		if (!(wr->desc & PTE_RDONLY)) {
1059 			wr->pr = wr->pw = true;
1060 		} else {
1061 			wr->pr = true;
1062 			wr->pw = false;
1063 		}
1064 
1065 		/* XN maps to UXN */
1066 		wr->px = !(wr->desc & PTE_UXN);
1067 	}
1068 
1069 	switch (wi->regime) {
1070 	case TR_EL2:
1071 	case TR_EL20:
1072 		wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_WXN);
1073 		break;
1074 	case TR_EL10:
1075 		wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN);
1076 		break;
1077 	}
1078 
1079 	wr->pwxn = wr->uwxn = wxn;
1080 	wr->pov = wi->poe;
1081 	wr->uov = wi->e0poe;
1082 }
1083 
1084 static void compute_s1_hierarchical_permissions(struct kvm_vcpu *vcpu,
1085 						struct s1_walk_info *wi,
1086 						struct s1_walk_result *wr)
1087 {
1088 	/* Hierarchical part of AArch64.S1DirectBasePermissions() */
1089 	if (wi->regime != TR_EL2) {
1090 		switch (wr->APTable) {
1091 		case 0b00:
1092 			break;
1093 		case 0b01:
1094 			wr->ur = wr->uw = false;
1095 			break;
1096 		case 0b10:
1097 			wr->pw = wr->uw = false;
1098 			break;
1099 		case 0b11:
1100 			wr->pw = wr->ur = wr->uw = false;
1101 			break;
1102 		}
1103 
1104 		wr->px &= !wr->PXNTable;
1105 		wr->ux &= !wr->UXNTable;
1106 	} else {
1107 		if (wr->APTable & BIT(1))
1108 			wr->pw = false;
1109 
1110 		/* XN maps to UXN */
1111 		wr->px &= !wr->UXNTable;
1112 	}
1113 }
1114 
/* Extract the 4bit field number 'idx' from the vcpu's view of sysreg 'reg' */
#define perm_idx(vcpu, reg, idx)	\
	(0xf & (vcpu_read_sys_reg((vcpu), (reg)) >> (4 * (idx))))
1116 
/* Set the privileged read/write/execute permissions of a walk result */
#define set_priv_perms(res, rd, wrt, ex)	\
	do {					\
		(res)->pr = (rd);		\
		(res)->pw = (wrt);		\
		(res)->px = (ex);		\
	} while (0)

/* Set the unprivileged read/write/execute permissions of a walk result */
#define set_unpriv_perms(res, rd, wrt, ex)	\
	do {					\
		(res)->ur = (rd);		\
		(res)->uw = (wrt);		\
		(res)->ux = (ex);		\
	} while (0)
1130 
/* Set the privileged WXN state of a walk result */
#define set_priv_wxn(res, val)		\
	do {				\
		(res)->pwxn = (val);	\
	} while (0)

/* Set the unprivileged WXN state of a walk result */
#define set_unpriv_wxn(res, val)	\
	do {				\
		(res)->uwxn = (val);	\
	} while (0)
1140 
/*
 * Similar to AArch64.S1IndirectBasePermissions(), without GCS
 *
 * 'who' is either priv or unpriv and selects the set of permissions of
 * 'res' that gets updated from the 4bit permission encoding 'pi'.
 * Encodings are grouped by the permissions they grant.
 */
#define set_perms(who, res, pi)						\
	do {								\
		/* R_LLZDZ */						\
		switch ((pi)) {						\
		case 0b0000:						\
		case 0b0100:						\
		case 0b1011:						\
		case 0b1101:						\
		case 0b1111:						\
			/* No access */					\
			set_ ## who ## _perms((res), false, false, false); \
			break;						\
		case 0b0001:						\
		case 0b1000:						\
		case 0b1001:						\
			/* Read */					\
			set_ ## who ## _perms((res), true, false, false); \
			break;						\
		case 0b0010:						\
			/* Execute */					\
			set_ ## who ## _perms((res), false, false, true); \
			break;						\
		case 0b0011:						\
		case 0b1010:						\
			/* Read, Execute */				\
			set_ ## who ## _perms((res), true, false, true); \
			break;						\
		case 0b0101:						\
		case 0b1100:						\
			/* Read, Write */				\
			set_ ## who ## _perms((res), true, true, false); \
			break;						\
		case 0b0110:						\
		case 0b0111:						\
		case 0b1110:						\
			/* Read, Write, Execute */			\
			set_ ## who ## _perms((res), true, true, true);	\
			break;						\
		}							\
									\
		/* R_HJYGR */						\
		set_ ## who ## _wxn((res), ((pi) == 0b0110));		\
									\
	} while (0)
1200 
1201 static void compute_s1_indirect_permissions(struct kvm_vcpu *vcpu,
1202 					    struct s1_walk_info *wi,
1203 					    struct s1_walk_result *wr)
1204 {
1205 	u8 up, pp, idx;
1206 
1207 	idx = pte_pi_index(wr->desc);
1208 
1209 	switch (wi->regime) {
1210 	case TR_EL10:
1211 		pp = perm_idx(vcpu, PIR_EL1, idx);
1212 		up = perm_idx(vcpu, PIRE0_EL1, idx);
1213 		break;
1214 	case TR_EL20:
1215 		pp = perm_idx(vcpu, PIR_EL2, idx);
1216 		up = perm_idx(vcpu, PIRE0_EL2, idx);
1217 		break;
1218 	case TR_EL2:
1219 		pp = perm_idx(vcpu, PIR_EL2, idx);
1220 		up = 0;
1221 		break;
1222 	}
1223 
1224 	set_perms(priv, wr, pp);
1225 
1226 	if (wi->regime != TR_EL2)
1227 		set_perms(unpriv, wr, up);
1228 	else
1229 		set_unpriv_perms(wr, false, false, false);
1230 
1231 	wr->pov = wi->poe && !(pp & BIT(3));
1232 	wr->uov = wi->e0poe && !(up & BIT(3));
1233 
1234 	/* R_VFPJF */
1235 	if (wr->px && wr->uw) {
1236 		set_priv_perms(wr, false, false, false);
1237 		set_unpriv_perms(wr, false, false, false);
1238 	}
1239 }
1240 
1241 static void compute_s1_overlay_permissions(struct kvm_vcpu *vcpu,
1242 					   struct s1_walk_info *wi,
1243 					   struct s1_walk_result *wr)
1244 {
1245 	u8 idx, pov_perms, uov_perms;
1246 
1247 	idx = FIELD_GET(PTE_PO_IDX_MASK, wr->desc);
1248 
1249 	if (wr->pov) {
1250 		switch (wi->regime) {
1251 		case TR_EL10:
1252 			pov_perms = perm_idx(vcpu, POR_EL1, idx);
1253 			break;
1254 		case TR_EL20:
1255 			pov_perms = perm_idx(vcpu, POR_EL2, idx);
1256 			break;
1257 		case TR_EL2:
1258 			pov_perms = perm_idx(vcpu, POR_EL2, idx);
1259 			break;
1260 		}
1261 
1262 		if (pov_perms & ~POE_RWX)
1263 			pov_perms = POE_NONE;
1264 
1265 		/* R_QXXPC, S1PrivOverflow enabled */
1266 		if (wr->pwxn && (pov_perms & POE_X))
1267 			pov_perms &= ~POE_W;
1268 
1269 		wr->pr &= pov_perms & POE_R;
1270 		wr->pw &= pov_perms & POE_W;
1271 		wr->px &= pov_perms & POE_X;
1272 	}
1273 
1274 	if (wr->uov) {
1275 		switch (wi->regime) {
1276 		case TR_EL10:
1277 			uov_perms = perm_idx(vcpu, POR_EL0, idx);
1278 			break;
1279 		case TR_EL20:
1280 			uov_perms = perm_idx(vcpu, POR_EL0, idx);
1281 			break;
1282 		case TR_EL2:
1283 			uov_perms = 0;
1284 			break;
1285 		}
1286 
1287 		if (uov_perms & ~POE_RWX)
1288 			uov_perms = POE_NONE;
1289 
1290 		/* R_NPBXC, S1UnprivOverlay enabled */
1291 		if (wr->uwxn && (uov_perms & POE_X))
1292 			uov_perms &= ~POE_W;
1293 
1294 		wr->ur &= uov_perms & POE_R;
1295 		wr->uw &= uov_perms & POE_W;
1296 		wr->ux &= uov_perms & POE_X;
1297 	}
1298 }
1299 
1300 static void compute_s1_permissions(struct kvm_vcpu *vcpu,
1301 				   struct s1_walk_info *wi,
1302 				   struct s1_walk_result *wr)
1303 {
1304 	bool pan;
1305 
1306 	if (!s1pie_enabled(vcpu, wi->regime))
1307 		compute_s1_direct_permissions(vcpu, wi, wr);
1308 	else
1309 		compute_s1_indirect_permissions(vcpu, wi, wr);
1310 
1311 	if (!wi->hpd)
1312 		compute_s1_hierarchical_permissions(vcpu, wi, wr);
1313 
1314 	compute_s1_overlay_permissions(vcpu, wi, wr);
1315 
1316 	/* R_QXXPC, S1PrivOverlay disabled */
1317 	if (!wr->pov)
1318 		wr->px &= !(wr->pwxn && wr->pw);
1319 
1320 	/* R_NPBXC, S1UnprivOverlay disabled */
1321 	if (!wr->uov)
1322 		wr->ux &= !(wr->uwxn && wr->uw);
1323 
1324 	pan = wi->pan && (wr->ur || wr->uw ||
1325 			  (pan3_enabled(vcpu, wi->regime) && wr->ux));
1326 	wr->pw &= !pan;
1327 	wr->pr &= !pan;
1328 }
1329 
1330 static int handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr, u64 *par)
1331 {
1332 	struct s1_walk_result wr = {};
1333 	struct s1_walk_info wi = {};
1334 	bool perm_fail = false;
1335 	int ret, idx;
1336 
1337 	wi.regime = compute_translation_regime(vcpu, op);
1338 	wi.as_el0 = (op == OP_AT_S1E0R || op == OP_AT_S1E0W);
1339 	wi.pan = (op == OP_AT_S1E1RP || op == OP_AT_S1E1WP) &&
1340 		 (*vcpu_cpsr(vcpu) & PSR_PAN_BIT);
1341 
1342 	ret = setup_s1_walk(vcpu, &wi, &wr, vaddr);
1343 	if (ret)
1344 		goto compute_par;
1345 
1346 	if (wr.level == S1_MMU_DISABLED)
1347 		goto compute_par;
1348 
1349 	idx = srcu_read_lock(&vcpu->kvm->srcu);
1350 
1351 	ret = walk_s1(vcpu, &wi, &wr, vaddr);
1352 
1353 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
1354 
1355 	/*
1356 	 * Race to update a descriptor -- restart the walk.
1357 	 */
1358 	if (ret == -EAGAIN)
1359 		return ret;
1360 	if (ret)
1361 		goto compute_par;
1362 
1363 	compute_s1_permissions(vcpu, &wi, &wr);
1364 
1365 	switch (op) {
1366 	case OP_AT_S1E1RP:
1367 	case OP_AT_S1E1R:
1368 	case OP_AT_S1E2R:
1369 		perm_fail = !wr.pr;
1370 		break;
1371 	case OP_AT_S1E1WP:
1372 	case OP_AT_S1E1W:
1373 	case OP_AT_S1E2W:
1374 		perm_fail = !wr.pw;
1375 		break;
1376 	case OP_AT_S1E0R:
1377 		perm_fail = !wr.ur;
1378 		break;
1379 	case OP_AT_S1E0W:
1380 		perm_fail = !wr.uw;
1381 		break;
1382 	case OP_AT_S1E1A:
1383 	case OP_AT_S1E2A:
1384 		break;
1385 	default:
1386 		BUG();
1387 	}
1388 
1389 	if (perm_fail)
1390 		fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false);
1391 
1392 compute_par:
1393 	*par = compute_par_s1(vcpu, &wi, &wr);
1394 	return 0;
1395 }
1396 
1397 /*
1398  * Return the PAR_EL1 value as the result of a valid translation.
1399  *
1400  * If the translation is unsuccessful, the value may only contain
1401  * PAR_EL1.F, and cannot be taken at face value. It isn't an
1402  * indication of the translation having failed, only that the fast
1403  * path did not succeed, *unless* it indicates a S1 permission or
1404  * access fault.
1405  */
1406 static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
1407 {
1408 	struct mmu_config config;
1409 	struct kvm_s2_mmu *mmu;
1410 	bool fail, mmu_cs;
1411 	u64 par;
1412 
1413 	par = SYS_PAR_EL1_F;
1414 
1415 	/*
1416 	 * We've trapped, so everything is live on the CPU. As we will
1417 	 * be switching contexts behind everybody's back, disable
1418 	 * interrupts while holding the mmu lock.
1419 	 */
1420 	guard(write_lock_irqsave)(&vcpu->kvm->mmu_lock);
1421 
1422 	/*
1423 	 * If HCR_EL2.{E2H,TGE} == {1,1}, the MMU context is already
1424 	 * the right one (as we trapped from vEL2). If not, save the
1425 	 * full MMU context.
1426 	 *
1427 	 * We are also guaranteed to be in the correct context if
1428 	 * we're not in a nested VM.
1429 	 */
1430 	mmu_cs = (vcpu_has_nv(vcpu) &&
1431 		  !(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)));
1432 	if (!mmu_cs)
1433 		goto skip_mmu_switch;
1434 
1435 	/*
1436 	 * Obtaining the S2 MMU for a L2 is horribly racy, and we may not
1437 	 * find it (recycled by another vcpu, for example). When this
1438 	 * happens, admit defeat immediately and use the SW (slow) path.
1439 	 */
1440 	mmu = lookup_s2_mmu(vcpu);
1441 	if (!mmu)
1442 		return par;
1443 
1444 	__mmu_config_save(&config);
1445 
1446 	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR0_EL1),	SYS_TTBR0);
1447 	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR1_EL1),	SYS_TTBR1);
1448 	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR_EL1),	SYS_TCR);
1449 	write_sysreg_el1(vcpu_read_sys_reg(vcpu, MAIR_EL1),	SYS_MAIR);
1450 	if (kvm_has_tcr2(vcpu->kvm)) {
1451 		write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR2_EL1), SYS_TCR2);
1452 		if (kvm_has_s1pie(vcpu->kvm)) {
1453 			write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIR_EL1), SYS_PIR);
1454 			write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIRE0_EL1), SYS_PIRE0);
1455 		}
1456 		if (kvm_has_s1poe(vcpu->kvm)) {
1457 			write_sysreg_el1(vcpu_read_sys_reg(vcpu, POR_EL1), SYS_POR);
1458 			write_sysreg_s(vcpu_read_sys_reg(vcpu, POR_EL0), SYS_POR_EL0);
1459 		}
1460 	}
1461 	write_sysreg_el1(vcpu_read_sys_reg(vcpu, SCTLR_EL1),	SYS_SCTLR);
1462 	__load_stage2(mmu);
1463 
1464 skip_mmu_switch:
1465 	/* Temporarily switch back to guest context */
1466 	write_sysreg_hcr(vcpu->arch.hcr_el2);
1467 	isb();
1468 
1469 	switch (op) {
1470 	case OP_AT_S1E1RP:
1471 	case OP_AT_S1E1WP:
1472 		fail = at_s1e1p_fast(vcpu, op, vaddr);
1473 		break;
1474 	case OP_AT_S1E1R:
1475 		fail = __kvm_at(OP_AT_S1E1R, vaddr);
1476 		break;
1477 	case OP_AT_S1E1W:
1478 		fail = __kvm_at(OP_AT_S1E1W, vaddr);
1479 		break;
1480 	case OP_AT_S1E0R:
1481 		fail = __kvm_at(OP_AT_S1E0R, vaddr);
1482 		break;
1483 	case OP_AT_S1E0W:
1484 		fail = __kvm_at(OP_AT_S1E0W, vaddr);
1485 		break;
1486 	case OP_AT_S1E1A:
1487 		fail = __kvm_at(OP_AT_S1E1A, vaddr);
1488 		break;
1489 	default:
1490 		WARN_ON_ONCE(1);
1491 		fail = true;
1492 		break;
1493 	}
1494 
1495 	if (!fail)
1496 		par = read_sysreg_par();
1497 
1498 	write_sysreg_hcr(HCR_HOST_VHE_FLAGS);
1499 
1500 	if (mmu_cs)
1501 		__mmu_config_restore(&config);
1502 
1503 	return par;
1504 }
1505 
1506 static bool par_check_s1_perm_fault(u64 par)
1507 {
1508 	u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par);
1509 
1510 	return  ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM &&
1511 		 !(par & SYS_PAR_EL1_S));
1512 }
1513 
1514 static bool par_check_s1_access_fault(u64 par)
1515 {
1516 	u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par);
1517 
1518 	return  ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_ACCESS &&
1519 		 !(par & SYS_PAR_EL1_S));
1520 }
1521 
1522 int __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
1523 {
1524 	u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr);
1525 	int ret;
1526 
1527 	/*
1528 	 * If PAR_EL1 reports that AT failed on a S1 permission or access
1529 	 * fault, we know for sure that the PTW was able to walk the S1
1530 	 * tables and there's nothing else to do.
1531 	 *
1532 	 * If AT failed for any other reason, then we must walk the guest S1
1533 	 * to emulate the instruction.
1534 	 */
1535 	if ((par & SYS_PAR_EL1_F) &&
1536 	    !par_check_s1_perm_fault(par) &&
1537 	    !par_check_s1_access_fault(par)) {
1538 		ret = handle_at_slow(vcpu, op, vaddr, &par);
1539 		if (ret)
1540 			return ret;
1541 	}
1542 
1543 	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
1544 	return 0;
1545 }
1546 
1547 int __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
1548 {
1549 	u64 par;
1550 	int ret;
1551 
1552 	/*
1553 	 * We've trapped, so everything is live on the CPU. As we will be
1554 	 * switching context behind everybody's back, disable interrupts...
1555 	 */
1556 	scoped_guard(write_lock_irqsave, &vcpu->kvm->mmu_lock) {
1557 		u64 val, hcr;
1558 		bool fail;
1559 
1560 		val = hcr = read_sysreg(hcr_el2);
1561 		val &= ~HCR_TGE;
1562 		val |= HCR_VM;
1563 
1564 		if (!vcpu_el2_e2h_is_set(vcpu))
1565 			val |= HCR_NV | HCR_NV1;
1566 
1567 		write_sysreg_hcr(val);
1568 		isb();
1569 
1570 		par = SYS_PAR_EL1_F;
1571 
1572 		switch (op) {
1573 		case OP_AT_S1E2R:
1574 			fail = __kvm_at(OP_AT_S1E1R, vaddr);
1575 			break;
1576 		case OP_AT_S1E2W:
1577 			fail = __kvm_at(OP_AT_S1E1W, vaddr);
1578 			break;
1579 		case OP_AT_S1E2A:
1580 			fail = __kvm_at(OP_AT_S1E1A, vaddr);
1581 			break;
1582 		default:
1583 			WARN_ON_ONCE(1);
1584 			fail = true;
1585 		}
1586 
1587 		if (!fail)
1588 			par = read_sysreg_par();
1589 
1590 		write_sysreg_hcr(hcr);
1591 		isb();
1592 	}
1593 
1594 	/* We failed the translation, let's replay it in slow motion */
1595 	if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par)) {
1596 		ret = handle_at_slow(vcpu, op, vaddr, &par);
1597 		if (ret)
1598 			return ret;
1599 	}
1600 
1601 	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
1602 	return 0;
1603 }
1604 
1605 int __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
1606 {
1607 	struct kvm_s2_trans out = {};
1608 	u64 ipa, par;
1609 	bool write;
1610 	int ret;
1611 
1612 	/* Do the stage-1 translation */
1613 	switch (op) {
1614 	case OP_AT_S12E1R:
1615 		op = OP_AT_S1E1R;
1616 		write = false;
1617 		break;
1618 	case OP_AT_S12E1W:
1619 		op = OP_AT_S1E1W;
1620 		write = true;
1621 		break;
1622 	case OP_AT_S12E0R:
1623 		op = OP_AT_S1E0R;
1624 		write = false;
1625 		break;
1626 	case OP_AT_S12E0W:
1627 		op = OP_AT_S1E0W;
1628 		write = true;
1629 		break;
1630 	default:
1631 		WARN_ON_ONCE(1);
1632 		return 0;
1633 	}
1634 
1635 	ret = __kvm_at_s1e01(vcpu, op, vaddr);
1636 	if (ret)
1637 		return ret;
1638 
1639 	par = vcpu_read_sys_reg(vcpu, PAR_EL1);
1640 	if (par & SYS_PAR_EL1_F)
1641 		return 0;
1642 
1643 	/*
1644 	 * If we only have a single stage of translation (EL2&0), exit
1645 	 * early. Same thing if {VM,DC}=={0,0}.
1646 	 */
1647 	if (compute_translation_regime(vcpu, op) == TR_EL20 ||
1648 	    !(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC)))
1649 		return 0;
1650 
1651 	/* Do the stage-2 translation */
1652 	ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0));
1653 	out.esr = 0;
1654 	scoped_guard(srcu, &vcpu->kvm->srcu)
1655 		ret = kvm_walk_nested_s2(vcpu, ipa, &out);
1656 	if (ret < 0)
1657 		return ret;
1658 
1659 	/* Check the access permission */
1660 	if (!out.esr &&
1661 	    ((!write && !out.readable) || (write && !out.writable)))
1662 		out.esr = ESR_ELx_FSC_PERM_L(out.level & 0x3);
1663 
1664 	par = compute_par_s12(vcpu, par, &out);
1665 	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
1666 	return 0;
1667 }
1668 
1669 /*
1670  * Translate a VA for a given EL in a given translation regime, with
1671  * or without PAN. This requires wi->{regime, as_el0, pan} to be
1672  * set. The rest of the wi and wr should be 0-initialised.
1673  */
1674 int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
1675 		       struct s1_walk_result *wr, u64 va)
1676 {
1677 	int ret;
1678 
1679 	ret = setup_s1_walk(vcpu, wi, wr, va);
1680 	if (ret)
1681 		return ret;
1682 
1683 	if (wr->level == S1_MMU_DISABLED) {
1684 		wr->ur = wr->uw = wr->ux = true;
1685 		wr->pr = wr->pw = wr->px = true;
1686 	} else {
1687 		ret = walk_s1(vcpu, wi, wr, va);
1688 		if (ret)
1689 			return ret;
1690 
1691 		compute_s1_permissions(vcpu, wi, wr);
1692 	}
1693 
1694 	return 0;
1695 }
1696 
1697 struct desc_match {
1698 	u64	ipa;
1699 	int	level;
1700 };
1701 
1702 static int match_s1_desc(struct s1_walk_context *ctxt, void *priv)
1703 {
1704 	struct desc_match *dm = priv;
1705 	u64 ipa = dm->ipa;
1706 
1707 	/* Use S1 granule alignment */
1708 	ipa &= GENMASK(51, ctxt->wi->pgshift);
1709 
1710 	/* Not the IPA we're looking for? Continue. */
1711 	if (ipa != ctxt->table_ipa)
1712 		return 0;
1713 
1714 	/* Note the level and interrupt the walk */
1715 	dm->level = ctxt->level;
1716 	return -EINTR;
1717 }
1718 
1719 int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, int *level)
1720 {
1721 	struct desc_match dm = {
1722 		.ipa	= ipa,
1723 	};
1724 	struct s1_walk_info wi = {
1725 		.filter	= &(struct s1_walk_filter){
1726 			.fn	= match_s1_desc,
1727 			.priv	= &dm,
1728 		},
1729 		.as_el0	= false,
1730 		.pan	= false,
1731 	};
1732 	struct s1_walk_result wr = {};
1733 	int ret;
1734 
1735 	if (is_hyp_ctxt(vcpu))
1736 		wi.regime = vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2;
1737 	else
1738 		wi.regime = TR_EL10;
1739 
1740 	ret = setup_s1_walk(vcpu, &wi, &wr, va);
1741 	if (ret)
1742 		return ret;
1743 
1744 	/* We really expect the S1 MMU to be on here... */
1745 	if (WARN_ON_ONCE(wr.level == S1_MMU_DISABLED)) {
1746 		*level = 0;
1747 		return 0;
1748 	}
1749 
1750 	/* Walk the guest's PT, looking for a match along the way */
1751 	scoped_guard(srcu, &vcpu->kvm->srcu)
1752 		ret = walk_s1(vcpu, &wi, &wr, va);
1753 	switch (ret) {
1754 	case -EINTR:
1755 		/* We interrupted the walk on a match, return the level */
1756 		*level = dm.level;
1757 		return 0;
1758 	case 0:
1759 		/* The walk completed, we failed to find the entry */
1760 		return -ENOENT;
1761 	default:
1762 		/* Any other error... */
1763 		return ret;
1764 	}
1765 }
1766 
1767 static int __lsui_swap_desc(u64 __user *ptep, u64 old, u64 new)
1768 {
1769 	u64 tmp = old;
1770 	int ret = 0;
1771 
1772 	/*
1773 	 * Wrap LSUI instructions with uaccess_ttbr0_enable()/disable(),
1774 	 * as PAN toggling is not required.
1775 	 */
1776 	uaccess_ttbr0_enable();
1777 
1778 	asm volatile(__LSUI_PREAMBLE
1779 		     "1: cast	%[old], %[new], %[addr]\n"
1780 		     "2:\n"
1781 		     _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w[ret])
1782 		     : [old] "+r" (old), [addr] "+Q" (*ptep), [ret] "+r" (ret)
1783 		     : [new] "r" (new)
1784 		     : "memory");
1785 
1786 	uaccess_ttbr0_disable();
1787 
1788 	if (ret)
1789 		return ret;
1790 	if (tmp != old)
1791 		return -EAGAIN;
1792 
1793 	return ret;
1794 }
1795 
1796 static int __lse_swap_desc(u64 __user *ptep, u64 old, u64 new)
1797 {
1798 	u64 tmp = old;
1799 	int ret = 0;
1800 
1801 	uaccess_enable_privileged();
1802 
1803 	asm volatile(__LSE_PREAMBLE
1804 		     "1: cas	%[old], %[new], %[addr]\n"
1805 		     "2:\n"
1806 		     _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w[ret])
1807 		     : [old] "+r" (old), [addr] "+Q" (*ptep), [ret] "+r" (ret)
1808 		     : [new] "r" (new)
1809 		     : "memory");
1810 
1811 	uaccess_disable_privileged();
1812 
1813 	if (ret)
1814 		return ret;
1815 	if (tmp != old)
1816 		return -EAGAIN;
1817 
1818 	return ret;
1819 }
1820 
1821 static int __llsc_swap_desc(u64 __user *ptep, u64 old, u64 new)
1822 {
1823 	int ret = 1;
1824 	u64 tmp;
1825 
1826 	uaccess_enable_privileged();
1827 
1828 	asm volatile("prfm	pstl1strm, %[addr]\n"
1829 		     "1: ldxr	%[tmp], %[addr]\n"
1830 		     "sub	%[tmp], %[tmp], %[old]\n"
1831 		     "cbnz	%[tmp], 3f\n"
1832 		     "2: stlxr	%w[ret], %[new], %[addr]\n"
1833 		     "3:\n"
1834 		     _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %w[ret])
1835 		     _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %w[ret])
1836 		     : [ret] "+r" (ret), [addr] "+Q" (*ptep), [tmp] "=&r" (tmp)
1837 		     : [old] "r" (old), [new] "r" (new)
1838 		     : "memory");
1839 
1840 	uaccess_disable_privileged();
1841 
1842 	/* STLXR didn't update the descriptor, or the compare failed */
1843 	if (ret == 1)
1844 		return -EAGAIN;
1845 
1846 	return ret;
1847 }
1848 
1849 int __kvm_at_swap_desc(struct kvm *kvm, gpa_t ipa, u64 old, u64 new)
1850 {
1851 	struct kvm_memory_slot *slot;
1852 	unsigned long hva;
1853 	u64 __user *ptep;
1854 	bool writable;
1855 	int offset;
1856 	gfn_t gfn;
1857 	int r;
1858 
1859 	lockdep_assert(srcu_read_lock_held(&kvm->srcu));
1860 
1861 	gfn = ipa >> PAGE_SHIFT;
1862 	offset = offset_in_page(ipa);
1863 	slot = gfn_to_memslot(kvm, gfn);
1864 	hva = gfn_to_hva_memslot_prot(slot, gfn, &writable);
1865 	if (kvm_is_error_hva(hva))
1866 		return -EINVAL;
1867 	if (!writable)
1868 		return -EPERM;
1869 
1870 	ptep = (void __user *)hva + offset;
1871 	if (cpus_have_final_cap(ARM64_HAS_LSUI))
1872 		r = __lsui_swap_desc(ptep, old, new);
1873 	else if (cpus_have_final_cap(ARM64_HAS_LSE_ATOMICS))
1874 		r = __lse_swap_desc(ptep, old, new);
1875 	else
1876 		r = __llsc_swap_desc(ptep, old, new);
1877 
1878 	if (r < 0)
1879 		return r;
1880 
1881 	mark_page_dirty_in_slot(kvm, slot, gfn);
1882 	return 0;
1883 }
1884