xref: /linux/arch/arm64/kvm/at.c (revision 4a51fe919b06cb33ab5834600b501058e944f42b)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2017 - Linaro Ltd
4  * Author: Jintack Lim <jintack.lim@linaro.org>
5  */
6 
7 #include <linux/kvm_host.h>
8 
9 #include <asm/esr.h>
10 #include <asm/kvm_hyp.h>
11 #include <asm/kvm_mmu.h>
12 
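/*
 * Record a failed S1 walk: stash the fault status code and whether the
 * fault was taken on the S2 walk of the S1 tables (s1ptw), so that
 * PAR_EL1 can be synthesised later on.
 */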
13 static void fail_s1_walk(struct s1_walk_result *wr, u8 fst, bool s1ptw)
14 {
15 	wr->fst		= fst;
16 	wr->ptw		= s1ptw;
17 	wr->s2		= s1ptw;
18 	wr->failed	= true;
19 }
20 
21 #define S1_MMU_DISABLED		(-127)
22 
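/* Input address size in bits, as defined by TCR_ELx.TnSZ */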
23 static int get_ia_size(struct s1_walk_info *wi)
24 {
25 	return 64 - wi->txsz;
26 }
27 
28 /* Return true if the IPA is out of the OA range */
29 static bool check_output_size(u64 ipa, struct s1_walk_info *wi)
30 {
31 	if (wi->pa52bit)
32 		return wi->max_oa_bits < 52 && (ipa & GENMASK_ULL(51, wi->max_oa_bits));
33 	return wi->max_oa_bits < 48 && (ipa & GENMASK_ULL(47, wi->max_oa_bits));
34 }
35 
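/*
 * 52bit PAs depend on both the granule size (FEAT_LPA for 64k,
 * FEAT_LPA2 for 4k/16k) and the TCR configuration (PS/IPS, or the DS
 * bit for the smaller granules).
 */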
36 static bool has_52bit_pa(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, u64 tcr)
37 {
38 	switch (BIT(wi->pgshift)) {
39 	case SZ_64K:
40 	default:		/* IMPDEF: treat any other value as 64k */
41 		if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, PARANGE, 52))
42 			return false;
43 		return ((wi->regime == TR_EL2 ?
44 			 FIELD_GET(TCR_EL2_PS_MASK, tcr) :
45 			 FIELD_GET(TCR_IPS_MASK, tcr)) == 0b0110);
46 	case SZ_16K:
47 		if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT))
48 			return false;
49 		break;
50 	case SZ_4K:
51 		if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT))
52 			return false;
53 		break;
54 	}
55 
56 	return (tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS));
57 }
58 
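/*
 * Extract the output address from a descriptor. With 52bit PAs, the
 * top bits live in dedicated descriptor fields ([51:50] for 4k/16k,
 * [51:48] for 64k).
 */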
59 static u64 desc_to_oa(struct s1_walk_info *wi, u64 desc)
60 {
61 	u64 addr;
62 
63 	if (!wi->pa52bit)
64 		return desc & GENMASK_ULL(47, wi->pgshift);
65 
66 	switch (BIT(wi->pgshift)) {
67 	case SZ_4K:
68 	case SZ_16K:
69 		addr = desc & GENMASK_ULL(49, wi->pgshift);
70 		addr |= FIELD_GET(KVM_PTE_ADDR_51_50_LPA2, desc) << 50;
71 		break;
72 	case SZ_64K:
73 	default:	    /* IMPDEF: treat any other value as 64k */
74 		addr = desc & GENMASK_ULL(47, wi->pgshift);
75 		addr |= FIELD_GET(KVM_PTE_ADDR_51_48, desc) << 48;
76 		break;
77 	}
78 
79 	return addr;
80 }
81 
82 /* Return the translation regime that applies to an AT instruction */
83 static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 op)
84 {
85 	/*
86 	 * We only get here from guest EL2, so the translation
87 	 * regime AT applies to is solely defined by {E2H,TGE}.
88 	 */
89 	switch (op) {
90 	case OP_AT_S1E2R:
91 	case OP_AT_S1E2W:
92 	case OP_AT_S1E2A:
93 		return vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2;
94 	default:
95 		return (vcpu_el2_e2h_is_set(vcpu) &&
96 			vcpu_el2_tge_is_set(vcpu)) ? TR_EL20 : TR_EL10;
97 	}
98 }
99 
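/* Effective TCR2 value for the regime, honouring HCRX_EL2.TCR2En for EL1&0 */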
100 static u64 effective_tcr2(struct kvm_vcpu *vcpu, enum trans_regime regime)
101 {
102 	if (regime == TR_EL10) {
103 		if (vcpu_has_nv(vcpu) &&
104 		    !(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En))
105 			return 0;
106 
107 		return vcpu_read_sys_reg(vcpu, TCR2_EL1);
108 	}
109 
110 	return vcpu_read_sys_reg(vcpu, TCR2_EL2);
111 }
112 
113 static bool s1pie_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
114 {
115 	if (!kvm_has_s1pie(vcpu->kvm))
116 		return false;
117 
118 	/* Abuse TCR2_EL1_PIE and use it for EL2 as well */
119 	return effective_tcr2(vcpu, regime) & TCR2_EL1_PIE;
120 }
121 
122 static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi)
123 {
124 	u64 val;
125 
126 	if (!kvm_has_s1poe(vcpu->kvm)) {
127 		wi->poe = wi->e0poe = false;
128 		return;
129 	}
130 
131 	val = effective_tcr2(vcpu, wi->regime);
132 
133 	/* Abuse TCR2_EL1_* for EL2 */
134 	wi->poe = val & TCR2_EL1_POE;
135 	wi->e0poe = (wi->regime != TR_EL2) && (val & TCR2_EL1_E0POE);
136 }
137 
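/*
 * Decode the translation controls (SCTLR, TCR, TTBR) for the regime
 * and populate the walk info: granule size, start level, table base
 * address, HPD/HA/PIE/POE state. Also deals with the MMU-off case and
 * the faults that can be reported before the walk even starts.
 */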
138 static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
139 			 struct s1_walk_result *wr, u64 va)
140 {
141 	u64 hcr, sctlr, tcr, tg, ps, ia_bits, ttbr;
142 	unsigned int stride, x;
143 	bool va55, tbi, lva;
144 
145 	va55 = va & BIT(55);
146 
147 	if (vcpu_has_nv(vcpu)) {
148 		hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
149 		wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC));
150 	} else {
151 		WARN_ON_ONCE(wi->regime != TR_EL10);
152 		wi->s2 = false;
153 		hcr = 0;
154 	}
155 
156 	switch (wi->regime) {
157 	case TR_EL10:
158 		sctlr	= vcpu_read_sys_reg(vcpu, SCTLR_EL1);
159 		tcr	= vcpu_read_sys_reg(vcpu, TCR_EL1);
160 		ttbr	= (va55 ?
161 			   vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
162 			   vcpu_read_sys_reg(vcpu, TTBR0_EL1));
163 		break;
164 	case TR_EL2:
165 	case TR_EL20:
166 		sctlr	= vcpu_read_sys_reg(vcpu, SCTLR_EL2);
167 		tcr	= vcpu_read_sys_reg(vcpu, TCR_EL2);
168 		ttbr	= (va55 ?
169 			   vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
170 			   vcpu_read_sys_reg(vcpu, TTBR0_EL2));
171 		break;
172 	default:
173 		BUG();
174 	}
175 
176 	/* Someone was silly enough to encode TG0/TG1 differently */
177 	if (va55 && wi->regime != TR_EL2) {
178 		wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
179 		tg = FIELD_GET(TCR_TG1_MASK, tcr);
180 
181 		switch (tg << TCR_TG1_SHIFT) {
182 		case TCR_TG1_4K:
183 			wi->pgshift = 12;	 break;
184 		case TCR_TG1_16K:
185 			wi->pgshift = 14;	 break;
186 		case TCR_TG1_64K:
187 		default:	    /* IMPDEF: treat any other value as 64k */
188 			wi->pgshift = 16;	 break;
189 		}
190 	} else {
191 		wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
192 		tg = FIELD_GET(TCR_TG0_MASK, tcr);
193 
194 		switch (tg << TCR_TG0_SHIFT) {
195 		case TCR_TG0_4K:
196 			wi->pgshift = 12;	 break;
197 		case TCR_TG0_16K:
198 			wi->pgshift = 14;	 break;
199 		case TCR_TG0_64K:
200 		default:	    /* IMPDEF: treat any other value as 64k */
201 			wi->pgshift = 16;	 break;
202 		}
203 	}
204 
205 	wi->pa52bit = has_52bit_pa(vcpu, wi, tcr);
206 
207 	ia_bits = get_ia_size(wi);
208 
209 	/* AArch64.S1StartLevel() */
210 	stride = wi->pgshift - 3;
211 	wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride);
212 
213 	if (wi->regime == TR_EL2 && va55)
214 		goto addrsz;
215 
216 	tbi = (wi->regime == TR_EL2 ?
217 	       FIELD_GET(TCR_EL2_TBI, tcr) :
218 	       (va55 ?
219 		FIELD_GET(TCR_TBI1, tcr) :
220 		FIELD_GET(TCR_TBI0, tcr)));
221 
222 	if (!tbi && (u64)sign_extend64(va, 55) != va)
223 		goto addrsz;
224 
225 	wi->sh = (wi->regime == TR_EL2 ?
226 		  FIELD_GET(TCR_EL2_SH0_MASK, tcr) :
227 		  (va55 ?
228 		   FIELD_GET(TCR_SH1_MASK, tcr) :
229 		   FIELD_GET(TCR_SH0_MASK, tcr)));
230 
231 	va = (u64)sign_extend64(va, 55);
232 
233 	/* Let's put the MMU disabled case aside immediately */
234 	switch (wi->regime) {
235 	case TR_EL10:
236 		/*
237 		 * If dealing with the EL1&0 translation regime, 3 things
238 		 * can disable the S1 translation:
239 		 *
240 		 * - HCR_EL2.DC = 1
241 		 * - HCR_EL2.{E2H,TGE} = {0,1}
242 		 * - SCTLR_EL1.M = 0
243 		 *
244 		 * The TGE part is interesting. If we have decided that this
245 		 * is EL1&0, then it means that either {E2H,TGE} == {1,0} or
246 		 * {0,x}, and we only need to test for TGE == 1.
247 		 */
248 		if (hcr & (HCR_DC | HCR_TGE)) {
249 			wr->level = S1_MMU_DISABLED;
250 			break;
251 		}
252 		fallthrough;
253 	case TR_EL2:
254 	case TR_EL20:
255 		if (!(sctlr & SCTLR_ELx_M))
256 			wr->level = S1_MMU_DISABLED;
257 		break;
258 	}
259 
260 	if (wr->level == S1_MMU_DISABLED) {
261 		if (va >= BIT(kvm_get_pa_bits(vcpu->kvm)))
262 			goto addrsz;
263 
264 		wr->pa = va;
265 		return 0;
266 	}
267 
268 	wi->be = sctlr & SCTLR_ELx_EE;
269 
270 	wi->hpd  = kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, HPDS, IMP);
271 	wi->hpd &= (wi->regime == TR_EL2 ?
272 		    FIELD_GET(TCR_EL2_HPD, tcr) :
273 		    (va55 ?
274 		     FIELD_GET(TCR_HPD1, tcr) :
275 		     FIELD_GET(TCR_HPD0, tcr)));
276 	/* R_JHSVW */
277 	wi->hpd |= s1pie_enabled(vcpu, wi->regime);
278 
279 	/* Do we have POE? */
280 	compute_s1poe(vcpu, wi);
281 
282 	/* R_BVXDG */
283 	wi->hpd |= (wi->poe || wi->e0poe);
284 
285 	/* R_PLCGL, R_YXNYW */
286 	if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR2_EL1, ST, 48_47)) {
287 		if (wi->txsz > 39)
288 			goto transfault;
289 	} else {
290 		if (wi->txsz > 48 || (BIT(wi->pgshift) == SZ_64K && wi->txsz > 47))
291 			goto transfault;
292 	}
293 
294 	/* R_GTJBY, R_SXWGM */
295 	switch (BIT(wi->pgshift)) {
296 	case SZ_4K:
297 	case SZ_16K:
298 		lva = wi->pa52bit;
299 		break;
300 	case SZ_64K:
301 		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, VARange, 52);
302 		break;
303 	}
304 
305 	if ((lva && wi->txsz < 12) || (!lva && wi->txsz < 16))
306 		goto transfault;
307 
308 	/* R_YYVYV, I_THCZK */
309 	if ((!va55 && va > GENMASK(ia_bits - 1, 0)) ||
310 	    (va55 && va < GENMASK(63, ia_bits)))
311 		goto transfault;
312 
313 	/* I_ZFSYQ */
314 	if (wi->regime != TR_EL2 &&
315 	    (tcr & (va55 ? TCR_EPD1_MASK : TCR_EPD0_MASK)))
316 		goto transfault;
317 
318 	/* R_BNDVG and following statements */
319 	if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, E0PD, IMP) &&
320 	    wi->as_el0 && (tcr & (va55 ? TCR_E0PD1 : TCR_E0PD0)))
321 		goto transfault;
322 
323 	ps = (wi->regime == TR_EL2 ?
324 	      FIELD_GET(TCR_EL2_PS_MASK, tcr) : FIELD_GET(TCR_IPS_MASK, tcr));
325 
326 	wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps, wi->pa52bit));
327 
328 	/* Compute minimal alignment */
329 	x = 3 + ia_bits - ((3 - wi->sl) * stride + wi->pgshift);
330 
331 	wi->baddr = ttbr & TTBRx_EL1_BADDR;
332 	if (wi->pa52bit) {
333 		/*
334 		 * Force the alignment on 64 bytes for top-level tables
335 		 * smaller than 8 entries, since TTBR.BADDR[5:2] are used to
336 		 * store bits [51:48] of the first level of lookup.
337 		 */
338 		x = max(x, 6);
339 
340 		wi->baddr |= FIELD_GET(GENMASK_ULL(5, 2), ttbr) << 48;
341 	}
342 
343 	/* R_VPBBF */
344 	if (check_output_size(wi->baddr, wi))
345 		goto addrsz;
346 
347 	wi->baddr &= GENMASK_ULL(wi->max_oa_bits - 1, x);
348 
349 	wi->ha  = kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, HAFDBS, AF);
350 	wi->ha &= (wi->regime == TR_EL2 ?
351 		  FIELD_GET(TCR_EL2_HA, tcr) :
352 		  FIELD_GET(TCR_HA, tcr));
353 
354 	return 0;
355 
356 addrsz:
357 	/*
358 	 * Address Size Fault level 0 to indicate it comes from TTBR.
359 	 * yes, this is an oddity.
360 	 */
361 	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false);
362 	return -EFAULT;
363 
364 transfault:
365 	/* Translation Fault on start level */
366 	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(wi->sl), false);
367 	return -EFAULT;
368 }
369 
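/*
 * Read a single descriptor from guest memory, honouring the guest's
 * endianness (SCTLR_ELx.EE).
 */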
370 static int kvm_read_s1_desc(struct kvm_vcpu *vcpu, u64 pa, u64 *desc,
371 			    struct s1_walk_info *wi)
372 {
373 	u64 val;
374 	int r;
375 
376 	r = kvm_read_guest(vcpu->kvm, pa, &val, sizeof(val));
377 	if (r)
378 		return r;
379 
380 	if (wi->be)
381 		*desc = be64_to_cpu((__force __be64)val);
382 	else
383 		*desc = le64_to_cpu((__force __le64)val);
384 
385 	return 0;
386 }
387 
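/*
 * Atomically update a descriptor in guest memory (used for the
 * hardware Access flag emulation), in the guest's endianness.
 */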
388 static int kvm_swap_s1_desc(struct kvm_vcpu *vcpu, u64 pa, u64 old, u64 new,
389 			    struct s1_walk_info *wi)
390 {
391 	if (wi->be) {
392 		old = (__force u64)cpu_to_be64(old);
393 		new = (__force u64)cpu_to_be64(new);
394 	} else {
395 		old = (__force u64)cpu_to_le64(old);
396 		new = (__force u64)cpu_to_le64(new);
397 	}
398 
399 	return __kvm_at_swap_desc(vcpu->kvm, pa, old, new);
400 }
401 
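/*
 * Walk the guest's S1 tables, translating each table access through S2
 * when required, and record the leaf descriptor, level and output
 * address in the walk result.
 */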
402 static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
403 		   struct s1_walk_result *wr, u64 va)
404 {
405 	u64 va_top, va_bottom, baddr, desc, new_desc, ipa;
406 	struct kvm_s2_trans s2_trans = {};
407 	int level, stride, ret;
408 
409 	level = wi->sl;
410 	stride = wi->pgshift - 3;
411 	baddr = wi->baddr;
412 
413 	va_top = get_ia_size(wi) - 1;
414 
415 	while (1) {
416 		u64 index;
417 
418 		va_bottom = (3 - level) * stride + wi->pgshift;
419 		index = (va & GENMASK_ULL(va_top, va_bottom)) >> (va_bottom - 3);
420 
421 		ipa = baddr | index;
422 
423 		if (wi->s2) {
424 			ret = kvm_walk_nested_s2(vcpu, ipa, &s2_trans);
425 			if (ret) {
426 				fail_s1_walk(wr,
427 					     (s2_trans.esr & ~ESR_ELx_FSC_LEVEL) | level,
428 					     true);
429 				return ret;
430 			}
431 
432 			if (!kvm_s2_trans_readable(&s2_trans)) {
433 				fail_s1_walk(wr, ESR_ELx_FSC_PERM_L(level),
434 					     true);
435 
436 				return -EPERM;
437 			}
438 
439 			ipa = kvm_s2_trans_output(&s2_trans);
440 		}
441 
442 		if (wi->filter) {
443 			ret = wi->filter->fn(&(struct s1_walk_context)
444 					     {
445 						     .wi	= wi,
446 						     .table_ipa	= baddr,
447 						     .level	= level,
448 					     }, wi->filter->priv);
449 			if (ret)
450 				return ret;
451 		}
452 
453 		ret = kvm_read_s1_desc(vcpu, ipa, &desc, wi);
454 		if (ret) {
455 			fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), false);
456 			return ret;
457 		}
458 
459 		new_desc = desc;
460 
461 		/* Invalid descriptor */
462 		if (!(desc & BIT(0)))
463 			goto transfault;
464 
465 		/* Block mapping, check validity down the line */
466 		if (!(desc & BIT(1)))
467 			break;
468 
469 		/* Page mapping */
470 		if (level == 3)
471 			break;
472 
473 		/* Table handling */
474 		if (!wi->hpd) {
475 			wr->APTable  |= FIELD_GET(S1_TABLE_AP, desc);
476 			wr->UXNTable |= FIELD_GET(PMD_TABLE_UXN, desc);
477 			wr->PXNTable |= FIELD_GET(PMD_TABLE_PXN, desc);
478 		}
479 
480 		baddr = desc_to_oa(wi, desc);
481 
482 		/* Check for out-of-range OA */
483 		if (check_output_size(baddr, wi))
484 			goto addrsz;
485 
486 		/* Prepare for next round */
487 		va_top = va_bottom - 1;
488 		level++;
489 	}
490 
491 	/* Block mapping, check the validity of the level */
492 	if (!(desc & BIT(1))) {
493 		bool valid_block = false;
494 
495 		switch (BIT(wi->pgshift)) {
496 		case SZ_4K:
497 			valid_block = level == 1 || level == 2 || (wi->pa52bit && level == 0);
498 			break;
499 		case SZ_16K:
500 		case SZ_64K:
501 			valid_block = level == 2 || (wi->pa52bit && level == 1);
502 			break;
503 		}
504 
505 		if (!valid_block)
506 			goto transfault;
507 	}
508 
509 	baddr = desc_to_oa(wi, desc);
510 	if (check_output_size(baddr & GENMASK(52, va_bottom), wi))
511 		goto addrsz;
512 
513 	if (wi->ha)
514 		new_desc |= PTE_AF;
515 
516 	if (new_desc != desc) {
517 		if (wi->s2 && !kvm_s2_trans_writable(&s2_trans)) {
518 			fail_s1_walk(wr, ESR_ELx_FSC_PERM_L(level), true);
519 			return -EPERM;
520 		}
521 
522 		ret = kvm_swap_s1_desc(vcpu, ipa, desc, new_desc, wi);
523 		if (ret)
524 			return ret;
525 
526 		desc = new_desc;
527 	}
528 
529 	if (!(desc & PTE_AF)) {
530 		fail_s1_walk(wr, ESR_ELx_FSC_ACCESS_L(level), false);
531 		return -EACCES;
532 	}
533 
534 	va_bottom += contiguous_bit_shift(desc, wi, level);
535 
536 	wr->failed = false;
537 	wr->level = level;
538 	wr->desc = desc;
539 	wr->pa = baddr & GENMASK(52, va_bottom);
540 	wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0);
541 
542 	wr->nG = (wi->regime != TR_EL2) && (desc & PTE_NG);
543 	if (wr->nG) {
544 		u64 asid_ttbr, tcr;
545 
546 		switch (wi->regime) {
547 		case TR_EL10:
548 			tcr = vcpu_read_sys_reg(vcpu, TCR_EL1);
549 			asid_ttbr = ((tcr & TCR_A1) ?
550 				     vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
551 				     vcpu_read_sys_reg(vcpu, TTBR0_EL1));
552 			break;
553 		case TR_EL20:
554 			tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
555 			asid_ttbr = ((tcr & TCR_A1) ?
556 				     vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
557 				     vcpu_read_sys_reg(vcpu, TTBR0_EL2));
558 			break;
559 		default:
560 			BUG();
561 		}
562 
563 		wr->asid = FIELD_GET(TTBR_ASID_MASK, asid_ttbr);
564 		if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) ||
565 		    !(tcr & TCR_ASID16))
566 			wr->asid &= GENMASK(7, 0);
567 	}
568 
569 	return 0;
570 
571 addrsz:
572 	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(level), false);
573 	return -EINVAL;
574 transfault:
575 	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(level), false);
576 	return -ENOENT;
577 }
578 
579 struct mmu_config {
580 	u64	ttbr0;
581 	u64	ttbr1;
582 	u64	tcr;
583 	u64	mair;
584 	u64	tcr2;
585 	u64	pir;
586 	u64	pire0;
587 	u64	por_el0;
588 	u64	por_el1;
589 	u64	sctlr;
590 	u64	vttbr;
591 	u64	vtcr;
592 };
593 
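/*
 * Save/restore the live S1 MMU context (plus VTTBR/VTCR) so that the
 * guest's EL1&0 context can be temporarily loaded for a HW-assisted AT.
 */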
594 static void __mmu_config_save(struct mmu_config *config)
595 {
596 	config->ttbr0	= read_sysreg_el1(SYS_TTBR0);
597 	config->ttbr1	= read_sysreg_el1(SYS_TTBR1);
598 	config->tcr	= read_sysreg_el1(SYS_TCR);
599 	config->mair	= read_sysreg_el1(SYS_MAIR);
600 	if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
601 		config->tcr2	= read_sysreg_el1(SYS_TCR2);
602 		if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
603 			config->pir	= read_sysreg_el1(SYS_PIR);
604 			config->pire0	= read_sysreg_el1(SYS_PIRE0);
605 		}
606 		if (system_supports_poe()) {
607 			config->por_el1	= read_sysreg_el1(SYS_POR);
608 			config->por_el0	= read_sysreg_s(SYS_POR_EL0);
609 		}
610 	}
611 	config->sctlr	= read_sysreg_el1(SYS_SCTLR);
612 	config->vttbr	= read_sysreg(vttbr_el2);
613 	config->vtcr	= read_sysreg(vtcr_el2);
614 }
615 
616 static void __mmu_config_restore(struct mmu_config *config)
617 {
618 	/*
619 	 * ARM errata 1165522 and 1530923 require TGE to be 1 before
620 	 * we update the guest state.
621 	 */
622 	asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
623 
624 	write_sysreg_el1(config->ttbr0,	SYS_TTBR0);
625 	write_sysreg_el1(config->ttbr1,	SYS_TTBR1);
626 	write_sysreg_el1(config->tcr,	SYS_TCR);
627 	write_sysreg_el1(config->mair,	SYS_MAIR);
628 	if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
629 		write_sysreg_el1(config->tcr2, SYS_TCR2);
630 		if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
631 			write_sysreg_el1(config->pir, SYS_PIR);
632 			write_sysreg_el1(config->pire0, SYS_PIRE0);
633 		}
634 		if (system_supports_poe()) {
635 			write_sysreg_el1(config->por_el1, SYS_POR);
636 			write_sysreg_s(config->por_el0, SYS_POR_EL0);
637 		}
638 	}
639 	write_sysreg_el1(config->sctlr,	SYS_SCTLR);
640 	write_sysreg(config->vttbr,	vttbr_el2);
641 	write_sysreg(config->vtcr,	vtcr_el2);
642 }
643 
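/*
 * S1E1RP/S1E1WP factor in PAN, so mirror the guest's PSTATE.PAN onto
 * the host around the AT instruction.
 */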
644 static bool at_s1e1p_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
645 {
646 	u64 host_pan;
647 	bool fail;
648 
649 	host_pan = read_sysreg_s(SYS_PSTATE_PAN);
650 	write_sysreg_s(*vcpu_cpsr(vcpu) & PSTATE_PAN, SYS_PSTATE_PAN);
651 
652 	switch (op) {
653 	case OP_AT_S1E1RP:
654 		fail = __kvm_at(OP_AT_S1E1RP, vaddr);
655 		break;
656 	case OP_AT_S1E1WP:
657 		fail = __kvm_at(OP_AT_S1E1WP, vaddr);
658 		break;
659 	}
660 
661 	write_sysreg_s(host_pan, SYS_PSTATE_PAN);
662 
663 	return fail;
664 }
665 
666 #define MEMATTR(ic, oc)		(MEMATTR_##oc << 4 | MEMATTR_##ic)
667 #define MEMATTR_NC		0b0100
668 #define MEMATTR_Wt		0b1000
669 #define MEMATTR_Wb		0b1100
670 #define MEMATTR_WbRaWa		0b1111
671 
672 #define MEMATTR_IS_DEVICE(m)	(((m) & GENMASK(7, 4)) == 0)
673 
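/* Convert a S2 MemAttr[3:0] encoding into a MAIR-style attribute byte */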
674 static u8 s2_memattr_to_attr(u8 memattr)
675 {
676 	memattr &= 0b1111;
677 
678 	switch (memattr) {
679 	case 0b0000:
680 	case 0b0001:
681 	case 0b0010:
682 	case 0b0011:
683 		return memattr << 2;
684 	case 0b0100:
685 		return MEMATTR(Wb, Wb);
686 	case 0b0101:
687 		return MEMATTR(NC, NC);
688 	case 0b0110:
689 		return MEMATTR(Wt, NC);
690 	case 0b0111:
691 		return MEMATTR(Wb, NC);
692 	case 0b1000:
693 		/* Reserved, assume NC */
694 		return MEMATTR(NC, NC);
695 	case 0b1001:
696 		return MEMATTR(NC, Wt);
697 	case 0b1010:
698 		return MEMATTR(Wt, Wt);
699 	case 0b1011:
700 		return MEMATTR(Wb, Wt);
701 	case 0b1100:
702 		/* Reserved, assume NC */
703 		return MEMATTR(NC, NC);
704 	case 0b1101:
705 		return MEMATTR(NC, Wb);
706 	case 0b1110:
707 		return MEMATTR(Wt, Wb);
708 	case 0b1111:
709 		return MEMATTR(Wb, Wb);
710 	default:
711 		unreachable();
712 	}
713 }
714 
715 static u8 combine_s1_s2_attr(u8 s1, u8 s2)
716 {
717 	bool transient;
718 	u8 final = 0;
719 
720 	/* Upgrade transient s1 to non-transient to simplify things */
721 	switch (s1) {
722 	case 0b0001 ... 0b0011:	/* Normal, Write-Through Transient */
723 		transient = true;
724 		s1 = MEMATTR_Wt | (s1 & GENMASK(1,0));
725 		break;
726 	case 0b0101 ... 0b0111:	/* Normal, Write-Back Transient */
727 		transient = true;
728 		s1 = MEMATTR_Wb | (s1 & GENMASK(1,0));
729 		break;
730 	default:
731 		transient = false;
732 	}
733 
734 	/* S2CombineS1AttrHints() */
735 	if ((s1 & GENMASK(3, 2)) == MEMATTR_NC ||
736 	    (s2 & GENMASK(3, 2)) == MEMATTR_NC)
737 		final = MEMATTR_NC;
738 	else if ((s1 & GENMASK(3, 2)) == MEMATTR_Wt ||
739 		 (s2 & GENMASK(3, 2)) == MEMATTR_Wt)
740 		final = MEMATTR_Wt;
741 	else
742 		final = MEMATTR_Wb;
743 
744 	if (final != MEMATTR_NC) {
745 		/* Inherit RaWa hints from S1 */
746 		if (transient) {
747 			switch (s1 & GENMASK(3, 2)) {
748 			case MEMATTR_Wt:
749 				final = 0;
750 				break;
751 			case MEMATTR_Wb:
752 				final = MEMATTR_NC;
753 				break;
754 			}
755 		}
756 
757 		final |= s1 & GENMASK(1, 0);
758 	}
759 
760 	return final;
761 }
762 
763 #define ATTR_NSH	0b00
764 #define ATTR_RSV	0b01
765 #define ATTR_OSH	0b10
766 #define ATTR_ISH	0b11
767 
768 static u8 compute_final_sh(u8 attr, u8 sh)
769 {
770 	/* Any form of device, as well as NC has SH[1:0]=0b10 */
771 	if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC))
772 		return ATTR_OSH;
773 
774 	if (sh == ATTR_RSV)		/* Reserved, mapped to NSH */
775 		sh = ATTR_NSH;
776 
777 	return sh;
778 }
779 
780 static u8 compute_s1_sh(struct s1_walk_info *wi, struct s1_walk_result *wr,
781 			u8 attr)
782 {
783 	u8 sh;
784 
785 	/*
786 	 * non-52bit and LPA have their basic shareability described in the
787 	 * descriptor. LPA2 gets it from the corresponding field in TCR,
788 	 * conveniently recorded in the walk info.
789 	 */
790 	if (!wi->pa52bit || BIT(wi->pgshift) == SZ_64K)
791 		sh = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S1_SH, wr->desc);
792 	else
793 		sh = wi->sh;
794 
795 	return compute_final_sh(attr, sh);
796 }
797 
798 static u8 combine_sh(u8 s1_sh, u8 s2_sh)
799 {
800 	if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH)
801 		return ATTR_OSH;
802 	if (s1_sh == ATTR_ISH || s2_sh == ATTR_ISH)
803 		return ATTR_ISH;
804 
805 	return ATTR_NSH;
806 }
807 
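/*
 * Combine the S1 PAR_EL1 value with the result of the S2 walk to
 * produce the final PAR_EL1 for an AT S12 operation, merging memory
 * attributes and shareability (including the FWB and CD cases).
 */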
808 static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
809 			   struct kvm_s2_trans *tr)
810 {
811 	u8 s1_parattr, s2_memattr, final_attr, s2_sh;
812 	u64 par;
813 
814 	/* If S2 has failed to translate, report the damage */
815 	if (tr->esr) {
816 		par = SYS_PAR_EL1_RES1;
817 		par |= SYS_PAR_EL1_F;
818 		par |= SYS_PAR_EL1_S;
819 		par |= FIELD_PREP(SYS_PAR_EL1_FST, tr->esr);
820 		return par;
821 	}
822 
823 	s1_parattr = FIELD_GET(SYS_PAR_EL1_ATTR, s1_par);
824 	s2_memattr = FIELD_GET(GENMASK(5, 2), tr->desc);
825 
826 	if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_FWB) {
827 		if (!kvm_has_feat(vcpu->kvm, ID_AA64PFR2_EL1, MTEPERM, IMP))
828 			s2_memattr &= ~BIT(3);
829 
830 		/* Combination of R_VRJSW and R_RHWZM */
831 		switch (s2_memattr) {
832 		case 0b0101:
833 			if (MEMATTR_IS_DEVICE(s1_parattr))
834 				final_attr = s1_parattr;
835 			else
836 				final_attr = MEMATTR(NC, NC);
837 			break;
838 		case 0b0110:
839 		case 0b1110:
840 			final_attr = MEMATTR(WbRaWa, WbRaWa);
841 			break;
842 		case 0b0111:
843 		case 0b1111:
844 			/* Preserve S1 attribute */
845 			final_attr = s1_parattr;
846 			break;
847 		case 0b0100:
848 		case 0b1100:
849 		case 0b1101:
850 			/* Reserved, do something non-silly */
851 			final_attr = s1_parattr;
852 			break;
853 		default:
854 			/*
855 			 * MemAttr[2]=0, Device from S2.
856 			 *
857 			 * FWB does not influence the way that stage 1
858 			 * memory types and attributes are combined
859 			 * with stage 2 Device type and attributes.
860 			 */
861 			final_attr = min(s2_memattr_to_attr(s2_memattr),
862 					 s1_parattr);
863 		}
864 	} else {
865 		/* Combination of R_HMNDG, R_TNHFM and R_GQFSF */
866 		u8 s2_parattr = s2_memattr_to_attr(s2_memattr);
867 
868 		if (MEMATTR_IS_DEVICE(s1_parattr) ||
869 		    MEMATTR_IS_DEVICE(s2_parattr)) {
870 			final_attr = min(s1_parattr, s2_parattr);
871 		} else {
872 			/* At this stage, this is memory vs memory */
873 			final_attr  = combine_s1_s2_attr(s1_parattr & 0xf,
874 							 s2_parattr & 0xf);
875 			final_attr |= combine_s1_s2_attr(s1_parattr >> 4,
876 							 s2_parattr >> 4) << 4;
877 		}
878 	}
879 
880 	if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_CD) &&
881 	    !MEMATTR_IS_DEVICE(final_attr))
882 		final_attr = MEMATTR(NC, NC);
883 
884 	s2_sh = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S2_SH, tr->desc);
885 
886 	par  = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr);
887 	par |= tr->output & GENMASK(47, 12);
888 	par |= FIELD_PREP(SYS_PAR_EL1_SH,
889 			  combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par),
890 				     compute_final_sh(final_attr, s2_sh)));
891 
892 	return par;
893 }
894 
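/*
 * Synthesise PAR_EL1 from a S1 walk result: failed walk, MMU-off, or
 * completed translation.
 */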
895 static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
896 			  struct s1_walk_result *wr)
897 {
898 	u64 par;
899 
900 	if (wr->failed) {
901 		par = SYS_PAR_EL1_RES1;
902 		par |= SYS_PAR_EL1_F;
903 		par |= FIELD_PREP(SYS_PAR_EL1_FST, wr->fst);
904 		par |= wr->ptw ? SYS_PAR_EL1_PTW : 0;
905 		par |= wr->s2 ? SYS_PAR_EL1_S : 0;
906 	} else if (wr->level == S1_MMU_DISABLED) {
907 		/* MMU off or HCR_EL2.DC == 1 */
908 		par  = SYS_PAR_EL1_NSE;
909 		par |= wr->pa & SYS_PAR_EL1_PA;
910 
911 		if (wi->regime == TR_EL10 && vcpu_has_nv(vcpu) &&
912 		    (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_DC)) {
913 			par |= FIELD_PREP(SYS_PAR_EL1_ATTR,
914 					  MEMATTR(WbRaWa, WbRaWa));
915 			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_NSH);
916 		} else {
917 			par |= FIELD_PREP(SYS_PAR_EL1_ATTR, 0); /* nGnRnE */
918 			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_OSH);
919 		}
920 	} else {
921 		u64 mair, sctlr;
922 		u8 sh;
923 
924 		par  = SYS_PAR_EL1_NSE;
925 
926 		mair = (wi->regime == TR_EL10 ?
927 			vcpu_read_sys_reg(vcpu, MAIR_EL1) :
928 			vcpu_read_sys_reg(vcpu, MAIR_EL2));
929 
930 		mair >>= FIELD_GET(PTE_ATTRINDX_MASK, wr->desc) * 8;
931 		mair &= 0xff;
932 
933 		sctlr = (wi->regime == TR_EL10 ?
934 			 vcpu_read_sys_reg(vcpu, SCTLR_EL1) :
935 			 vcpu_read_sys_reg(vcpu, SCTLR_EL2));
936 
937 		/* Force NC for memory if SCTLR_ELx.C is clear */
938 		if (!(sctlr & SCTLR_EL1_C) && !MEMATTR_IS_DEVICE(mair))
939 			mair = MEMATTR(NC, NC);
940 
941 		par |= FIELD_PREP(SYS_PAR_EL1_ATTR, mair);
942 		par |= wr->pa & SYS_PAR_EL1_PA;
943 
944 		sh = compute_s1_sh(wi, wr, mair);
945 		par |= FIELD_PREP(SYS_PAR_EL1_SH, sh);
946 	}
947 
948 	return par;
949 }
950 
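/*
 * FEAT_PAN3: PAN also applies to execute-only user mappings, either
 * because S1PIE is in use or because SCTLR_ELx.EPAN is set.
 */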
951 static bool pan3_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
952 {
953 	u64 sctlr;
954 
955 	if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, PAN, PAN3))
956 		return false;
957 
958 	if (s1pie_enabled(vcpu, regime))
959 		return true;
960 
961 	if (regime == TR_EL10)
962 		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
963 	else
964 		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);
965 
966 	return sctlr & SCTLR_EL1_EPAN;
967 }
968 
969 static void compute_s1_direct_permissions(struct kvm_vcpu *vcpu,
970 					  struct s1_walk_info *wi,
971 					  struct s1_walk_result *wr)
972 {
973 	bool wxn;
974 
975 	/* Non-hierarchical part of AArch64.S1DirectBasePermissions() */
976 	if (wi->regime != TR_EL2) {
977 		switch (FIELD_GET(PTE_USER | PTE_RDONLY, wr->desc)) {
978 		case 0b00:
979 			wr->pr = wr->pw = true;
980 			wr->ur = wr->uw = false;
981 			break;
982 		case 0b01:
983 			wr->pr = wr->pw = wr->ur = wr->uw = true;
984 			break;
985 		case 0b10:
986 			wr->pr = true;
987 			wr->pw = wr->ur = wr->uw = false;
988 			break;
989 		case 0b11:
990 			wr->pr = wr->ur = true;
991 			wr->pw = wr->uw = false;
992 			break;
993 		}
994 
995 		/* We don't use px for anything yet, but hey... */
996 		wr->px = !((wr->desc & PTE_PXN) || wr->uw);
997 		wr->ux = !(wr->desc & PTE_UXN);
998 	} else {
999 		wr->ur = wr->uw = wr->ux = false;
1000 
1001 		if (!(wr->desc & PTE_RDONLY)) {
1002 			wr->pr = wr->pw = true;
1003 		} else {
1004 			wr->pr = true;
1005 			wr->pw = false;
1006 		}
1007 
1008 		/* XN maps to UXN */
1009 		wr->px = !(wr->desc & PTE_UXN);
1010 	}
1011 
1012 	switch (wi->regime) {
1013 	case TR_EL2:
1014 	case TR_EL20:
1015 		wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_WXN);
1016 		break;
1017 	case TR_EL10:
1018 		wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN);
1019 		break;
1020 	}
1021 
1022 	wr->pwxn = wr->uwxn = wxn;
1023 	wr->pov = wi->poe;
1024 	wr->uov = wi->e0poe;
1025 }
1026 
1027 static void compute_s1_hierarchical_permissions(struct kvm_vcpu *vcpu,
1028 						struct s1_walk_info *wi,
1029 						struct s1_walk_result *wr)
1030 {
1031 	/* Hierarchical part of AArch64.S1DirectBasePermissions() */
1032 	if (wi->regime != TR_EL2) {
1033 		switch (wr->APTable) {
1034 		case 0b00:
1035 			break;
1036 		case 0b01:
1037 			wr->ur = wr->uw = false;
1038 			break;
1039 		case 0b10:
1040 			wr->pw = wr->uw = false;
1041 			break;
1042 		case 0b11:
1043 			wr->pw = wr->ur = wr->uw = false;
1044 			break;
1045 		}
1046 
1047 		wr->px &= !wr->PXNTable;
1048 		wr->ux &= !wr->UXNTable;
1049 	} else {
1050 		if (wr->APTable & BIT(1))
1051 			wr->pw = false;
1052 
1053 		/* XN maps to UXN */
1054 		wr->px &= !wr->UXNTable;
1055 	}
1056 }
1057 
1058 #define perm_idx(v, r, i)	((vcpu_read_sys_reg((v), (r)) >> ((i) * 4)) & 0xf)
1059 
1060 #define set_priv_perms(wr, r, w, x)	\
1061 	do {				\
1062 		(wr)->pr = (r);		\
1063 		(wr)->pw = (w);		\
1064 		(wr)->px = (x);		\
1065 	} while (0)
1066 
1067 #define set_unpriv_perms(wr, r, w, x)	\
1068 	do {				\
1069 		(wr)->ur = (r);		\
1070 		(wr)->uw = (w);		\
1071 		(wr)->ux = (x);		\
1072 	} while (0)
1073 
1074 #define set_priv_wxn(wr, v)		\
1075 	do {				\
1076 		(wr)->pwxn = (v);	\
1077 	} while (0)
1078 
1079 #define set_unpriv_wxn(wr, v)		\
1080 	do {				\
1081 		(wr)->uwxn = (v);	\
1082 	} while (0)
1083 
1084 /* Similar to AArch64.S1IndirectBasePermissions(), without GCS  */
1085 #define set_perms(w, wr, ip)						\
1086 	do {								\
1087 		/* R_LLZDZ */						\
1088 		switch ((ip)) {						\
1089 		case 0b0000:						\
1090 			set_ ## w ## _perms((wr), false, false, false);	\
1091 			break;						\
1092 		case 0b0001:						\
1093 			set_ ## w ## _perms((wr), true , false, false);	\
1094 			break;						\
1095 		case 0b0010:						\
1096 			set_ ## w ## _perms((wr), false, false, true );	\
1097 			break;						\
1098 		case 0b0011:						\
1099 			set_ ## w ## _perms((wr), true , false, true );	\
1100 			break;						\
1101 		case 0b0100:						\
1102 			set_ ## w ## _perms((wr), false, false, false);	\
1103 			break;						\
1104 		case 0b0101:						\
1105 			set_ ## w ## _perms((wr), true , true , false);	\
1106 			break;						\
1107 		case 0b0110:						\
1108 			set_ ## w ## _perms((wr), true , true , true );	\
1109 			break;						\
1110 		case 0b0111:						\
1111 			set_ ## w ## _perms((wr), true , true , true );	\
1112 			break;						\
1113 		case 0b1000:						\
1114 			set_ ## w ## _perms((wr), true , false, false);	\
1115 			break;						\
1116 		case 0b1001:						\
1117 			set_ ## w ## _perms((wr), true , false, false);	\
1118 			break;						\
1119 		case 0b1010:						\
1120 			set_ ## w ## _perms((wr), true , false, true );	\
1121 			break;						\
1122 		case 0b1011:						\
1123 			set_ ## w ## _perms((wr), false, false, false);	\
1124 			break;						\
1125 		case 0b1100:						\
1126 			set_ ## w ## _perms((wr), true , true , false);	\
1127 			break;						\
1128 		case 0b1101:						\
1129 			set_ ## w ## _perms((wr), false, false, false);	\
1130 			break;						\
1131 		case 0b1110:						\
1132 			set_ ## w ## _perms((wr), true , true , true );	\
1133 			break;						\
1134 		case 0b1111:						\
1135 			set_ ## w ## _perms((wr), false, false, false);	\
1136 			break;						\
1137 		}							\
1138 									\
1139 		/* R_HJYGR */						\
1140 		set_ ## w ## _wxn((wr), ((ip) == 0b0110));		\
1141 									\
1142 	} while (0)
1143 
1144 static void compute_s1_indirect_permissions(struct kvm_vcpu *vcpu,
1145 					    struct s1_walk_info *wi,
1146 					    struct s1_walk_result *wr)
1147 {
1148 	u8 up, pp, idx;
1149 
1150 	idx = pte_pi_index(wr->desc);
1151 
1152 	switch (wi->regime) {
1153 	case TR_EL10:
1154 		pp = perm_idx(vcpu, PIR_EL1, idx);
1155 		up = perm_idx(vcpu, PIRE0_EL1, idx);
1156 		break;
1157 	case TR_EL20:
1158 		pp = perm_idx(vcpu, PIR_EL2, idx);
1159 		up = perm_idx(vcpu, PIRE0_EL2, idx);
1160 		break;
1161 	case TR_EL2:
1162 		pp = perm_idx(vcpu, PIR_EL2, idx);
1163 		up = 0;
1164 		break;
1165 	}
1166 
1167 	set_perms(priv, wr, pp);
1168 
1169 	if (wi->regime != TR_EL2)
1170 		set_perms(unpriv, wr, up);
1171 	else
1172 		set_unpriv_perms(wr, false, false, false);
1173 
1174 	wr->pov = wi->poe && !(pp & BIT(3));
1175 	wr->uov = wi->e0poe && !(up & BIT(3));
1176 
1177 	/* R_VFPJF */
1178 	if (wr->px && wr->uw) {
1179 		set_priv_perms(wr, false, false, false);
1180 		set_unpriv_perms(wr, false, false, false);
1181 	}
1182 }
1183 
1184 static void compute_s1_overlay_permissions(struct kvm_vcpu *vcpu,
1185 					   struct s1_walk_info *wi,
1186 					   struct s1_walk_result *wr)
1187 {
1188 	u8 idx, pov_perms, uov_perms;
1189 
1190 	idx = FIELD_GET(PTE_PO_IDX_MASK, wr->desc);
1191 
1192 	if (wr->pov) {
1193 		switch (wi->regime) {
1194 		case TR_EL10:
1195 			pov_perms = perm_idx(vcpu, POR_EL1, idx);
1196 			break;
1197 		case TR_EL20:
1198 			pov_perms = perm_idx(vcpu, POR_EL2, idx);
1199 			break;
1200 		case TR_EL2:
1201 			pov_perms = perm_idx(vcpu, POR_EL2, idx);
1202 			break;
1203 		}
1204 
1205 		if (pov_perms & ~POE_RWX)
1206 			pov_perms = POE_NONE;
1207 
1208 		/* R_QXXPC, S1PrivOverlay enabled */
1209 		if (wr->pwxn && (pov_perms & POE_X))
1210 			pov_perms &= ~POE_W;
1211 
1212 		wr->pr &= pov_perms & POE_R;
1213 		wr->pw &= pov_perms & POE_W;
1214 		wr->px &= pov_perms & POE_X;
1215 	}
1216 
1217 	if (wr->uov) {
1218 		switch (wi->regime) {
1219 		case TR_EL10:
1220 			uov_perms = perm_idx(vcpu, POR_EL0, idx);
1221 			break;
1222 		case TR_EL20:
1223 			uov_perms = perm_idx(vcpu, POR_EL0, idx);
1224 			break;
1225 		case TR_EL2:
1226 			uov_perms = 0;
1227 			break;
1228 		}
1229 
1230 		if (uov_perms & ~POE_RWX)
1231 			uov_perms = POE_NONE;
1232 
1233 		/* R_NPBXC, S1UnprivOverlay enabled */
1234 		if (wr->uwxn && (uov_perms & POE_X))
1235 			uov_perms &= ~POE_W;
1236 
1237 		wr->ur &= uov_perms & POE_R;
1238 		wr->uw &= uov_perms & POE_W;
1239 		wr->ux &= uov_perms & POE_X;
1240 	}
1241 }
1242 
1243 static void compute_s1_permissions(struct kvm_vcpu *vcpu,
1244 				   struct s1_walk_info *wi,
1245 				   struct s1_walk_result *wr)
1246 {
1247 	bool pan;
1248 
1249 	if (!s1pie_enabled(vcpu, wi->regime))
1250 		compute_s1_direct_permissions(vcpu, wi, wr);
1251 	else
1252 		compute_s1_indirect_permissions(vcpu, wi, wr);
1253 
1254 	if (!wi->hpd)
1255 		compute_s1_hierarchical_permissions(vcpu, wi, wr);
1256 
1257 	compute_s1_overlay_permissions(vcpu, wi, wr);
1258 
1259 	/* R_QXXPC, S1PrivOverlay disabled */
1260 	if (!wr->pov)
1261 		wr->px &= !(wr->pwxn && wr->pw);
1262 
1263 	/* R_NPBXC, S1UnprivOverlay disabled */
1264 	if (!wr->uov)
1265 		wr->ux &= !(wr->uwxn && wr->uw);
1266 
1267 	pan = wi->pan && (wr->ur || wr->uw ||
1268 			  (pan3_enabled(vcpu, wi->regime) && wr->ux));
1269 	wr->pw &= !pan;
1270 	wr->pr &= !pan;
1271 }
1272 
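/*
 * Emulate an AT instruction by walking the guest's S1 tables in
 * software, computing the resulting permissions and synthesising the
 * corresponding PAR_EL1 value.
 */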
1273 static int handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr, u64 *par)
1274 {
1275 	struct s1_walk_result wr = {};
1276 	struct s1_walk_info wi = {};
1277 	bool perm_fail = false;
1278 	int ret, idx;
1279 
1280 	wi.regime = compute_translation_regime(vcpu, op);
1281 	wi.as_el0 = (op == OP_AT_S1E0R || op == OP_AT_S1E0W);
1282 	wi.pan = (op == OP_AT_S1E1RP || op == OP_AT_S1E1WP) &&
1283 		 (*vcpu_cpsr(vcpu) & PSR_PAN_BIT);
1284 
1285 	ret = setup_s1_walk(vcpu, &wi, &wr, vaddr);
1286 	if (ret)
1287 		goto compute_par;
1288 
1289 	if (wr.level == S1_MMU_DISABLED)
1290 		goto compute_par;
1291 
1292 	idx = srcu_read_lock(&vcpu->kvm->srcu);
1293 
1294 	ret = walk_s1(vcpu, &wi, &wr, vaddr);
1295 
1296 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
1297 
1298 	/*
1299 	 * Race to update a descriptor -- restart the walk.
1300 	 */
1301 	if (ret == -EAGAIN)
1302 		return ret;
1303 	if (ret)
1304 		goto compute_par;
1305 
1306 	compute_s1_permissions(vcpu, &wi, &wr);
1307 
1308 	switch (op) {
1309 	case OP_AT_S1E1RP:
1310 	case OP_AT_S1E1R:
1311 	case OP_AT_S1E2R:
1312 		perm_fail = !wr.pr;
1313 		break;
1314 	case OP_AT_S1E1WP:
1315 	case OP_AT_S1E1W:
1316 	case OP_AT_S1E2W:
1317 		perm_fail = !wr.pw;
1318 		break;
1319 	case OP_AT_S1E0R:
1320 		perm_fail = !wr.ur;
1321 		break;
1322 	case OP_AT_S1E0W:
1323 		perm_fail = !wr.uw;
1324 		break;
1325 	case OP_AT_S1E1A:
1326 	case OP_AT_S1E2A:
1327 		break;
1328 	default:
1329 		BUG();
1330 	}
1331 
1332 	if (perm_fail)
1333 		fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false);
1334 
1335 compute_par:
1336 	*par = compute_par_s1(vcpu, &wi, &wr);
1337 	return 0;
1338 }
1339 
1340 /*
1341  * Return the PAR_EL1 value as the result of a valid translation.
1342  *
1343  * If the translation is unsuccessful, the value may only contain
1344  * PAR_EL1.F, and cannot be taken at face value. It isn't an
1345  * indication of the translation having failed, only that the fast
1346  * path did not succeed, *unless* it indicates a S1 permission or
1347  * access fault.
1348  */
1349 static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
1350 {
1351 	struct mmu_config config;
1352 	struct kvm_s2_mmu *mmu;
1353 	bool fail, mmu_cs;
1354 	u64 par;
1355 
1356 	par = SYS_PAR_EL1_F;
1357 
1358 	/*
1359 	 * We've trapped, so everything is live on the CPU. As we will
1360 	 * be switching contexts behind everybody's back, disable
1361 	 * interrupts while holding the mmu lock.
1362 	 */
1363 	guard(write_lock_irqsave)(&vcpu->kvm->mmu_lock);
1364 
1365 	/*
1366 	 * If HCR_EL2.{E2H,TGE} == {1,1}, the MMU context is already
1367 	 * the right one (as we trapped from vEL2). If not, save the
1368 	 * full MMU context.
1369 	 *
1370 	 * We are also guaranteed to be in the correct context if
1371 	 * we're not in a nested VM.
1372 	 */
1373 	mmu_cs = (vcpu_has_nv(vcpu) &&
1374 		  !(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)));
1375 	if (!mmu_cs)
1376 		goto skip_mmu_switch;
1377 
1378 	/*
1379 	 * Obtaining the S2 MMU for a L2 is horribly racy, and we may not
1380 	 * find it (recycled by another vcpu, for example). When this
1381 	 * happens, admit defeat immediately and use the SW (slow) path.
1382 	 */
1383 	mmu = lookup_s2_mmu(vcpu);
1384 	if (!mmu)
1385 		return par;
1386 
1387 	__mmu_config_save(&config);
1388 
1389 	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR0_EL1),	SYS_TTBR0);
1390 	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR1_EL1),	SYS_TTBR1);
1391 	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR_EL1),	SYS_TCR);
1392 	write_sysreg_el1(vcpu_read_sys_reg(vcpu, MAIR_EL1),	SYS_MAIR);
1393 	if (kvm_has_tcr2(vcpu->kvm)) {
1394 		write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR2_EL1), SYS_TCR2);
1395 		if (kvm_has_s1pie(vcpu->kvm)) {
1396 			write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIR_EL1), SYS_PIR);
1397 			write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIRE0_EL1), SYS_PIRE0);
1398 		}
1399 		if (kvm_has_s1poe(vcpu->kvm)) {
1400 			write_sysreg_el1(vcpu_read_sys_reg(vcpu, POR_EL1), SYS_POR);
1401 			write_sysreg_s(vcpu_read_sys_reg(vcpu, POR_EL0), SYS_POR_EL0);
1402 		}
1403 	}
1404 	write_sysreg_el1(vcpu_read_sys_reg(vcpu, SCTLR_EL1),	SYS_SCTLR);
1405 	__load_stage2(mmu, mmu->arch);
1406 
1407 skip_mmu_switch:
1408 	/* Temporarily switch back to guest context */
1409 	write_sysreg_hcr(vcpu->arch.hcr_el2);
1410 	isb();
1411 
1412 	switch (op) {
1413 	case OP_AT_S1E1RP:
1414 	case OP_AT_S1E1WP:
1415 		fail = at_s1e1p_fast(vcpu, op, vaddr);
1416 		break;
1417 	case OP_AT_S1E1R:
1418 		fail = __kvm_at(OP_AT_S1E1R, vaddr);
1419 		break;
1420 	case OP_AT_S1E1W:
1421 		fail = __kvm_at(OP_AT_S1E1W, vaddr);
1422 		break;
1423 	case OP_AT_S1E0R:
1424 		fail = __kvm_at(OP_AT_S1E0R, vaddr);
1425 		break;
1426 	case OP_AT_S1E0W:
1427 		fail = __kvm_at(OP_AT_S1E0W, vaddr);
1428 		break;
1429 	case OP_AT_S1E1A:
1430 		fail = __kvm_at(OP_AT_S1E1A, vaddr);
1431 		break;
1432 	default:
1433 		WARN_ON_ONCE(1);
1434 		fail = true;
1435 		break;
1436 	}
1437 
1438 	if (!fail)
1439 		par = read_sysreg_par();
1440 
1441 	write_sysreg_hcr(HCR_HOST_VHE_FLAGS);
1442 
1443 	if (mmu_cs)
1444 		__mmu_config_restore(&config);
1445 
1446 	return par;
1447 }
1448 
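/* Spot S1 (as opposed to S2) permission/access faults in a PAR_EL1 value */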
1449 static bool par_check_s1_perm_fault(u64 par)
1450 {
1451 	u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par);
1452 
1453 	return  ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM &&
1454 		 !(par & SYS_PAR_EL1_S));
1455 }
1456 
1457 static bool par_check_s1_access_fault(u64 par)
1458 {
1459 	u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par);
1460 
1461 	return  ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_ACCESS &&
1462 		 !(par & SYS_PAR_EL1_S));
1463 }
1464 
1465 int __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
1466 {
1467 	u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr);
1468 	int ret;
1469 
1470 	/*
1471 	 * If PAR_EL1 reports that AT failed on a S1 permission or access
1472 	 * fault, we know for sure that the PTW was able to walk the S1
1473 	 * tables and there's nothing else to do.
1474 	 *
1475 	 * If AT failed for any other reason, then we must walk the guest S1
1476 	 * to emulate the instruction.
1477 	 */
1478 	if ((par & SYS_PAR_EL1_F) &&
1479 	    !par_check_s1_perm_fault(par) &&
1480 	    !par_check_s1_access_fault(par)) {
1481 		ret = handle_at_slow(vcpu, op, vaddr, &par);
1482 		if (ret)
1483 			return ret;
1484 	}
1485 
1486 	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
1487 	return 0;
1488 }
1489 
1490 int __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
1491 {
1492 	u64 par;
1493 	int ret;
1494 
1495 	/*
1496 	 * We've trapped, so everything is live on the CPU. As we will be
1497 	 * switching context behind everybody's back, disable interrupts...
1498 	 */
1499 	scoped_guard(write_lock_irqsave, &vcpu->kvm->mmu_lock) {
1500 		u64 val, hcr;
1501 		bool fail;
1502 
1503 		val = hcr = read_sysreg(hcr_el2);
1504 		val &= ~HCR_TGE;
1505 		val |= HCR_VM;
1506 
1507 		if (!vcpu_el2_e2h_is_set(vcpu))
1508 			val |= HCR_NV | HCR_NV1;
1509 
1510 		write_sysreg_hcr(val);
1511 		isb();
1512 
1513 		par = SYS_PAR_EL1_F;
1514 
1515 		switch (op) {
1516 		case OP_AT_S1E2R:
1517 			fail = __kvm_at(OP_AT_S1E1R, vaddr);
1518 			break;
1519 		case OP_AT_S1E2W:
1520 			fail = __kvm_at(OP_AT_S1E1W, vaddr);
1521 			break;
1522 		case OP_AT_S1E2A:
1523 			fail = __kvm_at(OP_AT_S1E1A, vaddr);
1524 			break;
1525 		default:
1526 			WARN_ON_ONCE(1);
1527 			fail = true;
1528 		}
1529 
1530 		isb();
1531 
1532 		if (!fail)
1533 			par = read_sysreg_par();
1534 
1535 		write_sysreg_hcr(hcr);
1536 		isb();
1537 	}
1538 
1539 	/* We failed the translation, let's replay it in slow motion */
1540 	if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par)) {
1541 		ret = handle_at_slow(vcpu, op, vaddr, &par);
1542 		if (ret)
1543 			return ret;
1544 	}
1545 
1546 	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
1547 	return 0;
1548 }
1549 
1550 int __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
1551 {
1552 	struct kvm_s2_trans out = {};
1553 	u64 ipa, par;
1554 	bool write;
1555 	int ret;
1556 
1557 	/* Do the stage-1 translation */
1558 	switch (op) {
1559 	case OP_AT_S12E1R:
1560 		op = OP_AT_S1E1R;
1561 		write = false;
1562 		break;
1563 	case OP_AT_S12E1W:
1564 		op = OP_AT_S1E1W;
1565 		write = true;
1566 		break;
1567 	case OP_AT_S12E0R:
1568 		op = OP_AT_S1E0R;
1569 		write = false;
1570 		break;
1571 	case OP_AT_S12E0W:
1572 		op = OP_AT_S1E0W;
1573 		write = true;
1574 		break;
1575 	default:
1576 		WARN_ON_ONCE(1);
1577 		return 0;
1578 	}
1579 
1580 	__kvm_at_s1e01(vcpu, op, vaddr);
1581 	par = vcpu_read_sys_reg(vcpu, PAR_EL1);
1582 	if (par & SYS_PAR_EL1_F)
1583 		return 0;
1584 
1585 	/*
1586 	 * If we only have a single stage of translation (EL2&0), exit
1587 	 * early. Same thing if {VM,DC}=={0,0}.
1588 	 */
1589 	if (compute_translation_regime(vcpu, op) == TR_EL20 ||
1590 	    !(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC)))
1591 		return 0;
1592 
1593 	/* Do the stage-2 translation */
1594 	ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0));
1595 	out.esr = 0;
1596 	ret = kvm_walk_nested_s2(vcpu, ipa, &out);
1597 	if (ret < 0)
1598 		return ret;
1599 
1600 	/* Check the access permission */
1601 	if (!out.esr &&
1602 	    ((!write && !out.readable) || (write && !out.writable)))
1603 		out.esr = ESR_ELx_FSC_PERM_L(out.level & 0x3);
1604 
1605 	par = compute_par_s12(vcpu, par, &out);
1606 	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
1607 	return 0;
1608 }
1609 
1610 /*
1611  * Translate a VA for a given EL in a given translation regime, with
1612  * or without PAN. This requires wi->{regime, as_el0, pan} to be
1613  * set. The rest of the wi and wr should be 0-initialised.
1614  */
1615 int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
1616 		       struct s1_walk_result *wr, u64 va)
1617 {
1618 	int ret;
1619 
1620 	ret = setup_s1_walk(vcpu, wi, wr, va);
1621 	if (ret)
1622 		return ret;
1623 
1624 	if (wr->level == S1_MMU_DISABLED) {
1625 		wr->ur = wr->uw = wr->ux = true;
1626 		wr->pr = wr->pw = wr->px = true;
1627 	} else {
1628 		ret = walk_s1(vcpu, wi, wr, va);
1629 		if (ret)
1630 			return ret;
1631 
1632 		compute_s1_permissions(vcpu, wi, wr);
1633 	}
1634 
1635 	return 0;
1636 }
1637 
1638 struct desc_match {
1639 	u64	ipa;
1640 	int	level;
1641 };
1642 
1643 static int match_s1_desc(struct s1_walk_context *ctxt, void *priv)
1644 {
1645 	struct desc_match *dm = priv;
1646 	u64 ipa = dm->ipa;
1647 
1648 	/* Use S1 granule alignment */
1649 	ipa &= GENMASK(51, ctxt->wi->pgshift);
1650 
1651 	/* Not the IPA we're looking for? Continue. */
1652 	if (ipa != ctxt->table_ipa)
1653 		return 0;
1654 
1655 	/* Note the level and interrupt the walk */
1656 	dm->level = ctxt->level;
1657 	return -EINTR;
1658 }
1659 
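/*
 * Walk the guest's S1 tables for the given VA and report the level at
 * which the table located at the given IPA is reached, using a walk
 * filter to interrupt the walk on a match.
 */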
1660 int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, int *level)
1661 {
1662 	struct desc_match dm = {
1663 		.ipa	= ipa,
1664 	};
1665 	struct s1_walk_info wi = {
1666 		.filter	= &(struct s1_walk_filter){
1667 			.fn	= match_s1_desc,
1668 			.priv	= &dm,
1669 		},
1670 		.as_el0	= false,
1671 		.pan	= false,
1672 	};
1673 	struct s1_walk_result wr = {};
1674 	int ret;
1675 
1676 	if (is_hyp_ctxt(vcpu))
1677 		wi.regime = vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2;
1678 	else
1679 		wi.regime = TR_EL10;
1680 
1681 	ret = setup_s1_walk(vcpu, &wi, &wr, va);
1682 	if (ret)
1683 		return ret;
1684 
1685 	/* We really expect the S1 MMU to be on here... */
1686 	if (WARN_ON_ONCE(wr.level == S1_MMU_DISABLED)) {
1687 		*level = 0;
1688 		return 0;
1689 	}
1690 
1691 	/* Walk the guest's PT, looking for a match along the way */
1692 	ret = walk_s1(vcpu, &wi, &wr, va);
1693 	switch (ret) {
1694 	case -EINTR:
1695 		/* We interrupted the walk on a match, return the level */
1696 		*level = dm.level;
1697 		return 0;
1698 	case 0:
1699 		/* The walk completed, we failed to find the entry */
1700 		return -ENOENT;
1701 	default:
1702 		/* Any other error... */
1703 		return ret;
1704 	}
1705 }
1706 
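/*
 * Descriptor update primitives backing __kvm_at_swap_desc(): CAS when
 * LSE atomics are available, LL/SC otherwise. Both return -EAGAIN if
 * the descriptor was concurrently modified.
 */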
1707 #ifdef CONFIG_ARM64_LSE_ATOMICS
1708 static int __lse_swap_desc(u64 __user *ptep, u64 old, u64 new)
1709 {
1710 	u64 tmp = old;
1711 	int ret = 0;
1712 
1713 	uaccess_enable_privileged();
1714 
1715 	asm volatile(__LSE_PREAMBLE
1716 		     "1: cas	%[old], %[new], %[addr]\n"
1717 		     "2:\n"
1718 		     _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w[ret])
1719 		     : [old] "+r" (old), [addr] "+Q" (*ptep), [ret] "+r" (ret)
1720 		     : [new] "r" (new)
1721 		     : "memory");
1722 
1723 	uaccess_disable_privileged();
1724 
1725 	if (ret)
1726 		return ret;
1727 	if (tmp != old)
1728 		return -EAGAIN;
1729 
1730 	return ret;
1731 }
1732 #else
1733 static int __lse_swap_desc(u64 __user *ptep, u64 old, u64 new)
1734 {
1735 	return -EINVAL;
1736 }
1737 #endif
1738 
1739 static int __llsc_swap_desc(u64 __user *ptep, u64 old, u64 new)
1740 {
1741 	int ret = 1;
1742 	u64 tmp;
1743 
1744 	uaccess_enable_privileged();
1745 
1746 	asm volatile("prfm	pstl1strm, %[addr]\n"
1747 		     "1: ldxr	%[tmp], %[addr]\n"
1748 		     "sub	%[tmp], %[tmp], %[old]\n"
1749 		     "cbnz	%[tmp], 3f\n"
1750 		     "2: stlxr	%w[ret], %[new], %[addr]\n"
1751 		     "3:\n"
1752 		     _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %w[ret])
1753 		     _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %w[ret])
1754 		     : [ret] "+r" (ret), [addr] "+Q" (*ptep), [tmp] "=&r" (tmp)
1755 		     : [old] "r" (old), [new] "r" (new)
1756 		     : "memory");
1757 
1758 	uaccess_disable_privileged();
1759 
1760 	/* STLXR didn't update the descriptor, or the compare failed */
1761 	if (ret == 1)
1762 		return -EAGAIN;
1763 
1764 	return ret;
1765 }
1766 
1767 int __kvm_at_swap_desc(struct kvm *kvm, gpa_t ipa, u64 old, u64 new)
1768 {
1769 	struct kvm_memory_slot *slot;
1770 	unsigned long hva;
1771 	u64 __user *ptep;
1772 	bool writable;
1773 	int offset;
1774 	gfn_t gfn;
1775 	int r;
1776 
1777 	lockdep_assert(srcu_read_lock_held(&kvm->srcu));
1778 
1779 	gfn = ipa >> PAGE_SHIFT;
1780 	offset = offset_in_page(ipa);
1781 	slot = gfn_to_memslot(kvm, gfn);
1782 	hva = gfn_to_hva_memslot_prot(slot, gfn, &writable);
1783 	if (kvm_is_error_hva(hva))
1784 		return -EINVAL;
1785 	if (!writable)
1786 		return -EPERM;
1787 
1788 	ptep = (u64 __user *)(hva + offset);
1789 	if (cpus_have_final_cap(ARM64_HAS_LSE_ATOMICS))
1790 		r = __lse_swap_desc(ptep, old, new);
1791 	else
1792 		r = __llsc_swap_desc(ptep, old, new);
1793 
1794 	if (r < 0)
1795 		return r;
1796 
1797 	mark_page_dirty_in_slot(kvm, slot, gfn);
1798 	return 0;
1799 }
1800