1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Copyright (C) 2017 - Linaro Ltd
4 * Author: Jintack Lim <jintack.lim@linaro.org>
5 */
6
7 #include <linux/kvm_host.h>
8
9 #include <asm/esr.h>
10 #include <asm/kvm_hyp.h>
11 #include <asm/kvm_mmu.h>
12
13 enum trans_regime {
14 TR_EL10,
15 TR_EL20,
16 TR_EL2,
17 };
18
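/*
 * Parameters of an emulated stage-1 walk, as derived from the guest's
 * TCR/TTBR/SCTLR state by setup_s1_walk(): table base address,
 * translation regime, granule shift, TxSZ, start level, plus the
 * HPD/POE/PAN/endianness/stage-2 controls that affect the walk and the
 * resulting permissions.
 */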
19 struct s1_walk_info {
20 u64 baddr;
21 enum trans_regime regime;
22 unsigned int max_oa_bits;
23 unsigned int pgshift;
24 unsigned int txsz;
25 int sl;
26 bool hpd;
27 bool e0poe;
28 bool poe;
29 bool pan;
30 bool be;
31 bool s2;
32 };
33
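/*
 * Result of a stage-1 walk. On success, the first union member holds
 * the final descriptor, output address, level and decoded
 * permissions/hierarchical attributes; on failure, the second member
 * holds the fault status code (fst), whether the fault occurred on a
 * table walk (ptw) and whether it came from stage-2 (s2).
 */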
34 struct s1_walk_result {
35 union {
36 struct {
37 u64 desc;
38 u64 pa;
39 s8 level;
40 u8 APTable;
41 bool UXNTable;
42 bool PXNTable;
43 bool uwxn;
44 bool uov;
45 bool ur;
46 bool uw;
47 bool ux;
48 bool pwxn;
49 bool pov;
50 bool pr;
51 bool pw;
52 bool px;
53 };
54 struct {
55 u8 fst;
56 bool ptw;
57 bool s2;
58 };
59 };
60 bool failed;
61 };
62
63 static void fail_s1_walk(struct s1_walk_result *wr, u8 fst, bool ptw, bool s2)
64 {
65 wr->fst = fst;
66 wr->ptw = ptw;
67 wr->s2 = s2;
68 wr->failed = true;
69 }
70
71 #define S1_MMU_DISABLED (-127)
72
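/*
 * Size in bits of the input address space, e.g. TxSZ == 16 gives a
 * 64 - 16 = 48-bit VA space.
 */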
73 static int get_ia_size(struct s1_walk_info *wi)
74 {
75 return 64 - wi->txsz;
76 }
77
78 /* Return true if the IPA is out of the OA range */
79 static bool check_output_size(u64 ipa, struct s1_walk_info *wi)
80 {
81 return wi->max_oa_bits < 48 && (ipa & GENMASK_ULL(47, wi->max_oa_bits));
82 }
83
84 /* Return the translation regime that applies to an AT instruction */
85 static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 op)
86 {
87 /*
88 * We only get here from guest EL2, so the translation
89 * regime AT applies to is solely defined by {E2H,TGE}.
90 */
91 switch (op) {
92 case OP_AT_S1E2R:
93 case OP_AT_S1E2W:
94 case OP_AT_S1E2A:
95 return vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2;
97 default:
98 return (vcpu_el2_e2h_is_set(vcpu) &&
99 vcpu_el2_tge_is_set(vcpu)) ? TR_EL20 : TR_EL10;
100 }
101 }
102
103 static bool s1pie_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
104 {
105 if (!kvm_has_s1pie(vcpu->kvm))
106 return false;
107
108 switch (regime) {
109 case TR_EL2:
110 case TR_EL20:
111 return vcpu_read_sys_reg(vcpu, TCR2_EL2) & TCR2_EL2_PIE;
112 case TR_EL10:
113 return (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En) &&
114 (__vcpu_sys_reg(vcpu, TCR2_EL1) & TCR2_EL1x_PIE);
115 default:
116 BUG();
117 }
118 }
119
120 static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi)
121 {
122 u64 val;
123
124 if (!kvm_has_s1poe(vcpu->kvm)) {
125 wi->poe = wi->e0poe = false;
126 return;
127 }
128
129 switch (wi->regime) {
130 case TR_EL2:
131 case TR_EL20:
132 val = vcpu_read_sys_reg(vcpu, TCR2_EL2);
133 wi->poe = val & TCR2_EL2_POE;
134 wi->e0poe = (wi->regime == TR_EL20) && (val & TCR2_EL2_E0POE);
135 break;
136 case TR_EL10:
137 if (!(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En)) {
138 wi->poe = wi->e0poe = false;
139 return;
140 }
141
142 val = __vcpu_sys_reg(vcpu, TCR2_EL1);
143 wi->poe = val & TCR2_EL1x_POE;
144 wi->e0poe = val & TCR2_EL1x_E0POE;
145 }
146 }
147
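/*
 * Decode the translation controls of the regime targeted by the AT
 * instruction and populate @wi. Returns 0 on success (including the
 * MMU-off case, flagged by wr->level == S1_MMU_DISABLED), or -EFAULT
 * after recording a level-0 Address Size or Translation fault in @wr.
 */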
148 static int setup_s1_walk(struct kvm_vcpu *vcpu, u32 op, struct s1_walk_info *wi,
149 struct s1_walk_result *wr, u64 va)
150 {
151 u64 hcr, sctlr, tcr, tg, ps, ia_bits, ttbr;
152 unsigned int stride, x;
153 bool va55, tbi, lva, as_el0;
154
155 hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
156
157 wi->regime = compute_translation_regime(vcpu, op);
158 as_el0 = (op == OP_AT_S1E0R || op == OP_AT_S1E0W);
159 wi->pan = (op == OP_AT_S1E1RP || op == OP_AT_S1E1WP) &&
160 (*vcpu_cpsr(vcpu) & PSR_PAN_BIT);
161
162 va55 = va & BIT(55);
163
164 if (wi->regime == TR_EL2 && va55)
165 goto addrsz;
166
167 wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC));
168
169 switch (wi->regime) {
170 case TR_EL10:
171 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
172 tcr = vcpu_read_sys_reg(vcpu, TCR_EL1);
173 ttbr = (va55 ?
174 vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
175 vcpu_read_sys_reg(vcpu, TTBR0_EL1));
176 break;
177 case TR_EL2:
178 case TR_EL20:
179 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);
180 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
181 ttbr = (va55 ?
182 vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
183 vcpu_read_sys_reg(vcpu, TTBR0_EL2));
184 break;
185 default:
186 BUG();
187 }
188
189 tbi = (wi->regime == TR_EL2 ?
190 FIELD_GET(TCR_EL2_TBI, tcr) :
191 (va55 ?
192 FIELD_GET(TCR_TBI1, tcr) :
193 FIELD_GET(TCR_TBI0, tcr)));
194
195 if (!tbi && (u64)sign_extend64(va, 55) != va)
196 goto addrsz;
197
198 va = (u64)sign_extend64(va, 55);
199
200 /* Let's put the MMU disabled case aside immediately */
201 switch (wi->regime) {
202 case TR_EL10:
203 /*
204 * If dealing with the EL1&0 translation regime, 3 things
205 * can disable the S1 translation:
206 *
207 * - HCR_EL2.DC = 1
208 * - HCR_EL2.{E2H,TGE} = {0,1}
209 * - SCTLR_EL1.M = 0
210 *
211 * The TGE part is interesting. If we have decided that this
212 * is EL1&0, then it means that either {E2H,TGE} == {1,0} or
213 * {0,x}, and we only need to test for TGE == 1.
214 */
215 if (hcr & (HCR_DC | HCR_TGE)) {
216 wr->level = S1_MMU_DISABLED;
217 break;
218 }
219 fallthrough;
220 case TR_EL2:
221 case TR_EL20:
222 if (!(sctlr & SCTLR_ELx_M))
223 wr->level = S1_MMU_DISABLED;
224 break;
225 }
226
227 if (wr->level == S1_MMU_DISABLED) {
228 if (va >= BIT(kvm_get_pa_bits(vcpu->kvm)))
229 goto addrsz;
230
231 wr->pa = va;
232 return 0;
233 }
234
235 wi->be = sctlr & SCTLR_ELx_EE;
236
237 wi->hpd = kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, HPDS, IMP);
238 wi->hpd &= (wi->regime == TR_EL2 ?
239 FIELD_GET(TCR_EL2_HPD, tcr) :
240 (va55 ?
241 FIELD_GET(TCR_HPD1, tcr) :
242 FIELD_GET(TCR_HPD0, tcr)));
243 /* R_JHSVW */
244 wi->hpd |= s1pie_enabled(vcpu, wi->regime);
245
246 /* Do we have POE? */
247 compute_s1poe(vcpu, wi);
248
249 /* R_BVXDG */
250 wi->hpd |= (wi->poe || wi->e0poe);
251
252 /* Someone was silly enough to encode TG0/TG1 differently */
253 if (va55) {
254 wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
255 tg = FIELD_GET(TCR_TG1_MASK, tcr);
256
257 switch (tg << TCR_TG1_SHIFT) {
258 case TCR_TG1_4K:
259 wi->pgshift = 12; break;
260 case TCR_TG1_16K:
261 wi->pgshift = 14; break;
262 case TCR_TG1_64K:
263 default: /* IMPDEF: treat any other value as 64k */
264 wi->pgshift = 16; break;
265 }
266 } else {
267 wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
268 tg = FIELD_GET(TCR_TG0_MASK, tcr);
269
270 switch (tg << TCR_TG0_SHIFT) {
271 case TCR_TG0_4K:
272 wi->pgshift = 12; break;
273 case TCR_TG0_16K:
274 wi->pgshift = 14; break;
275 case TCR_TG0_64K:
276 default: /* IMPDEF: treat any other value as 64k */
277 wi->pgshift = 16; break;
278 }
279 }
280
281 /* R_PLCGL, R_YXNYW */
282 if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR2_EL1, ST, 48_47)) {
283 if (wi->txsz > 39)
284 goto transfault_l0;
285 } else {
286 if (wi->txsz > 48 || (BIT(wi->pgshift) == SZ_64K && wi->txsz > 47))
287 goto transfault_l0;
288 }
289
290 /* R_GTJBY, R_SXWGM */
291 switch (BIT(wi->pgshift)) {
292 case SZ_4K:
293 lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT);
294 lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
295 break;
296 case SZ_16K:
297 lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT);
298 lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
299 break;
300 case SZ_64K:
301 lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, VARange, 52);
302 break;
303 }
304
305 if ((lva && wi->txsz < 12) || (!lva && wi->txsz < 16))
306 goto transfault_l0;
307
308 ia_bits = get_ia_size(wi);
309
310 /* R_YYVYV, I_THCZK */
311 if ((!va55 && va > GENMASK(ia_bits - 1, 0)) ||
312 (va55 && va < GENMASK(63, ia_bits)))
313 goto transfault_l0;
314
315 /* I_ZFSYQ */
316 if (wi->regime != TR_EL2 &&
317 (tcr & (va55 ? TCR_EPD1_MASK : TCR_EPD0_MASK)))
318 goto transfault_l0;
319
320 /* R_BNDVG and following statements */
321 if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, E0PD, IMP) &&
322 as_el0 && (tcr & (va55 ? TCR_E0PD1 : TCR_E0PD0)))
323 goto transfault_l0;
324
325 /* AArch64.S1StartLevel() */
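/*
 * Example (for illustration only): a 4k granule gives pgshift = 12 and
 * stride = 9, so a 48-bit IA (TxSZ = 16) starts the walk at level
 * 3 - ((47 - 12) / 9) = 0.
 */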
326 stride = wi->pgshift - 3;
327 wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride);
328
329 ps = (wi->regime == TR_EL2 ?
330 FIELD_GET(TCR_EL2_PS_MASK, tcr) : FIELD_GET(TCR_IPS_MASK, tcr));
331
332 wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps));
333
334 /* Compute minimal alignment */
335 x = 3 + ia_bits - ((3 - wi->sl) * stride + wi->pgshift);
336
337 wi->baddr = ttbr & TTBRx_EL1_BADDR;
338
339 /* R_VPBBF */
340 if (check_output_size(wi->baddr, wi))
341 goto addrsz;
342
343 wi->baddr &= GENMASK_ULL(wi->max_oa_bits - 1, x);
344
345 return 0;
346
347 addrsz: /* Address Size Fault level 0 */
348 fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false, false);
349 return -EFAULT;
350
351 transfault_l0: /* Translation Fault level 0 */
352 fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(0), false, false);
353 return -EFAULT;
354 }
355
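/*
 * Perform the stage-1 table walk described by @wi. When a stage-2 is
 * in effect, each table address is itself translated through the
 * emulated stage-2, and any stage-2 fault, unreadable table or failed
 * guest memory access aborts the walk with a matching fault code
 * recorded in @wr.
 */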
356 static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
357 struct s1_walk_result *wr, u64 va)
358 {
359 u64 va_top, va_bottom, baddr, desc;
360 int level, stride, ret;
361
362 level = wi->sl;
363 stride = wi->pgshift - 3;
364 baddr = wi->baddr;
365
366 va_top = get_ia_size(wi) - 1;
367
368 while (1) {
369 u64 index, ipa;
370
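/*
 * Each level resolves 'stride' bits of VA; the extracted bits become a
 * descriptor offset within the table (8 bytes per entry, hence the
 * shift down to bit 3).
 */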
371 va_bottom = (3 - level) * stride + wi->pgshift;
372 index = (va & GENMASK_ULL(va_top, va_bottom)) >> (va_bottom - 3);
373
374 ipa = baddr | index;
375
376 if (wi->s2) {
377 struct kvm_s2_trans s2_trans = {};
378
379 ret = kvm_walk_nested_s2(vcpu, ipa, &s2_trans);
380 if (ret) {
381 fail_s1_walk(wr,
382 (s2_trans.esr & ~ESR_ELx_FSC_LEVEL) | level,
383 true, true);
384 return ret;
385 }
386
387 if (!kvm_s2_trans_readable(&s2_trans)) {
388 fail_s1_walk(wr, ESR_ELx_FSC_PERM_L(level),
389 true, true);
390
391 return -EPERM;
392 }
393
394 ipa = kvm_s2_trans_output(&s2_trans);
395 }
396
397 ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc));
398 if (ret) {
399 fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level),
400 true, false);
401 return ret;
402 }
403
404 if (wi->be)
405 desc = be64_to_cpu((__force __be64)desc);
406 else
407 desc = le64_to_cpu((__force __le64)desc);
408
409 /* Invalid descriptor */
410 if (!(desc & BIT(0)))
411 goto transfault;
412
413 /* Block mapping, check validity down the line */
414 if (!(desc & BIT(1)))
415 break;
416
417 /* Page mapping */
418 if (level == 3)
419 break;
420
421 /* Table handling */
422 if (!wi->hpd) {
423 wr->APTable |= FIELD_GET(S1_TABLE_AP, desc);
424 wr->UXNTable |= FIELD_GET(PMD_TABLE_UXN, desc);
425 wr->PXNTable |= FIELD_GET(PMD_TABLE_PXN, desc);
426 }
427
428 baddr = desc & GENMASK_ULL(47, wi->pgshift);
429
430 /* Check for out-of-range OA */
431 if (check_output_size(baddr, wi))
432 goto addrsz;
433
434 /* Prepare for next round */
435 va_top = va_bottom - 1;
436 level++;
437 }
438
439 /* Block mapping, check the validity of the level */
440 if (!(desc & BIT(1))) {
441 bool valid_block = false;
442
443 switch (BIT(wi->pgshift)) {
444 case SZ_4K:
445 valid_block = level == 1 || level == 2;
446 break;
447 case SZ_16K:
448 case SZ_64K:
449 valid_block = level == 2;
450 break;
451 }
452
453 if (!valid_block)
454 goto transfault;
455 }
456
457 if (check_output_size(desc & GENMASK(47, va_bottom), wi))
458 goto addrsz;
459
460 va_bottom += contiguous_bit_shift(desc, wi, level);
461
462 wr->failed = false;
463 wr->level = level;
464 wr->desc = desc;
465 wr->pa = desc & GENMASK(47, va_bottom);
466 wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0);
467
468 return 0;
469
470 addrsz:
471 fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(level), true, false);
472 return -EINVAL;
473 transfault:
474 fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(level), true, false);
475 return -ENOENT;
476 }
477
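/*
 * Host translation state that the fast AT path temporarily replaces
 * with the guest's EL1 context before issuing a real AT instruction.
 */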
478 struct mmu_config {
479 u64 ttbr0;
480 u64 ttbr1;
481 u64 tcr;
482 u64 mair;
483 u64 tcr2;
484 u64 pir;
485 u64 pire0;
486 u64 por_el0;
487 u64 por_el1;
488 u64 sctlr;
489 u64 vttbr;
490 u64 vtcr;
491 u64 hcr;
492 };
493
494 static void __mmu_config_save(struct mmu_config *config)
495 {
496 config->ttbr0 = read_sysreg_el1(SYS_TTBR0);
497 config->ttbr1 = read_sysreg_el1(SYS_TTBR1);
498 config->tcr = read_sysreg_el1(SYS_TCR);
499 config->mair = read_sysreg_el1(SYS_MAIR);
500 if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
501 config->tcr2 = read_sysreg_el1(SYS_TCR2);
502 if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
503 config->pir = read_sysreg_el1(SYS_PIR);
504 config->pire0 = read_sysreg_el1(SYS_PIRE0);
505 }
506 if (system_supports_poe()) {
507 config->por_el1 = read_sysreg_el1(SYS_POR);
508 config->por_el0 = read_sysreg_s(SYS_POR_EL0);
509 }
510 }
511 config->sctlr = read_sysreg_el1(SYS_SCTLR);
512 config->vttbr = read_sysreg(vttbr_el2);
513 config->vtcr = read_sysreg(vtcr_el2);
514 config->hcr = read_sysreg(hcr_el2);
515 }
516
517 static void __mmu_config_restore(struct mmu_config *config)
518 {
519 write_sysreg(config->hcr, hcr_el2);
520
521 /*
522 * ARM errata 1165522 and 1530923 require TGE to be 1 before
523 * we update the guest state.
524 */
525 asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
526
527 write_sysreg_el1(config->ttbr0, SYS_TTBR0);
528 write_sysreg_el1(config->ttbr1, SYS_TTBR1);
529 write_sysreg_el1(config->tcr, SYS_TCR);
530 write_sysreg_el1(config->mair, SYS_MAIR);
531 if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
532 write_sysreg_el1(config->tcr2, SYS_TCR2);
533 if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
534 write_sysreg_el1(config->pir, SYS_PIR);
535 write_sysreg_el1(config->pire0, SYS_PIRE0);
536 }
537 if (system_supports_poe()) {
538 write_sysreg_el1(config->por_el1, SYS_POR);
539 write_sysreg_s(config->por_el0, SYS_POR_EL0);
540 }
541 }
542 write_sysreg_el1(config->sctlr, SYS_SCTLR);
543 write_sysreg(config->vttbr, vttbr_el2);
544 write_sysreg(config->vtcr, vtcr_el2);
545 }
546
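/*
 * AT S1E1{R,W}P honour PSTATE.PAN, so temporarily mirror the guest's
 * PAN bit into the host PSTATE around the AT instruction.
 */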
547 static bool at_s1e1p_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
548 {
549 u64 host_pan;
550 bool fail;
551
552 host_pan = read_sysreg_s(SYS_PSTATE_PAN);
553 write_sysreg_s(*vcpu_cpsr(vcpu) & PSTATE_PAN, SYS_PSTATE_PAN);
554
555 switch (op) {
556 case OP_AT_S1E1RP:
557 fail = __kvm_at(OP_AT_S1E1RP, vaddr);
558 break;
559 case OP_AT_S1E1WP:
560 fail = __kvm_at(OP_AT_S1E1WP, vaddr);
561 break;
562 }
563
564 write_sysreg_s(host_pan, SYS_PSTATE_PAN);
565
566 return fail;
567 }
568
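/*
 * MAIR-style attribute encoding: outer attribute in the upper nibble,
 * inner attribute in the lower nibble, an all-zero upper nibble
 * denoting Device memory.
 */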
569 #define MEMATTR(ic, oc) (MEMATTR_##oc << 4 | MEMATTR_##ic)
570 #define MEMATTR_NC 0b0100
571 #define MEMATTR_Wt 0b1000
572 #define MEMATTR_Wb 0b1100
573 #define MEMATTR_WbRaWa 0b1111
574
575 #define MEMATTR_IS_DEVICE(m) (((m) & GENMASK(7, 4)) == 0)
576
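/* Convert a stage-2 MemAttr[3:0] field into a MAIR-style attribute */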
577 static u8 s2_memattr_to_attr(u8 memattr)
578 {
579 memattr &= 0b1111;
580
581 switch (memattr) {
582 case 0b0000:
583 case 0b0001:
584 case 0b0010:
585 case 0b0011:
586 return memattr << 2;
587 case 0b0100:
588 return MEMATTR(Wb, Wb);
589 case 0b0101:
590 return MEMATTR(NC, NC);
591 case 0b0110:
592 return MEMATTR(Wt, NC);
593 case 0b0111:
594 return MEMATTR(Wb, NC);
595 case 0b1000:
596 /* Reserved, assume NC */
597 return MEMATTR(NC, NC);
598 case 0b1001:
599 return MEMATTR(NC, Wt);
600 case 0b1010:
601 return MEMATTR(Wt, Wt);
602 case 0b1011:
603 return MEMATTR(Wb, Wt);
604 case 0b1100:
605 /* Reserved, assume NC */
606 return MEMATTR(NC, NC);
607 case 0b1101:
608 return MEMATTR(NC, Wb);
609 case 0b1110:
610 return MEMATTR(Wt, Wb);
611 case 0b1111:
612 return MEMATTR(Wb, Wb);
613 default:
614 unreachable();
615 }
616 }
617
618 static u8 combine_s1_s2_attr(u8 s1, u8 s2)
619 {
620 bool transient;
621 u8 final = 0;
622
623 /* Upgrade transient s1 to non-transient to simplify things */
624 switch (s1) {
625 case 0b0001 ... 0b0011: /* Normal, Write-Through Transient */
626 transient = true;
627 s1 = MEMATTR_Wt | (s1 & GENMASK(1, 0));
628 break;
629 case 0b0101 ... 0b0111: /* Normal, Write-Back Transient */
630 transient = true;
631 s1 = MEMATTR_Wb | (s1 & GENMASK(1, 0));
632 break;
633 default:
634 transient = false;
635 }
636
637 /* S2CombineS1AttrHints() */
638 if ((s1 & GENMASK(3, 2)) == MEMATTR_NC ||
639 (s2 & GENMASK(3, 2)) == MEMATTR_NC)
640 final = MEMATTR_NC;
641 else if ((s1 & GENMASK(3, 2)) == MEMATTR_Wt ||
642 (s2 & GENMASK(3, 2)) == MEMATTR_Wt)
643 final = MEMATTR_Wt;
644 else
645 final = MEMATTR_Wb;
646
647 if (final != MEMATTR_NC) {
648 /* Inherit RaWa hints from S1 */
649 if (transient) {
650 switch (s1 & GENMASK(3, 2)) {
651 case MEMATTR_Wt:
652 final = 0;
653 break;
654 case MEMATTR_Wb:
655 final = MEMATTR_NC;
656 break;
657 }
658 }
659
660 final |= s1 & GENMASK(1, 0);
661 }
662
663 return final;
664 }
665
666 #define ATTR_NSH 0b00
667 #define ATTR_RSV 0b01
668 #define ATTR_OSH 0b10
669 #define ATTR_ISH 0b11
670
671 static u8 compute_sh(u8 attr, u64 desc)
672 {
673 u8 sh;
674
675 /* Any form of device, as well as NC has SH[1:0]=0b10 */
676 if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC))
677 return ATTR_OSH;
678
679 sh = FIELD_GET(PTE_SHARED, desc);
680 if (sh == ATTR_RSV) /* Reserved, mapped to NSH */
681 sh = ATTR_NSH;
682
683 return sh;
684 }
685
686 static u8 combine_sh(u8 s1_sh, u8 s2_sh)
687 {
688 if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH)
689 return ATTR_OSH;
690 if (s1_sh == ATTR_ISH || s2_sh == ATTR_ISH)
691 return ATTR_ISH;
692
693 return ATTR_NSH;
694 }
695
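/*
 * Fold a nested stage-2 walk into a stage-1 PAR_EL1 value: report the
 * stage-2 fault if there is one, otherwise combine the stage-1 and
 * stage-2 memory attributes and shareability (honouring
 * HCR_EL2.{FWB,CD}) and substitute the stage-2 output address.
 */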
696 static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
697 struct kvm_s2_trans *tr)
698 {
699 u8 s1_parattr, s2_memattr, final_attr;
700 u64 par;
701
702 /* If S2 has failed to translate, report the damage */
703 if (tr->esr) {
704 par = SYS_PAR_EL1_RES1;
705 par |= SYS_PAR_EL1_F;
706 par |= SYS_PAR_EL1_S;
707 par |= FIELD_PREP(SYS_PAR_EL1_FST, tr->esr);
708 return par;
709 }
710
711 s1_parattr = FIELD_GET(SYS_PAR_EL1_ATTR, s1_par);
712 s2_memattr = FIELD_GET(GENMASK(5, 2), tr->desc);
713
714 if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_FWB) {
715 if (!kvm_has_feat(vcpu->kvm, ID_AA64PFR2_EL1, MTEPERM, IMP))
716 s2_memattr &= ~BIT(3);
717
718 /* Combination of R_VRJSW and R_RHWZM */
719 switch (s2_memattr) {
720 case 0b0101:
721 if (MEMATTR_IS_DEVICE(s1_parattr))
722 final_attr = s1_parattr;
723 else
724 final_attr = MEMATTR(NC, NC);
725 break;
726 case 0b0110:
727 case 0b1110:
728 final_attr = MEMATTR(WbRaWa, WbRaWa);
729 break;
730 case 0b0111:
731 case 0b1111:
732 /* Preserve S1 attribute */
733 final_attr = s1_parattr;
734 break;
735 case 0b0100:
736 case 0b1100:
737 case 0b1101:
738 /* Reserved, do something non-silly */
739 final_attr = s1_parattr;
740 break;
741 default:
742 /*
743 * MemAttr[2]=0, Device from S2.
744 *
745 * FWB does not influence the way that stage 1
746 * memory types and attributes are combined
747 * with stage 2 Device type and attributes.
748 */
749 final_attr = min(s2_memattr_to_attr(s2_memattr),
750 s1_parattr);
751 }
752 } else {
753 /* Combination of R_HMNDG, R_TNHFM and R_GQFSF */
754 u8 s2_parattr = s2_memattr_to_attr(s2_memattr);
755
756 if (MEMATTR_IS_DEVICE(s1_parattr) ||
757 MEMATTR_IS_DEVICE(s2_parattr)) {
758 final_attr = min(s1_parattr, s2_parattr);
759 } else {
760 /* At this stage, this is memory vs memory */
761 final_attr = combine_s1_s2_attr(s1_parattr & 0xf,
762 s2_parattr & 0xf);
763 final_attr |= combine_s1_s2_attr(s1_parattr >> 4,
764 s2_parattr >> 4) << 4;
765 }
766 }
767
768 if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_CD) &&
769 !MEMATTR_IS_DEVICE(final_attr))
770 final_attr = MEMATTR(NC, NC);
771
772 par = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr);
773 par |= tr->output & GENMASK(47, 12);
774 par |= FIELD_PREP(SYS_PAR_EL1_SH,
775 combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par),
776 compute_sh(final_attr, tr->desc)));
777
778 return par;
779 }
780
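/*
 * Build a PAR_EL1 value from a stage-1-only result: a fault encoding,
 * the MMU-off flat mapping with its implied attributes, or the
 * translated PA with attributes taken from MAIR and the descriptor.
 */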
781 static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr,
782 enum trans_regime regime)
783 {
784 u64 par;
785
786 if (wr->failed) {
787 par = SYS_PAR_EL1_RES1;
788 par |= SYS_PAR_EL1_F;
789 par |= FIELD_PREP(SYS_PAR_EL1_FST, wr->fst);
790 par |= wr->ptw ? SYS_PAR_EL1_PTW : 0;
791 par |= wr->s2 ? SYS_PAR_EL1_S : 0;
792 } else if (wr->level == S1_MMU_DISABLED) {
793 /* MMU off or HCR_EL2.DC == 1 */
794 par = SYS_PAR_EL1_NSE;
795 par |= wr->pa & GENMASK_ULL(47, 12);
796
797 if (regime == TR_EL10 &&
798 (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_DC)) {
799 par |= FIELD_PREP(SYS_PAR_EL1_ATTR,
800 MEMATTR(WbRaWa, WbRaWa));
801 par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_NSH);
802 } else {
803 par |= FIELD_PREP(SYS_PAR_EL1_ATTR, 0); /* nGnRnE */
804 par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_OSH);
805 }
806 } else {
807 u64 mair, sctlr;
808 u8 sh;
809
810 par = SYS_PAR_EL1_NSE;
811
812 mair = (regime == TR_EL10 ?
813 vcpu_read_sys_reg(vcpu, MAIR_EL1) :
814 vcpu_read_sys_reg(vcpu, MAIR_EL2));
815
816 mair >>= FIELD_GET(PTE_ATTRINDX_MASK, wr->desc) * 8;
817 mair &= 0xff;
818
819 sctlr = (regime == TR_EL10 ?
820 vcpu_read_sys_reg(vcpu, SCTLR_EL1) :
821 vcpu_read_sys_reg(vcpu, SCTLR_EL2));
822
823 /* Force NC for memory if SCTLR_ELx.C is clear */
824 if (!(sctlr & SCTLR_EL1_C) && !MEMATTR_IS_DEVICE(mair))
825 mair = MEMATTR(NC, NC);
826
827 par |= FIELD_PREP(SYS_PAR_EL1_ATTR, mair);
828 par |= wr->pa & GENMASK_ULL(47, 12);
829
830 sh = compute_sh(mair, wr->desc);
831 par |= FIELD_PREP(SYS_PAR_EL1_SH, sh);
832 }
833
834 return par;
835 }
836
837 static bool pan3_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
838 {
839 u64 sctlr;
840
841 if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, PAN, PAN3))
842 return false;
843
844 if (s1pie_enabled(vcpu, regime))
845 return true;
846
847 if (regime == TR_EL10)
848 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
849 else
850 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);
851
852 return sctlr & SCTLR_EL1_EPAN;
853 }
854
855 static void compute_s1_direct_permissions(struct kvm_vcpu *vcpu,
856 struct s1_walk_info *wi,
857 struct s1_walk_result *wr)
858 {
859 bool wxn;
860
861 /* Non-hierarchical part of AArch64.S1DirectBasePermissions() */
862 if (wi->regime != TR_EL2) {
863 switch (FIELD_GET(PTE_USER | PTE_RDONLY, wr->desc)) {
864 case 0b00:
865 wr->pr = wr->pw = true;
866 wr->ur = wr->uw = false;
867 break;
868 case 0b01:
869 wr->pr = wr->pw = wr->ur = wr->uw = true;
870 break;
871 case 0b10:
872 wr->pr = true;
873 wr->pw = wr->ur = wr->uw = false;
874 break;
875 case 0b11:
876 wr->pr = wr->ur = true;
877 wr->pw = wr->uw = false;
878 break;
879 }
880
881 /* We don't use px for anything yet, but hey... */
882 wr->px = !((wr->desc & PTE_PXN) || wr->uw);
883 wr->ux = !(wr->desc & PTE_UXN);
884 } else {
885 wr->ur = wr->uw = wr->ux = false;
886
887 if (!(wr->desc & PTE_RDONLY)) {
888 wr->pr = wr->pw = true;
889 } else {
890 wr->pr = true;
891 wr->pw = false;
892 }
893
894 /* XN maps to UXN */
895 wr->px = !(wr->desc & PTE_UXN);
896 }
897
898 switch (wi->regime) {
899 case TR_EL2:
900 case TR_EL20:
901 wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_WXN);
902 break;
903 case TR_EL10:
904 wxn = (__vcpu_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN);
905 break;
906 }
907
908 wr->pwxn = wr->uwxn = wxn;
909 wr->pov = wi->poe;
910 wr->uov = wi->e0poe;
911 }
912
913 static void compute_s1_hierarchical_permissions(struct kvm_vcpu *vcpu,
914 struct s1_walk_info *wi,
915 struct s1_walk_result *wr)
916 {
917 /* Hierarchical part of AArch64.S1DirectBasePermissions() */
918 if (wi->regime != TR_EL2) {
919 switch (wr->APTable) {
920 case 0b00:
921 break;
922 case 0b01:
923 wr->ur = wr->uw = false;
924 break;
925 case 0b10:
926 wr->pw = wr->uw = false;
927 break;
928 case 0b11:
929 wr->pw = wr->ur = wr->uw = false;
930 break;
931 }
932
933 wr->px &= !wr->PXNTable;
934 wr->ux &= !wr->UXNTable;
935 } else {
936 if (wr->APTable & BIT(1))
937 wr->pw = false;
938
939 /* XN maps to UXN */
940 wr->px &= !wr->UXNTable;
941 }
942 }
943
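/*
 * PIR/POR-style registers pack sixteen 4-bit permission fields;
 * perm_idx() extracts the field selected by the descriptor's
 * permission index.
 */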
944 #define perm_idx(v, r, i) ((vcpu_read_sys_reg((v), (r)) >> ((i) * 4)) & 0xf)
945
946 #define set_priv_perms(wr, r, w, x) \
947 do { \
948 (wr)->pr = (r); \
949 (wr)->pw = (w); \
950 (wr)->px = (x); \
951 } while (0)
952
953 #define set_unpriv_perms(wr, r, w, x) \
954 do { \
955 (wr)->ur = (r); \
956 (wr)->uw = (w); \
957 (wr)->ux = (x); \
958 } while (0)
959
960 #define set_priv_wxn(wr, v) \
961 do { \
962 (wr)->pwxn = (v); \
963 } while (0)
964
965 #define set_unpriv_wxn(wr, v) \
966 do { \
967 (wr)->uwxn = (v); \
968 } while (0)
969
970 /* Similar to AArch64.S1IndirectBasePermissions(), without GCS */
971 #define set_perms(w, wr, ip) \
972 do { \
973 /* R_LLZDZ */ \
974 switch ((ip)) { \
975 case 0b0000: \
976 set_ ## w ## _perms((wr), false, false, false); \
977 break; \
978 case 0b0001: \
979 set_ ## w ## _perms((wr), true , false, false); \
980 break; \
981 case 0b0010: \
982 set_ ## w ## _perms((wr), false, false, true ); \
983 break; \
984 case 0b0011: \
985 set_ ## w ## _perms((wr), true , false, true ); \
986 break; \
987 case 0b0100: \
988 set_ ## w ## _perms((wr), false, false, false); \
989 break; \
990 case 0b0101: \
991 set_ ## w ## _perms((wr), true , true , false); \
992 break; \
993 case 0b0110: \
994 set_ ## w ## _perms((wr), true , true , true ); \
995 break; \
996 case 0b0111: \
997 set_ ## w ## _perms((wr), true , true , true ); \
998 break; \
999 case 0b1000: \
1000 set_ ## w ## _perms((wr), true , false, false); \
1001 break; \
1002 case 0b1001: \
1003 set_ ## w ## _perms((wr), true , false, false); \
1004 break; \
1005 case 0b1010: \
1006 set_ ## w ## _perms((wr), true , false, true ); \
1007 break; \
1008 case 0b1011: \
1009 set_ ## w ## _perms((wr), false, false, false); \
1010 break; \
1011 case 0b1100: \
1012 set_ ## w ## _perms((wr), true , true , false); \
1013 break; \
1014 case 0b1101: \
1015 set_ ## w ## _perms((wr), false, false, false); \
1016 break; \
1017 case 0b1110: \
1018 set_ ## w ## _perms((wr), true , true , true ); \
1019 break; \
1020 case 0b1111: \
1021 set_ ## w ## _perms((wr), false, false, false); \
1022 break; \
1023 } \
1024 \
1025 /* R_HJYGR */ \
1026 set_ ## w ## _wxn((wr), ((ip) == 0b0110)); \
1027 \
1028 } while (0)
1029
1030 static void compute_s1_indirect_permissions(struct kvm_vcpu *vcpu,
1031 struct s1_walk_info *wi,
1032 struct s1_walk_result *wr)
1033 {
1034 u8 up, pp, idx;
1035
1036 idx = pte_pi_index(wr->desc);
1037
1038 switch (wi->regime) {
1039 case TR_EL10:
1040 pp = perm_idx(vcpu, PIR_EL1, idx);
1041 up = perm_idx(vcpu, PIRE0_EL1, idx);
1042 break;
1043 case TR_EL20:
1044 pp = perm_idx(vcpu, PIR_EL2, idx);
1045 up = perm_idx(vcpu, PIRE0_EL2, idx);
1046 break;
1047 case TR_EL2:
1048 pp = perm_idx(vcpu, PIR_EL2, idx);
1049 up = 0;
1050 break;
1051 }
1052
1053 set_perms(priv, wr, pp);
1054
1055 if (wi->regime != TR_EL2)
1056 set_perms(unpriv, wr, up);
1057 else
1058 set_unpriv_perms(wr, false, false, false);
1059
1060 wr->pov = wi->poe && !(pp & BIT(3));
1061 wr->uov = wi->e0poe && !(up & BIT(3));
1062
1063 /* R_VFPJF */
1064 if (wr->px && wr->uw) {
1065 set_priv_perms(wr, false, false, false);
1066 set_unpriv_perms(wr, false, false, false);
1067 }
1068 }
1069
1070 static void compute_s1_overlay_permissions(struct kvm_vcpu *vcpu,
1071 struct s1_walk_info *wi,
1072 struct s1_walk_result *wr)
1073 {
1074 u8 idx, pov_perms, uov_perms;
1075
1076 idx = FIELD_GET(PTE_PO_IDX_MASK, wr->desc);
1077
1078 switch (wi->regime) {
1079 case TR_EL10:
1080 pov_perms = perm_idx(vcpu, POR_EL1, idx);
1081 uov_perms = perm_idx(vcpu, POR_EL0, idx);
1082 break;
1083 case TR_EL20:
1084 pov_perms = perm_idx(vcpu, POR_EL2, idx);
1085 uov_perms = perm_idx(vcpu, POR_EL0, idx);
1086 break;
1087 case TR_EL2:
1088 pov_perms = perm_idx(vcpu, POR_EL2, idx);
1089 uov_perms = 0;
1090 break;
1091 }
1092
1093 if (pov_perms & ~POE_RXW)
1094 pov_perms = POE_NONE;
1095
1096 if (wi->poe && wr->pov) {
1097 wr->pr &= pov_perms & POE_R;
1098 wr->px &= pov_perms & POE_X;
1099 wr->pw &= pov_perms & POE_W;
1100 }
1101
1102 if (uov_perms & ~POE_RXW)
1103 uov_perms = POE_NONE;
1104
1105 if (wi->e0poe && wr->uov) {
1106 wr->ur &= uov_perms & POE_R;
1107 wr->ux &= uov_perms & POE_X;
1108 wr->uw &= uov_perms & POE_W;
1109 }
1110 }
1111
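/*
 * Compute the final stage-1 permissions: direct (AP/XN) or indirect
 * (PIE) base permissions, restricted by the hierarchical attributes
 * when HPD is disabled, then by the permission overlays, WXN and
 * finally PAN.
 */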
1112 static void compute_s1_permissions(struct kvm_vcpu *vcpu,
1113 struct s1_walk_info *wi,
1114 struct s1_walk_result *wr)
1115 {
1116 bool pan;
1117
1118 if (!s1pie_enabled(vcpu, wi->regime))
1119 compute_s1_direct_permissions(vcpu, wi, wr);
1120 else
1121 compute_s1_indirect_permissions(vcpu, wi, wr);
1122
1123 if (!wi->hpd)
1124 compute_s1_hierarchical_permissions(vcpu, wi, wr);
1125
1126 if (wi->poe || wi->e0poe)
1127 compute_s1_overlay_permissions(vcpu, wi, wr);
1128
1129 /* R_QXXPC */
1130 if (wr->pwxn) {
1131 if (!wr->pov && wr->pw)
1132 wr->px = false;
1133 if (wr->pov && wr->px)
1134 wr->pw = false;
1135 }
1136
1137 /* R_NPBXC */
1138 if (wr->uwxn) {
1139 if (!wr->uov && wr->uw)
1140 wr->ux = false;
1141 if (wr->uov && wr->ux)
1142 wr->uw = false;
1143 }
1144
1145 pan = wi->pan && (wr->ur || wr->uw ||
1146 (pan3_enabled(vcpu, wi->regime) && wr->ux));
1147 wr->pw &= !pan;
1148 wr->pr &= !pan;
1149 }
1150
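/*
 * Software emulation of AT: walk the guest's stage-1 tables, check the
 * permission matching the requested access, and return the PAR_EL1
 * value the guest would have observed.
 */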
1151 static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
1152 {
1153 struct s1_walk_result wr = {};
1154 struct s1_walk_info wi = {};
1155 bool perm_fail = false;
1156 int ret, idx;
1157
1158 ret = setup_s1_walk(vcpu, op, &wi, &wr, vaddr);
1159 if (ret)
1160 goto compute_par;
1161
1162 if (wr.level == S1_MMU_DISABLED)
1163 goto compute_par;
1164
1165 idx = srcu_read_lock(&vcpu->kvm->srcu);
1166
1167 ret = walk_s1(vcpu, &wi, &wr, vaddr);
1168
1169 srcu_read_unlock(&vcpu->kvm->srcu, idx);
1170
1171 if (ret)
1172 goto compute_par;
1173
1174 compute_s1_permissions(vcpu, &wi, &wr);
1175
1176 switch (op) {
1177 case OP_AT_S1E1RP:
1178 case OP_AT_S1E1R:
1179 case OP_AT_S1E2R:
1180 perm_fail = !wr.pr;
1181 break;
1182 case OP_AT_S1E1WP:
1183 case OP_AT_S1E1W:
1184 case OP_AT_S1E2W:
1185 perm_fail = !wr.pw;
1186 break;
1187 case OP_AT_S1E0R:
1188 perm_fail = !wr.ur;
1189 break;
1190 case OP_AT_S1E0W:
1191 perm_fail = !wr.uw;
1192 break;
1193 case OP_AT_S1E1A:
1194 case OP_AT_S1E2A:
1195 break;
1196 default:
1197 BUG();
1198 }
1199
1200 if (perm_fail)
1201 fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false, false);
1202
1203 compute_par:
1204 return compute_par_s1(vcpu, &wr, wi.regime);
1205 }
1206
1207 /*
1208 * Return the PAR_EL1 value as the result of a valid translation.
1209 *
1210 * If the translation is unsuccessful, the value may only contain
1211 * PAR_EL1.F, and cannot be taken at face value. It isn't an
1212 * indication of the translation having failed, only that the fast
1213 * path did not succeed, *unless* it indicates an S1 permission fault.
1214 */
1215 static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
1216 {
1217 struct mmu_config config;
1218 struct kvm_s2_mmu *mmu;
1219 bool fail;
1220 u64 par;
1221
1222 par = SYS_PAR_EL1_F;
1223
1224 /*
1225 * We've trapped, so everything is live on the CPU. As we will
1226 * be switching contexts behind everybody's back, disable
1227 * interrupts while holding the mmu lock.
1228 */
1229 guard(write_lock_irqsave)(&vcpu->kvm->mmu_lock);
1230
1231 /*
1232 * If HCR_EL2.{E2H,TGE} == {1,1}, the MMU context is already
1233 * the right one (as we trapped from vEL2). If not, save the
1234 * full MMU context.
1235 */
1236 if (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu))
1237 goto skip_mmu_switch;
1238
1239 /*
1240 * Obtaining the S2 MMU for an L2 is horribly racy, and we may not
1241 * find it (recycled by another vcpu, for example). When this
1242 * happens, admit defeat immediately and use the SW (slow) path.
1243 */
1244 mmu = lookup_s2_mmu(vcpu);
1245 if (!mmu)
1246 return par;
1247
1248 __mmu_config_save(&config);
1249
1250 write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR0_EL1), SYS_TTBR0);
1251 write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR1_EL1), SYS_TTBR1);
1252 write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR_EL1), SYS_TCR);
1253 write_sysreg_el1(vcpu_read_sys_reg(vcpu, MAIR_EL1), SYS_MAIR);
1254 if (kvm_has_tcr2(vcpu->kvm)) {
1255 write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR2_EL1), SYS_TCR2);
1256 if (kvm_has_s1pie(vcpu->kvm)) {
1257 write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIR_EL1), SYS_PIR);
1258 write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIRE0_EL1), SYS_PIRE0);
1259 }
1260 if (kvm_has_s1poe(vcpu->kvm)) {
1261 write_sysreg_el1(vcpu_read_sys_reg(vcpu, POR_EL1), SYS_POR);
1262 write_sysreg_s(vcpu_read_sys_reg(vcpu, POR_EL0), SYS_POR_EL0);
1263 }
1264 }
1265 write_sysreg_el1(vcpu_read_sys_reg(vcpu, SCTLR_EL1), SYS_SCTLR);
1266 __load_stage2(mmu, mmu->arch);
1267
1268 skip_mmu_switch:
1269 /* Clear TGE, enable S2 translation, we're rolling */
1270 write_sysreg((config.hcr & ~HCR_TGE) | HCR_VM, hcr_el2);
1271 isb();
1272
1273 switch (op) {
1274 case OP_AT_S1E1RP:
1275 case OP_AT_S1E1WP:
1276 fail = at_s1e1p_fast(vcpu, op, vaddr);
1277 break;
1278 case OP_AT_S1E1R:
1279 fail = __kvm_at(OP_AT_S1E1R, vaddr);
1280 break;
1281 case OP_AT_S1E1W:
1282 fail = __kvm_at(OP_AT_S1E1W, vaddr);
1283 break;
1284 case OP_AT_S1E0R:
1285 fail = __kvm_at(OP_AT_S1E0R, vaddr);
1286 break;
1287 case OP_AT_S1E0W:
1288 fail = __kvm_at(OP_AT_S1E0W, vaddr);
1289 break;
1290 case OP_AT_S1E1A:
1291 fail = __kvm_at(OP_AT_S1E1A, vaddr);
1292 break;
1293 default:
1294 WARN_ON_ONCE(1);
1295 fail = true;
1296 break;
1297 }
1298
1299 if (!fail)
1300 par = read_sysreg_par();
1301
1302 if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)))
1303 __mmu_config_restore(&config);
1304
1305 return par;
1306 }
1307
1308 static bool par_check_s1_perm_fault(u64 par)
1309 {
1310 u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par);
1311
1312 return ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM &&
1313 !(par & SYS_PAR_EL1_S));
1314 }
1315
1316 void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
1317 {
1318 u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr);
1319
1320 /*
1321 * If PAR_EL1 reports that AT failed on a S1 permission fault, we
1322 * know for sure that the PTW was able to walk the S1 tables and
1323 * there's nothing else to do.
1324 *
1325 * If AT failed for any other reason, then we must walk the guest S1
1326 * to emulate the instruction.
1327 */
1328 if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
1329 par = handle_at_slow(vcpu, op, vaddr);
1330
1331 vcpu_write_sys_reg(vcpu, par, PAR_EL1);
1332 }
1333
1334 void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
1335 {
1336 u64 par;
1337
1338 /*
1339 * We've trapped, so everything is live on the CPU. As we will be
1340 * switching context behind everybody's back, disable interrupts...
1341 */
1342 scoped_guard(write_lock_irqsave, &vcpu->kvm->mmu_lock) {
1343 u64 val, hcr;
1344 bool fail;
1345
1346 val = hcr = read_sysreg(hcr_el2);
1347 val &= ~HCR_TGE;
1348 val |= HCR_VM;
1349
1350 if (!vcpu_el2_e2h_is_set(vcpu))
1351 val |= HCR_NV | HCR_NV1;
1352
1353 write_sysreg(val, hcr_el2);
1354 isb();
1355
1356 par = SYS_PAR_EL1_F;
1357
1358 switch (op) {
1359 case OP_AT_S1E2R:
1360 fail = __kvm_at(OP_AT_S1E1R, vaddr);
1361 break;
1362 case OP_AT_S1E2W:
1363 fail = __kvm_at(OP_AT_S1E1W, vaddr);
1364 break;
1365 case OP_AT_S1E2A:
1366 fail = __kvm_at(OP_AT_S1E1A, vaddr);
1367 break;
1368 default:
1369 WARN_ON_ONCE(1);
1370 fail = true;
1371 }
1372
1373 isb();
1374
1375 if (!fail)
1376 par = read_sysreg_par();
1377
1378 write_sysreg(hcr, hcr_el2);
1379 isb();
1380 }
1381
1382 /* We failed the translation, let's replay it in slow motion */
1383 if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
1384 par = handle_at_slow(vcpu, op, vaddr);
1385
1386 vcpu_write_sys_reg(vcpu, par, PAR_EL1);
1387 }
1388
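/*
 * AT S12 emulation: run the stage-1 part as the equivalent S1
 * operation, then, if a stage-2 is in effect, walk the emulated
 * stage-2 with the resulting IPA and merge both into PAR_EL1.
 */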
1389 void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
1390 {
1391 struct kvm_s2_trans out = {};
1392 u64 ipa, par;
1393 bool write;
1394 int ret;
1395
1396 /* Do the stage-1 translation */
1397 switch (op) {
1398 case OP_AT_S12E1R:
1399 op = OP_AT_S1E1R;
1400 write = false;
1401 break;
1402 case OP_AT_S12E1W:
1403 op = OP_AT_S1E1W;
1404 write = true;
1405 break;
1406 case OP_AT_S12E0R:
1407 op = OP_AT_S1E0R;
1408 write = false;
1409 break;
1410 case OP_AT_S12E0W:
1411 op = OP_AT_S1E0W;
1412 write = true;
1413 break;
1414 default:
1415 WARN_ON_ONCE(1);
1416 return;
1417 }
1418
1419 __kvm_at_s1e01(vcpu, op, vaddr);
1420 par = vcpu_read_sys_reg(vcpu, PAR_EL1);
1421 if (par & SYS_PAR_EL1_F)
1422 return;
1423
1424 /*
1425 * If we only have a single stage of translation (E2H=0 or
1426 * TGE=1), exit early. Same thing if {VM,DC}=={0,0}.
1427 */
1428 if (!vcpu_el2_e2h_is_set(vcpu) || vcpu_el2_tge_is_set(vcpu) ||
1429 !(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC)))
1430 return;
1431
1432 /* Do the stage-2 translation */
1433 ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0));
1434 out.esr = 0;
1435 ret = kvm_walk_nested_s2(vcpu, ipa, &out);
1436 if (ret < 0)
1437 return;
1438
1439 /* Check the access permission */
1440 if (!out.esr &&
1441 ((!write && !out.readable) || (write && !out.writable)))
1442 out.esr = ESR_ELx_FSC_PERM_L(out.level & 0x3);
1443
1444 par = compute_par_s12(vcpu, par, &out);
1445 vcpu_write_sys_reg(vcpu, par, PAR_EL1);
1446 }
1447