// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017 - Linaro Ltd
 * Author: Jintack Lim <jintack.lim@linaro.org>
 */

#include <linux/kvm_host.h>

#include <asm/esr.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>

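/*
 * Record a stage-1 walk failure: stash the fault status code and note
 * whether the fault was taken on a stage-2 walk of the stage-1 tables.
 */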
static void fail_s1_walk(struct s1_walk_result *wr, u8 fst, bool s1ptw)
{
	wr->fst = fst;
	wr->ptw = s1ptw;
	wr->s2 = s1ptw;
	wr->failed = true;
}

#define S1_MMU_DISABLED		(-127)

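/* Input address size, in bits, for the configured TxSZ (IA = 64 - TxSZ) */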
static int get_ia_size(struct s1_walk_info *wi)
{
	return 64 - wi->txsz;
}

/* Return true if the IPA is out of the OA range */
static bool check_output_size(u64 ipa, struct s1_walk_info *wi)
{
	return wi->max_oa_bits < 48 && (ipa & GENMASK_ULL(47, wi->max_oa_bits));
}

/* Return the translation regime that applies to an AT instruction */
static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 op)
{
	/*
	 * We only get here from guest EL2, so the translation
	 * regime AT applies to is solely defined by {E2H,TGE}.
	 */
	switch (op) {
	case OP_AT_S1E2R:
	case OP_AT_S1E2W:
	case OP_AT_S1E2A:
		return vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2;
	default:
		return (vcpu_el2_e2h_is_set(vcpu) &&
			vcpu_el2_tge_is_set(vcpu)) ? TR_EL20 : TR_EL10;
	}
}

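/*
 * Stage-1 Permission Indirection (S1PIE) is in effect for @regime when
 * the feature is exposed to the guest and the relevant TCR2_ELx.PIE bit
 * is set (gated by HCRX_EL2.TCR2En for the EL1&0 regime).
 */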
static bool s1pie_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
{
	if (!kvm_has_s1pie(vcpu->kvm))
		return false;

	switch (regime) {
	case TR_EL2:
	case TR_EL20:
		return vcpu_read_sys_reg(vcpu, TCR2_EL2) & TCR2_EL2_PIE;
	case TR_EL10:
		return (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En) &&
		       (__vcpu_sys_reg(vcpu, TCR2_EL1) & TCR2_EL1_PIE);
	default:
		BUG();
	}
}

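/*
 * Work out whether Stage-1 Permission Overlays (S1POE) apply to the
 * privileged and unprivileged sides of the walk, based on TCR2_ELx.
 */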
static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi)
{
	u64 val;

	if (!kvm_has_s1poe(vcpu->kvm)) {
		wi->poe = wi->e0poe = false;
		return;
	}

	switch (wi->regime) {
	case TR_EL2:
	case TR_EL20:
		val = vcpu_read_sys_reg(vcpu, TCR2_EL2);
		wi->poe = val & TCR2_EL2_POE;
		wi->e0poe = (wi->regime == TR_EL20) && (val & TCR2_EL2_E0POE);
		break;
	case TR_EL10:
		/* TCR2_EL1 has no effect when HCRX_EL2.TCR2En is clear */
		if (!(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En)) {
			wi->poe = wi->e0poe = false;
			return;
		}

		val = __vcpu_sys_reg(vcpu, TCR2_EL1);
		wi->poe = val & TCR2_EL1_POE;
		wi->e0poe = val & TCR2_EL1_E0POE;
	}
}

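/*
 * Decode the translation controls (SCTLR/TCR/TTBR and friends) for the
 * walk described by @wi, validate the VA against them, and compute the
 * starting level, table base address and output size limits. Returns 0
 * on success, or fails the walk with an Address Size or Translation
 * fault at level 0.
 */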
static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
			 struct s1_walk_result *wr, u64 va)
{
	u64 hcr, sctlr, tcr, tg, ps, ia_bits, ttbr;
	unsigned int stride, x;
	bool va55, tbi, lva;

	hcr = __vcpu_sys_reg(vcpu, HCR_EL2);

	va55 = va & BIT(55);

	if (wi->regime == TR_EL2 && va55)
		goto addrsz;

	wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC));

	switch (wi->regime) {
	case TR_EL10:
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
		tcr = vcpu_read_sys_reg(vcpu, TCR_EL1);
		ttbr = (va55 ?
			vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
			vcpu_read_sys_reg(vcpu, TTBR0_EL1));
		break;
	case TR_EL2:
	case TR_EL20:
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);
		tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
		ttbr = (va55 ?
			vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
			vcpu_read_sys_reg(vcpu, TTBR0_EL2));
		break;
	default:
		BUG();
	}

	tbi = (wi->regime == TR_EL2 ?
	       FIELD_GET(TCR_EL2_TBI, tcr) :
	       (va55 ?
		FIELD_GET(TCR_TBI1, tcr) :
		FIELD_GET(TCR_TBI0, tcr)));

	if (!tbi && (u64)sign_extend64(va, 55) != va)
		goto addrsz;

	va = (u64)sign_extend64(va, 55);

	/* Let's put the MMU disabled case aside immediately */
	switch (wi->regime) {
	case TR_EL10:
		/*
		 * If dealing with the EL1&0 translation regime, 3 things
		 * can disable the S1 translation:
		 *
		 * - HCR_EL2.DC = 1
		 * - HCR_EL2.{E2H,TGE} = {0,1}
		 * - SCTLR_EL1.M = 0
		 *
		 * The TGE part is interesting. If we have decided that this
		 * is EL1&0, then it means that either {E2H,TGE} == {1,0} or
		 * {0,x}, and we only need to test for TGE == 1.
		 */
		if (hcr & (HCR_DC | HCR_TGE)) {
			wr->level = S1_MMU_DISABLED;
			break;
		}
		fallthrough;
	case TR_EL2:
	case TR_EL20:
		if (!(sctlr & SCTLR_ELx_M))
			wr->level = S1_MMU_DISABLED;
		break;
	}

	if (wr->level == S1_MMU_DISABLED) {
		if (va >= BIT(kvm_get_pa_bits(vcpu->kvm)))
			goto addrsz;

		wr->pa = va;
		return 0;
	}

	wi->be = sctlr & SCTLR_ELx_EE;

	wi->hpd = kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, HPDS, IMP);
	wi->hpd &= (wi->regime == TR_EL2 ?
		    FIELD_GET(TCR_EL2_HPD, tcr) :
		    (va55 ?
		     FIELD_GET(TCR_HPD1, tcr) :
		     FIELD_GET(TCR_HPD0, tcr)));
	/* R_JHSVW */
	wi->hpd |= s1pie_enabled(vcpu, wi->regime);

	/* Do we have POE? */
	compute_s1poe(vcpu, wi);

	/* R_BVXDG */
	wi->hpd |= (wi->poe || wi->e0poe);

	/* Someone was silly enough to encode TG0/TG1 differently */
	if (va55) {
		wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
		tg = FIELD_GET(TCR_TG1_MASK, tcr);

		switch (tg << TCR_TG1_SHIFT) {
		case TCR_TG1_4K:
			wi->pgshift = 12; break;
		case TCR_TG1_16K:
			wi->pgshift = 14; break;
		case TCR_TG1_64K:
		default:	/* IMPDEF: treat any other value as 64k */
			wi->pgshift = 16; break;
		}
	} else {
		wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
		tg = FIELD_GET(TCR_TG0_MASK, tcr);

		switch (tg << TCR_TG0_SHIFT) {
		case TCR_TG0_4K:
			wi->pgshift = 12; break;
		case TCR_TG0_16K:
			wi->pgshift = 14; break;
		case TCR_TG0_64K:
		default:	/* IMPDEF: treat any other value as 64k */
			wi->pgshift = 16; break;
		}
	}

	/* R_PLCGL, R_YXNYW */
	if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR2_EL1, ST, 48_47)) {
		if (wi->txsz > 39)
			goto transfault_l0;
	} else {
		if (wi->txsz > 48 || (BIT(wi->pgshift) == SZ_64K && wi->txsz > 47))
			goto transfault_l0;
	}

	/* R_GTJBY, R_SXWGM */
	switch (BIT(wi->pgshift)) {
	case SZ_4K:
		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT);
		lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
		break;
	case SZ_16K:
		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT);
		lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
		break;
	case SZ_64K:
		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, VARange, 52);
		break;
	}

	if ((lva && wi->txsz < 12) || (!lva && wi->txsz < 16))
		goto transfault_l0;

	ia_bits = get_ia_size(wi);

	/* R_YYVYV, I_THCZK */
	if ((!va55 && va > GENMASK(ia_bits - 1, 0)) ||
	    (va55 && va < GENMASK(63, ia_bits)))
		goto transfault_l0;

	/* I_ZFSYQ */
	if (wi->regime != TR_EL2 &&
	    (tcr & (va55 ? TCR_EPD1_MASK : TCR_EPD0_MASK)))
		goto transfault_l0;

	/* R_BNDVG and following statements */
	if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, E0PD, IMP) &&
	    wi->as_el0 && (tcr & (va55 ? TCR_E0PD1 : TCR_E0PD0)))
		goto transfault_l0;

	/* AArch64.S1StartLevel() */
	stride = wi->pgshift - 3;
	wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride);

	ps = (wi->regime == TR_EL2 ?
	      FIELD_GET(TCR_EL2_PS_MASK, tcr) : FIELD_GET(TCR_IPS_MASK, tcr));

	wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps));

	/* Compute minimal alignment */
	x = 3 + ia_bits - ((3 - wi->sl) * stride + wi->pgshift);

	wi->baddr = ttbr & TTBRx_EL1_BADDR;

	/* R_VPBBF */
	if (check_output_size(wi->baddr, wi))
		goto addrsz;

	wi->baddr &= GENMASK_ULL(wi->max_oa_bits - 1, x);

	return 0;

addrsz:			/* Address Size Fault level 0 */
	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false);
	return -EFAULT;

transfault_l0:		/* Translation Fault level 0 */
	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(0), false);
	return -EFAULT;
}

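/*
 * Perform the stage-1 table walk itself: iterate from the start level
 * down to the final block or page descriptor, translating each table
 * address through stage-2 when required, and fill in the walk result
 * (output address, descriptor, level, nG/ASID information).
 */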
static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
		   struct s1_walk_result *wr, u64 va)
{
	u64 va_top, va_bottom, baddr, desc;
	int level, stride, ret;

	level = wi->sl;
	stride = wi->pgshift - 3;
	baddr = wi->baddr;

	va_top = get_ia_size(wi) - 1;

	while (1) {
		u64 index, ipa;

		va_bottom = (3 - level) * stride + wi->pgshift;
		index = (va & GENMASK_ULL(va_top, va_bottom)) >> (va_bottom - 3);

		ipa = baddr | index;

		if (wi->s2) {
			struct kvm_s2_trans s2_trans = {};

			ret = kvm_walk_nested_s2(vcpu, ipa, &s2_trans);
			if (ret) {
				fail_s1_walk(wr,
					     (s2_trans.esr & ~ESR_ELx_FSC_LEVEL) | level,
					     true);
				return ret;
			}

			if (!kvm_s2_trans_readable(&s2_trans)) {
				fail_s1_walk(wr, ESR_ELx_FSC_PERM_L(level),
					     true);

				return -EPERM;
			}

			ipa = kvm_s2_trans_output(&s2_trans);
		}

		ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc));
		if (ret) {
			fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), false);
			return ret;
		}

		if (wi->be)
			desc = be64_to_cpu((__force __be64)desc);
		else
			desc = le64_to_cpu((__force __le64)desc);

		/* Invalid descriptor */
		if (!(desc & BIT(0)))
			goto transfault;

		/* Block mapping, check validity down the line */
		if (!(desc & BIT(1)))
			break;

		/* Page mapping */
		if (level == 3)
			break;

		/* Table handling */
		if (!wi->hpd) {
			wr->APTable |= FIELD_GET(S1_TABLE_AP, desc);
			wr->UXNTable |= FIELD_GET(PMD_TABLE_UXN, desc);
			wr->PXNTable |= FIELD_GET(PMD_TABLE_PXN, desc);
		}

		baddr = desc & GENMASK_ULL(47, wi->pgshift);

		/* Check for out-of-range OA */
		if (check_output_size(baddr, wi))
			goto addrsz;

		/* Prepare for next round */
		va_top = va_bottom - 1;
		level++;
	}

	/* Block mapping, check the validity of the level */
	if (!(desc & BIT(1))) {
		bool valid_block = false;

		switch (BIT(wi->pgshift)) {
		case SZ_4K:
			valid_block = level == 1 || level == 2;
			break;
		case SZ_16K:
		case SZ_64K:
			valid_block = level == 2;
			break;
		}

		if (!valid_block)
			goto transfault;
	}

	if (check_output_size(desc & GENMASK(47, va_bottom), wi))
		goto addrsz;

	if (!(desc & PTE_AF)) {
		fail_s1_walk(wr, ESR_ELx_FSC_ACCESS_L(level), false);
		return -EACCES;
	}

	va_bottom += contiguous_bit_shift(desc, wi, level);

	wr->failed = false;
	wr->level = level;
	wr->desc = desc;
	wr->pa = desc & GENMASK(47, va_bottom);
	wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0);

	wr->nG = (wi->regime != TR_EL2) && (desc & PTE_NG);
	if (wr->nG) {
		u64 asid_ttbr, tcr;

		switch (wi->regime) {
		case TR_EL10:
			tcr = vcpu_read_sys_reg(vcpu, TCR_EL1);
			asid_ttbr = ((tcr & TCR_A1) ?
				     vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
				     vcpu_read_sys_reg(vcpu, TTBR0_EL1));
			break;
		case TR_EL20:
			tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
			asid_ttbr = ((tcr & TCR_A1) ?
				     vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
				     vcpu_read_sys_reg(vcpu, TTBR0_EL2));
			break;
		default:
			BUG();
		}

		wr->asid = FIELD_GET(TTBR_ASID_MASK, asid_ttbr);
		if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) ||
		    !(tcr & TCR_ASID16))
			wr->asid &= GENMASK(7, 0);
	}

	return 0;

addrsz:
	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(level), false);
	return -EINVAL;
transfault:
	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(level), false);
	return -ENOENT;
}

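/*
 * Host view of the EL1&0 translation and stage-2 registers that must be
 * saved and restored around a hardware-assisted AT emulation, since the
 * fast path temporarily loads the guest's context onto the CPU.
 */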
struct mmu_config {
	u64	ttbr0;
	u64	ttbr1;
	u64	tcr;
	u64	mair;
	u64	tcr2;
	u64	pir;
	u64	pire0;
	u64	por_el0;
	u64	por_el1;
	u64	sctlr;
	u64	vttbr;
	u64	vtcr;
};

static void __mmu_config_save(struct mmu_config *config)
{
	config->ttbr0 = read_sysreg_el1(SYS_TTBR0);
	config->ttbr1 = read_sysreg_el1(SYS_TTBR1);
	config->tcr = read_sysreg_el1(SYS_TCR);
	config->mair = read_sysreg_el1(SYS_MAIR);
	if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
		config->tcr2 = read_sysreg_el1(SYS_TCR2);
		if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
			config->pir = read_sysreg_el1(SYS_PIR);
			config->pire0 = read_sysreg_el1(SYS_PIRE0);
		}
		if (system_supports_poe()) {
			config->por_el1 = read_sysreg_el1(SYS_POR);
			config->por_el0 = read_sysreg_s(SYS_POR_EL0);
		}
	}
	config->sctlr = read_sysreg_el1(SYS_SCTLR);
	config->vttbr = read_sysreg(vttbr_el2);
	config->vtcr = read_sysreg(vtcr_el2);
}

static void __mmu_config_restore(struct mmu_config *config)
{
	/*
	 * ARM errata 1165522 and 1530923 require TGE to be 1 before
	 * we update the guest state.
	 */
	asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));

	write_sysreg_el1(config->ttbr0, SYS_TTBR0);
	write_sysreg_el1(config->ttbr1, SYS_TTBR1);
	write_sysreg_el1(config->tcr, SYS_TCR);
	write_sysreg_el1(config->mair, SYS_MAIR);
	if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
		write_sysreg_el1(config->tcr2, SYS_TCR2);
		if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
			write_sysreg_el1(config->pir, SYS_PIR);
			write_sysreg_el1(config->pire0, SYS_PIRE0);
		}
		if (system_supports_poe()) {
			write_sysreg_el1(config->por_el1, SYS_POR);
			write_sysreg_s(config->por_el0, SYS_POR_EL0);
		}
	}
	write_sysreg_el1(config->sctlr, SYS_SCTLR);
	write_sysreg(config->vttbr, vttbr_el2);
	write_sysreg(config->vtcr, vtcr_el2);
}

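/*
 * Issue AT S1E1RP/S1E1WP with PSTATE.PAN temporarily mirroring the
 * guest's PAN bit, so that the hardware walk honours the same
 * privileged-access-never semantics the guest would observe.
 */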
static bool at_s1e1p_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 host_pan;
	bool fail;

	host_pan = read_sysreg_s(SYS_PSTATE_PAN);
	write_sysreg_s(*vcpu_cpsr(vcpu) & PSTATE_PAN, SYS_PSTATE_PAN);

	switch (op) {
	case OP_AT_S1E1RP:
		fail = __kvm_at(OP_AT_S1E1RP, vaddr);
		break;
	case OP_AT_S1E1WP:
		fail = __kvm_at(OP_AT_S1E1WP, vaddr);
		break;
	}

	write_sysreg_s(host_pan, SYS_PSTATE_PAN);

	return fail;
}

#define MEMATTR(ic, oc)		(MEMATTR_##oc << 4 | MEMATTR_##ic)
#define MEMATTR_NC		0b0100
#define MEMATTR_Wt		0b1000
#define MEMATTR_Wb		0b1100
#define MEMATTR_WbRaWa		0b1111

#define MEMATTR_IS_DEVICE(m)	(((m) & GENMASK(7, 4)) == 0)

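/*
 * Convert a stage-2 MemAttr[3:0] encoding into the MAIR-style 8-bit
 * attribute format used for stage-1, so that both stages can be
 * combined with a single set of helpers.
 */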
static u8 s2_memattr_to_attr(u8 memattr)
{
	memattr &= 0b1111;

	switch (memattr) {
	case 0b0000:
	case 0b0001:
	case 0b0010:
	case 0b0011:
		return memattr << 2;
	case 0b0100:
		return MEMATTR(Wb, Wb);
	case 0b0101:
		return MEMATTR(NC, NC);
	case 0b0110:
		return MEMATTR(Wt, NC);
	case 0b0111:
		return MEMATTR(Wb, NC);
	case 0b1000:
		/* Reserved, assume NC */
		return MEMATTR(NC, NC);
	case 0b1001:
		return MEMATTR(NC, Wt);
	case 0b1010:
		return MEMATTR(Wt, Wt);
	case 0b1011:
		return MEMATTR(Wb, Wt);
	case 0b1100:
		/* Reserved, assume NC */
		return MEMATTR(NC, NC);
	case 0b1101:
		return MEMATTR(NC, Wb);
	case 0b1110:
		return MEMATTR(Wt, Wb);
	case 0b1111:
		return MEMATTR(Wb, Wb);
	default:
		unreachable();
	}
}

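/*
 * Combine the inner or outer cacheability attributes of the two stages
 * (one 4-bit half at a time), following S2CombineS1AttrHints(): the
 * weaker memory type wins, the allocation hints come from stage-1, and
 * a transient stage-1 input is re-encoded as transient on the way out.
 */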
static u8 combine_s1_s2_attr(u8 s1, u8 s2)
{
	bool transient;
	u8 final = 0;

	/* Upgrade transient s1 to non-transient to simplify things */
	switch (s1) {
	case 0b0001 ... 0b0011:	/* Normal, Write-Through Transient */
		transient = true;
		s1 = MEMATTR_Wt | (s1 & GENMASK(1, 0));
		break;
	case 0b0101 ... 0b0111:	/* Normal, Write-Back Transient */
		transient = true;
		s1 = MEMATTR_Wb | (s1 & GENMASK(1, 0));
		break;
	default:
		transient = false;
	}

	/* S2CombineS1AttrHints() */
	if ((s1 & GENMASK(3, 2)) == MEMATTR_NC ||
	    (s2 & GENMASK(3, 2)) == MEMATTR_NC)
		final = MEMATTR_NC;
	else if ((s1 & GENMASK(3, 2)) == MEMATTR_Wt ||
		 (s2 & GENMASK(3, 2)) == MEMATTR_Wt)
		final = MEMATTR_Wt;
	else
		final = MEMATTR_Wb;

	if (final != MEMATTR_NC) {
		/* Inherit RaWa hints from S1 */
		if (transient) {
			switch (s1 & GENMASK(3, 2)) {
			case MEMATTR_Wt:
				final = 0;
				break;
			case MEMATTR_Wb:
				final = MEMATTR_NC;
				break;
			}
		}

		final |= s1 & GENMASK(1, 0);
	}

	return final;
}

#define ATTR_NSH	0b00
#define ATTR_RSV	0b01
#define ATTR_OSH	0b10
#define ATTR_ISH	0b11

static u8 compute_sh(u8 attr, u64 desc)
{
	u8 sh;

	/* Any form of device, as well as NC has SH[1:0]=0b10 */
	if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC))
		return ATTR_OSH;

	sh = FIELD_GET(PTE_SHARED, desc);
	if (sh == ATTR_RSV)	/* Reserved, mapped to NSH */
		sh = ATTR_NSH;

	return sh;
}

static u8 combine_sh(u8 s1_sh, u8 s2_sh)
{
	if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH)
		return ATTR_OSH;
	if (s1_sh == ATTR_ISH || s2_sh == ATTR_ISH)
		return ATTR_ISH;

	return ATTR_NSH;
}

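/*
 * Build the final PAR_EL1 value for a combined stage-1 + stage-2
 * translation: report the stage-2 fault if there was one, otherwise
 * merge the memory attributes and shareability of both stages
 * (honouring HCR_EL2.FWB and HCR_EL2.CD) around the stage-2 output
 * address.
 */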
static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
			   struct kvm_s2_trans *tr)
{
	u8 s1_parattr, s2_memattr, final_attr;
	u64 par;

	/* If S2 has failed to translate, report the damage */
	if (tr->esr) {
		par = SYS_PAR_EL1_RES1;
		par |= SYS_PAR_EL1_F;
		par |= SYS_PAR_EL1_S;
		par |= FIELD_PREP(SYS_PAR_EL1_FST, tr->esr);
		return par;
	}

	s1_parattr = FIELD_GET(SYS_PAR_EL1_ATTR, s1_par);
	s2_memattr = FIELD_GET(GENMASK(5, 2), tr->desc);

	if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_FWB) {
		if (!kvm_has_feat(vcpu->kvm, ID_AA64PFR2_EL1, MTEPERM, IMP))
			s2_memattr &= ~BIT(3);

		/* Combination of R_VRJSW and R_RHWZM */
		switch (s2_memattr) {
		case 0b0101:
			if (MEMATTR_IS_DEVICE(s1_parattr))
				final_attr = s1_parattr;
			else
				final_attr = MEMATTR(NC, NC);
			break;
		case 0b0110:
		case 0b1110:
			final_attr = MEMATTR(WbRaWa, WbRaWa);
			break;
		case 0b0111:
		case 0b1111:
			/* Preserve S1 attribute */
			final_attr = s1_parattr;
			break;
		case 0b0100:
		case 0b1100:
		case 0b1101:
			/* Reserved, do something non-silly */
			final_attr = s1_parattr;
			break;
		default:
			/*
			 * MemAttr[2]=0, Device from S2.
			 *
			 * FWB does not influence the way that stage 1
			 * memory types and attributes are combined
			 * with stage 2 Device type and attributes.
			 */
			final_attr = min(s2_memattr_to_attr(s2_memattr),
					 s1_parattr);
		}
	} else {
		/* Combination of R_HMNDG, R_TNHFM and R_GQFSF */
		u8 s2_parattr = s2_memattr_to_attr(s2_memattr);

		if (MEMATTR_IS_DEVICE(s1_parattr) ||
		    MEMATTR_IS_DEVICE(s2_parattr)) {
			final_attr = min(s1_parattr, s2_parattr);
		} else {
			/* At this stage, this is memory vs memory */
			final_attr = combine_s1_s2_attr(s1_parattr & 0xf,
							s2_parattr & 0xf);
			final_attr |= combine_s1_s2_attr(s1_parattr >> 4,
							 s2_parattr >> 4) << 4;
		}
	}

	if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_CD) &&
	    !MEMATTR_IS_DEVICE(final_attr))
		final_attr = MEMATTR(NC, NC);

	par = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr);
	par |= tr->output & GENMASK(47, 12);
	par |= FIELD_PREP(SYS_PAR_EL1_SH,
			  combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par),
				     compute_sh(final_attr, tr->desc)));

	return par;
}

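/*
 * Build the PAR_EL1 value for a stage-1-only translation: a fault
 * encoding if the walk failed, a flat mapping with fixed attributes if
 * the stage-1 MMU is disabled, or the MAIR/shareability attributes
 * taken from the final descriptor otherwise.
 */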
static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr,
			  enum trans_regime regime)
{
	u64 par;

	if (wr->failed) {
		par = SYS_PAR_EL1_RES1;
		par |= SYS_PAR_EL1_F;
		par |= FIELD_PREP(SYS_PAR_EL1_FST, wr->fst);
		par |= wr->ptw ? SYS_PAR_EL1_PTW : 0;
		par |= wr->s2 ? SYS_PAR_EL1_S : 0;
	} else if (wr->level == S1_MMU_DISABLED) {
		/* MMU off or HCR_EL2.DC == 1 */
		par = SYS_PAR_EL1_NSE;
		par |= wr->pa & GENMASK_ULL(47, 12);

		if (regime == TR_EL10 &&
		    (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_DC)) {
			par |= FIELD_PREP(SYS_PAR_EL1_ATTR,
					  MEMATTR(WbRaWa, WbRaWa));
			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_NSH);
		} else {
			par |= FIELD_PREP(SYS_PAR_EL1_ATTR, 0); /* nGnRnE */
			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_OSH);
		}
	} else {
		u64 mair, sctlr;
		u8 sh;

		par = SYS_PAR_EL1_NSE;

		mair = (regime == TR_EL10 ?
			vcpu_read_sys_reg(vcpu, MAIR_EL1) :
			vcpu_read_sys_reg(vcpu, MAIR_EL2));

		mair >>= FIELD_GET(PTE_ATTRINDX_MASK, wr->desc) * 8;
		mair &= 0xff;

		sctlr = (regime == TR_EL10 ?
			 vcpu_read_sys_reg(vcpu, SCTLR_EL1) :
			 vcpu_read_sys_reg(vcpu, SCTLR_EL2));

		/* Force NC for memory if SCTLR_ELx.C is clear */
		if (!(sctlr & SCTLR_EL1_C) && !MEMATTR_IS_DEVICE(mair))
			mair = MEMATTR(NC, NC);

		par |= FIELD_PREP(SYS_PAR_EL1_ATTR, mair);
		par |= wr->pa & GENMASK_ULL(47, 12);

		sh = compute_sh(mair, wr->desc);
		par |= FIELD_PREP(SYS_PAR_EL1_SH, sh);
	}

	return par;
}

static bool pan3_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
{
	u64 sctlr;

	if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, PAN, PAN3))
		return false;

	if (s1pie_enabled(vcpu, regime))
		return true;

	if (regime == TR_EL10)
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
	else
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);

	return sctlr & SCTLR_EL1_EPAN;
}

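/*
 * Permission computation when S1PIE is not in use for the regime:
 * derive the base permissions from the descriptor's AP/XN bits, and
 * record the WXN and overlay-enable state for the later steps.
 */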
static void compute_s1_direct_permissions(struct kvm_vcpu *vcpu,
					  struct s1_walk_info *wi,
					  struct s1_walk_result *wr)
{
	bool wxn;

	/* Non-hierarchical part of AArch64.S1DirectBasePermissions() */
	if (wi->regime != TR_EL2) {
		switch (FIELD_GET(PTE_USER | PTE_RDONLY, wr->desc)) {
		case 0b00:
			wr->pr = wr->pw = true;
			wr->ur = wr->uw = false;
			break;
		case 0b01:
			wr->pr = wr->pw = wr->ur = wr->uw = true;
			break;
		case 0b10:
			wr->pr = true;
			wr->pw = wr->ur = wr->uw = false;
			break;
		case 0b11:
			wr->pr = wr->ur = true;
			wr->pw = wr->uw = false;
			break;
		}

		/* We don't use px for anything yet, but hey... */
		wr->px = !((wr->desc & PTE_PXN) || wr->uw);
		wr->ux = !(wr->desc & PTE_UXN);
	} else {
		wr->ur = wr->uw = wr->ux = false;

		if (!(wr->desc & PTE_RDONLY)) {
			wr->pr = wr->pw = true;
		} else {
			wr->pr = true;
			wr->pw = false;
		}

		/* XN maps to UXN */
		wr->px = !(wr->desc & PTE_UXN);
	}

	switch (wi->regime) {
	case TR_EL2:
	case TR_EL20:
		wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_WXN);
		break;
	case TR_EL10:
		wxn = (__vcpu_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN);
		break;
	}

	wr->pwxn = wr->uwxn = wxn;
	wr->pov = wi->poe;
	wr->uov = wi->e0poe;
}

static void compute_s1_hierarchical_permissions(struct kvm_vcpu *vcpu,
						struct s1_walk_info *wi,
						struct s1_walk_result *wr)
{
	/* Hierarchical part of AArch64.S1DirectBasePermissions() */
	if (wi->regime != TR_EL2) {
		switch (wr->APTable) {
		case 0b00:
			break;
		case 0b01:
			wr->ur = wr->uw = false;
			break;
		case 0b10:
			wr->pw = wr->uw = false;
			break;
		case 0b11:
			wr->pw = wr->ur = wr->uw = false;
			break;
		}

		wr->px &= !wr->PXNTable;
		wr->ux &= !wr->UXNTable;
	} else {
		if (wr->APTable & BIT(1))
			wr->pw = false;

		/* XN maps to UXN */
		wr->px &= !wr->UXNTable;
	}
}

#define perm_idx(v, r, i)	((vcpu_read_sys_reg((v), (r)) >> ((i) * 4)) & 0xf)

#define set_priv_perms(wr, r, w, x)	\
	do {				\
		(wr)->pr = (r);		\
		(wr)->pw = (w);		\
		(wr)->px = (x);		\
	} while (0)

#define set_unpriv_perms(wr, r, w, x)	\
	do {				\
		(wr)->ur = (r);		\
		(wr)->uw = (w);		\
		(wr)->ux = (x);		\
	} while (0)

#define set_priv_wxn(wr, v)		\
	do {				\
		(wr)->pwxn = (v);	\
	} while (0)

#define set_unpriv_wxn(wr, v)		\
	do {				\
		(wr)->uwxn = (v);	\
	} while (0)

/* Similar to AArch64.S1IndirectBasePermissions(), without GCS */
#define set_perms(w, wr, ip)						\
	do {								\
		/* R_LLZDZ */						\
		switch ((ip)) {						\
		case 0b0000:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		case 0b0001:						\
			set_ ## w ## _perms((wr), true , false, false);	\
			break;						\
		case 0b0010:						\
			set_ ## w ## _perms((wr), false, false, true );	\
			break;						\
		case 0b0011:						\
			set_ ## w ## _perms((wr), true , false, true );	\
			break;						\
		case 0b0100:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		case 0b0101:						\
			set_ ## w ## _perms((wr), true , true , false);	\
			break;						\
		case 0b0110:						\
			set_ ## w ## _perms((wr), true , true , true );	\
			break;						\
		case 0b0111:						\
			set_ ## w ## _perms((wr), true , true , true );	\
			break;						\
		case 0b1000:						\
			set_ ## w ## _perms((wr), true , false, false);	\
			break;						\
		case 0b1001:						\
			set_ ## w ## _perms((wr), true , false, false);	\
			break;						\
		case 0b1010:						\
			set_ ## w ## _perms((wr), true , false, true );	\
			break;						\
		case 0b1011:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		case 0b1100:						\
			set_ ## w ## _perms((wr), true , true , false);	\
			break;						\
		case 0b1101:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		case 0b1110:						\
			set_ ## w ## _perms((wr), true , true , true );	\
			break;						\
		case 0b1111:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		}							\
									\
		/* R_HJYGR */						\
		set_ ## w ## _wxn((wr), ((ip) == 0b0110));		\
									\
	} while (0)

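/*
 * Permission computation when S1PIE is enabled: the descriptor's
 * permission index selects a 4-bit entry in PIR_ELx (privileged) and
 * PIRE0_ELx (unprivileged) that provides the base permissions.
 */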
static void compute_s1_indirect_permissions(struct kvm_vcpu *vcpu,
					    struct s1_walk_info *wi,
					    struct s1_walk_result *wr)
{
	u8 up, pp, idx;

	idx = pte_pi_index(wr->desc);

	switch (wi->regime) {
	case TR_EL10:
		pp = perm_idx(vcpu, PIR_EL1, idx);
		up = perm_idx(vcpu, PIRE0_EL1, idx);
		break;
	case TR_EL20:
		pp = perm_idx(vcpu, PIR_EL2, idx);
		up = perm_idx(vcpu, PIRE0_EL2, idx);
		break;
	case TR_EL2:
		pp = perm_idx(vcpu, PIR_EL2, idx);
		up = 0;
		break;
	}

	set_perms(priv, wr, pp);

	if (wi->regime != TR_EL2)
		set_perms(unpriv, wr, up);
	else
		set_unpriv_perms(wr, false, false, false);

	wr->pov = wi->poe && !(pp & BIT(3));
	wr->uov = wi->e0poe && !(up & BIT(3));

	/* R_VFPJF */
	if (wr->px && wr->uw) {
		set_priv_perms(wr, false, false, false);
		set_unpriv_perms(wr, false, false, false);
	}
}

static void compute_s1_overlay_permissions(struct kvm_vcpu *vcpu,
					   struct s1_walk_info *wi,
					   struct s1_walk_result *wr)
{
	u8 idx, pov_perms, uov_perms;

	idx = FIELD_GET(PTE_PO_IDX_MASK, wr->desc);

	if (wr->pov) {
		switch (wi->regime) {
		case TR_EL10:
			pov_perms = perm_idx(vcpu, POR_EL1, idx);
			break;
		case TR_EL20:
			pov_perms = perm_idx(vcpu, POR_EL2, idx);
			break;
		case TR_EL2:
			pov_perms = perm_idx(vcpu, POR_EL2, idx);
			break;
		}

		if (pov_perms & ~POE_RWX)
			pov_perms = POE_NONE;

		/* R_QXXPC, S1PrivOverlay enabled */
		if (wr->pwxn && (pov_perms & POE_X))
			pov_perms &= ~POE_W;

		wr->pr &= pov_perms & POE_R;
		wr->pw &= pov_perms & POE_W;
		wr->px &= pov_perms & POE_X;
	}

	if (wr->uov) {
		switch (wi->regime) {
		case TR_EL10:
			uov_perms = perm_idx(vcpu, POR_EL0, idx);
			break;
		case TR_EL20:
			uov_perms = perm_idx(vcpu, POR_EL0, idx);
			break;
		case TR_EL2:
			uov_perms = 0;
			break;
		}

		if (uov_perms & ~POE_RWX)
			uov_perms = POE_NONE;

		/* R_NPBXC, S1UnprivOverlay enabled */
		if (wr->uwxn && (uov_perms & POE_X))
			uov_perms &= ~POE_W;

		wr->ur &= uov_perms & POE_R;
		wr->uw &= uov_perms & POE_W;
		wr->ux &= uov_perms & POE_X;
	}
}

static void compute_s1_permissions(struct kvm_vcpu *vcpu,
				   struct s1_walk_info *wi,
				   struct s1_walk_result *wr)
{
	bool pan;

	if (!s1pie_enabled(vcpu, wi->regime))
		compute_s1_direct_permissions(vcpu, wi, wr);
	else
		compute_s1_indirect_permissions(vcpu, wi, wr);

	if (!wi->hpd)
		compute_s1_hierarchical_permissions(vcpu, wi, wr);

	compute_s1_overlay_permissions(vcpu, wi, wr);

	/* R_QXXPC, S1PrivOverlay disabled */
	if (!wr->pov)
		wr->px &= !(wr->pwxn && wr->pw);

	/* R_NPBXC, S1UnprivOverlay disabled */
	if (!wr->uov)
		wr->ux &= !(wr->uwxn && wr->uw);

	pan = wi->pan && (wr->ur || wr->uw ||
			  (pan3_enabled(vcpu, wi->regime) && wr->ux));
	wr->pw &= !pan;
	wr->pr &= !pan;
}

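/*
 * Software emulation of the AT instruction: walk the guest's stage-1
 * tables by hand, compute the resulting permissions, check them against
 * the requested access type, and synthesise the PAR_EL1 value. Used
 * whenever the hardware-assisted fast path cannot give a trustworthy
 * answer.
 */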
static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	struct s1_walk_result wr = {};
	struct s1_walk_info wi = {};
	bool perm_fail = false;
	int ret, idx;

	wi.regime = compute_translation_regime(vcpu, op);
	wi.as_el0 = (op == OP_AT_S1E0R || op == OP_AT_S1E0W);
	wi.pan = (op == OP_AT_S1E1RP || op == OP_AT_S1E1WP) &&
		 (*vcpu_cpsr(vcpu) & PSR_PAN_BIT);

	ret = setup_s1_walk(vcpu, &wi, &wr, vaddr);
	if (ret)
		goto compute_par;

	if (wr.level == S1_MMU_DISABLED)
		goto compute_par;

	idx = srcu_read_lock(&vcpu->kvm->srcu);

	ret = walk_s1(vcpu, &wi, &wr, vaddr);

	srcu_read_unlock(&vcpu->kvm->srcu, idx);

	if (ret)
		goto compute_par;

	compute_s1_permissions(vcpu, &wi, &wr);

	switch (op) {
	case OP_AT_S1E1RP:
	case OP_AT_S1E1R:
	case OP_AT_S1E2R:
		perm_fail = !wr.pr;
		break;
	case OP_AT_S1E1WP:
	case OP_AT_S1E1W:
	case OP_AT_S1E2W:
		perm_fail = !wr.pw;
		break;
	case OP_AT_S1E0R:
		perm_fail = !wr.ur;
		break;
	case OP_AT_S1E0W:
		perm_fail = !wr.uw;
		break;
	case OP_AT_S1E1A:
	case OP_AT_S1E2A:
		break;
	default:
		BUG();
	}

	if (perm_fail)
		fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false);

compute_par:
	return compute_par_s1(vcpu, &wr, wi.regime);
}

/*
 * Return the PAR_EL1 value as the result of a valid translation.
 *
 * If the translation is unsuccessful, the value may only contain
 * PAR_EL1.F, and cannot be taken at face value. It isn't an
 * indication of the translation having failed, only that the fast
 * path did not succeed, *unless* it indicates a S1 permission or
 * access fault.
 */
static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	struct mmu_config config;
	struct kvm_s2_mmu *mmu;
	bool fail;
	u64 par;

	par = SYS_PAR_EL1_F;

	/*
	 * We've trapped, so everything is live on the CPU. As we will
	 * be switching contexts behind everybody's back, disable
	 * interrupts while holding the mmu lock.
	 */
	guard(write_lock_irqsave)(&vcpu->kvm->mmu_lock);

	/*
	 * If HCR_EL2.{E2H,TGE} == {1,1}, the MMU context is already
	 * the right one (as we trapped from vEL2). If not, save the
	 * full MMU context.
	 */
	if (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu))
		goto skip_mmu_switch;

	/*
	 * Obtaining the S2 MMU for a L2 is horribly racy, and we may not
	 * find it (recycled by another vcpu, for example). When this
	 * happens, admit defeat immediately and use the SW (slow) path.
	 */
	mmu = lookup_s2_mmu(vcpu);
	if (!mmu)
		return par;

	__mmu_config_save(&config);

	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR0_EL1), SYS_TTBR0);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR1_EL1), SYS_TTBR1);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR_EL1), SYS_TCR);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, MAIR_EL1), SYS_MAIR);
	if (kvm_has_tcr2(vcpu->kvm)) {
		write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR2_EL1), SYS_TCR2);
		if (kvm_has_s1pie(vcpu->kvm)) {
			write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIR_EL1), SYS_PIR);
			write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIRE0_EL1), SYS_PIRE0);
		}
		if (kvm_has_s1poe(vcpu->kvm)) {
			write_sysreg_el1(vcpu_read_sys_reg(vcpu, POR_EL1), SYS_POR);
			write_sysreg_s(vcpu_read_sys_reg(vcpu, POR_EL0), SYS_POR_EL0);
		}
	}
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, SCTLR_EL1), SYS_SCTLR);
	__load_stage2(mmu, mmu->arch);

skip_mmu_switch:
	/* Temporarily switch back to guest context */
	write_sysreg_hcr(vcpu->arch.hcr_el2);
	isb();

	switch (op) {
	case OP_AT_S1E1RP:
	case OP_AT_S1E1WP:
		fail = at_s1e1p_fast(vcpu, op, vaddr);
		break;
	case OP_AT_S1E1R:
		fail = __kvm_at(OP_AT_S1E1R, vaddr);
		break;
	case OP_AT_S1E1W:
		fail = __kvm_at(OP_AT_S1E1W, vaddr);
		break;
	case OP_AT_S1E0R:
		fail = __kvm_at(OP_AT_S1E0R, vaddr);
		break;
	case OP_AT_S1E0W:
		fail = __kvm_at(OP_AT_S1E0W, vaddr);
		break;
	case OP_AT_S1E1A:
		fail = __kvm_at(OP_AT_S1E1A, vaddr);
		break;
	default:
		WARN_ON_ONCE(1);
		fail = true;
		break;
	}

	if (!fail)
		par = read_sysreg_par();

	write_sysreg_hcr(HCR_HOST_VHE_FLAGS);

	if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)))
		__mmu_config_restore(&config);

	return par;
}

static bool par_check_s1_perm_fault(u64 par)
{
	u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par);

	return ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM &&
		!(par & SYS_PAR_EL1_S));
}

static bool par_check_s1_access_fault(u64 par)
{
	u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par);

	return ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_ACCESS &&
		!(par & SYS_PAR_EL1_S));
}

void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr);

	/*
	 * If PAR_EL1 reports that AT failed on a S1 permission or access
	 * fault, we know for sure that the PTW was able to walk the S1
	 * tables and there's nothing else to do.
	 *
	 * If AT failed for any other reason, then we must walk the guest S1
	 * to emulate the instruction.
	 */
	if ((par & SYS_PAR_EL1_F) &&
	    !par_check_s1_perm_fault(par) &&
	    !par_check_s1_access_fault(par))
		par = handle_at_slow(vcpu, op, vaddr);

	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}

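/*
 * Emulate AT S1E2*: adjust HCR_EL2 so that the equivalent S1E1
 * instruction performs the walk for the vEL2 context, and fall back to
 * the software walker unless the failure is a genuine stage-1
 * permission fault.
 */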
void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 par;

	/*
	 * We've trapped, so everything is live on the CPU. As we will be
	 * switching context behind everybody's back, disable interrupts...
	 */
	scoped_guard(write_lock_irqsave, &vcpu->kvm->mmu_lock) {
		u64 val, hcr;
		bool fail;

		val = hcr = read_sysreg(hcr_el2);
		val &= ~HCR_TGE;
		val |= HCR_VM;

		if (!vcpu_el2_e2h_is_set(vcpu))
			val |= HCR_NV | HCR_NV1;

		write_sysreg_hcr(val);
		isb();

		par = SYS_PAR_EL1_F;

		switch (op) {
		case OP_AT_S1E2R:
			fail = __kvm_at(OP_AT_S1E1R, vaddr);
			break;
		case OP_AT_S1E2W:
			fail = __kvm_at(OP_AT_S1E1W, vaddr);
			break;
		case OP_AT_S1E2A:
			fail = __kvm_at(OP_AT_S1E1A, vaddr);
			break;
		default:
			WARN_ON_ONCE(1);
			fail = true;
		}

		isb();

		if (!fail)
			par = read_sysreg_par();

		write_sysreg_hcr(hcr);
		isb();
	}

	/* We failed the translation, let's replay it in slow motion */
	if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
		par = handle_at_slow(vcpu, op, vaddr);

	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}

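/*
 * Emulate AT S12E*: perform the stage-1 translation via the S1E*
 * handler, then, if a stage 2 is in effect, walk the guest's stage-2
 * tables for the resulting IPA and merge both results into PAR_EL1.
 */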
void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	struct kvm_s2_trans out = {};
	u64 ipa, par;
	bool write;
	int ret;

	/* Do the stage-1 translation */
	switch (op) {
	case OP_AT_S12E1R:
		op = OP_AT_S1E1R;
		write = false;
		break;
	case OP_AT_S12E1W:
		op = OP_AT_S1E1W;
		write = true;
		break;
	case OP_AT_S12E0R:
		op = OP_AT_S1E0R;
		write = false;
		break;
	case OP_AT_S12E0W:
		op = OP_AT_S1E0W;
		write = true;
		break;
	default:
		WARN_ON_ONCE(1);
		return;
	}

	__kvm_at_s1e01(vcpu, op, vaddr);
	par = vcpu_read_sys_reg(vcpu, PAR_EL1);
	if (par & SYS_PAR_EL1_F)
		return;

	/*
	 * If we only have a single stage of translation (E2H=0 or
	 * TGE=1), exit early. Same thing if {VM,DC}=={0,0}.
	 */
	if (!vcpu_el2_e2h_is_set(vcpu) || vcpu_el2_tge_is_set(vcpu) ||
	    !(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC)))
		return;

	/* Do the stage-2 translation */
	ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0));
	out.esr = 0;
	ret = kvm_walk_nested_s2(vcpu, ipa, &out);
	if (ret < 0)
		return;

	/* Check the access permission */
	if (!out.esr &&
	    ((!write && !out.readable) || (write && !out.writable)))
		out.esr = ESR_ELx_FSC_PERM_L(out.level & 0x3);

	par = compute_par_s12(vcpu, par, &out);
	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}

/*
 * Translate a VA for a given EL in a given translation regime, with
 * or without PAN. This requires wi->{regime, as_el0, pan} to be
 * set. The rest of the wi and wr should be 0-initialised.
 */
int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
		       struct s1_walk_result *wr, u64 va)
{
	int ret;

	ret = setup_s1_walk(vcpu, wi, wr, va);
	if (ret)
		return ret;

	if (wr->level == S1_MMU_DISABLED) {
		wr->ur = wr->uw = wr->ux = true;
		wr->pr = wr->pw = wr->px = true;
	} else {
		ret = walk_s1(vcpu, wi, wr, va);
		if (ret)
			return ret;

		compute_s1_permissions(vcpu, wi, wr);
	}

	return 0;
}