// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017 - Linaro Ltd
 * Author: Jintack Lim <jintack.lim@linaro.org>
 */

#include <linux/kvm_host.h>

#include <asm/esr.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>

enum trans_regime {
	TR_EL10,
	TR_EL20,
	TR_EL2,
};

struct s1_walk_info {
	u64			baddr;
	enum trans_regime	regime;
	unsigned int		max_oa_bits;
	unsigned int		pgshift;
	unsigned int		txsz;
	int			sl;
	bool			hpd;
	bool			be;
	bool			s2;
};

struct s1_walk_result {
	union {
		struct {
			u64	desc;
			u64	pa;
			s8	level;
			u8	APTable;
			bool	UXNTable;
			bool	PXNTable;
		};
		struct {
			u8	fst;
			bool	ptw;
			bool	s2;
		};
	};
	bool	failed;
};

static void fail_s1_walk(struct s1_walk_result *wr, u8 fst, bool ptw, bool s2)
{
	wr->fst		= fst;
	wr->ptw		= ptw;
	wr->s2		= s2;
	wr->failed	= true;
}

#define S1_MMU_DISABLED		(-127)

static int get_ia_size(struct s1_walk_info *wi)
{
	return 64 - wi->txsz;
}

/* Return true if the IPA is out of the OA range */
static bool check_output_size(u64 ipa, struct s1_walk_info *wi)
{
	return wi->max_oa_bits < 48 && (ipa & GENMASK_ULL(47, wi->max_oa_bits));
}

/* Return the translation regime that applies to an AT instruction */
static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 op)
{
	/*
	 * We only get here from guest EL2, so the translation
	 * regime AT applies to is solely defined by {E2H,TGE}.
	 */
	switch (op) {
	case OP_AT_S1E2R:
	case OP_AT_S1E2W:
	case OP_AT_S1E2A:
		return vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2;
	default:
		return (vcpu_el2_e2h_is_set(vcpu) &&
			vcpu_el2_tge_is_set(vcpu)) ? TR_EL20 : TR_EL10;
	}
}

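/*
 * Set up the stage-1 walk context for the AT instruction being emulated:
 * select the translation regime, sample SCTLR/TCR/TTBR for it, decode
 * the granule size, TxSZ, start level and table base address, and handle
 * the MMU-off and out-of-range VA cases up front. On failure, the fault
 * is recorded in @wr and a negative error code is returned.
 */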
static int setup_s1_walk(struct kvm_vcpu *vcpu, u32 op, struct s1_walk_info *wi,
			 struct s1_walk_result *wr, u64 va)
{
	u64 hcr, sctlr, tcr, tg, ps, ia_bits, ttbr;
	unsigned int stride, x;
	bool va55, tbi, lva, as_el0;

	hcr = __vcpu_sys_reg(vcpu, HCR_EL2);

	wi->regime = compute_translation_regime(vcpu, op);
	as_el0 = (op == OP_AT_S1E0R || op == OP_AT_S1E0W);

	va55 = va & BIT(55);

	if (wi->regime == TR_EL2 && va55)
		goto addrsz;

	wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC));

	switch (wi->regime) {
	case TR_EL10:
		sctlr	= vcpu_read_sys_reg(vcpu, SCTLR_EL1);
		tcr	= vcpu_read_sys_reg(vcpu, TCR_EL1);
		ttbr	= (va55 ?
			   vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
			   vcpu_read_sys_reg(vcpu, TTBR0_EL1));
		break;
	case TR_EL2:
	case TR_EL20:
		sctlr	= vcpu_read_sys_reg(vcpu, SCTLR_EL2);
		tcr	= vcpu_read_sys_reg(vcpu, TCR_EL2);
		ttbr	= (va55 ?
			   vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
			   vcpu_read_sys_reg(vcpu, TTBR0_EL2));
		break;
	default:
		BUG();
	}

	tbi = (wi->regime == TR_EL2 ?
	       FIELD_GET(TCR_EL2_TBI, tcr) :
	       (va55 ?
		FIELD_GET(TCR_TBI1, tcr) :
		FIELD_GET(TCR_TBI0, tcr)));

	if (!tbi && (u64)sign_extend64(va, 55) != va)
		goto addrsz;

	va = (u64)sign_extend64(va, 55);

	/* Let's put the MMU disabled case aside immediately */
	switch (wi->regime) {
	case TR_EL10:
		/*
		 * If dealing with the EL1&0 translation regime, 3 things
		 * can disable the S1 translation:
		 *
		 * - HCR_EL2.DC = 1
		 * - HCR_EL2.{E2H,TGE} = {0,1}
		 * - SCTLR_EL1.M = 0
		 *
		 * The TGE part is interesting. If we have decided that this
		 * is EL1&0, then it means that either {E2H,TGE} == {1,0} or
		 * {0,x}, and we only need to test for TGE == 1.
		 */
		if (hcr & (HCR_DC | HCR_TGE)) {
			wr->level = S1_MMU_DISABLED;
			break;
		}
		fallthrough;
	case TR_EL2:
	case TR_EL20:
		if (!(sctlr & SCTLR_ELx_M))
			wr->level = S1_MMU_DISABLED;
		break;
	}

	if (wr->level == S1_MMU_DISABLED) {
		if (va >= BIT(kvm_get_pa_bits(vcpu->kvm)))
			goto addrsz;

		wr->pa = va;
		return 0;
	}

	wi->be = sctlr & SCTLR_ELx_EE;

	wi->hpd = kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, HPDS, IMP);
	wi->hpd &= (wi->regime == TR_EL2 ?
		    FIELD_GET(TCR_EL2_HPD, tcr) :
		    (va55 ?
		     FIELD_GET(TCR_HPD1, tcr) :
		     FIELD_GET(TCR_HPD0, tcr)));

	/* Someone was silly enough to encode TG0/TG1 differently */
	if (va55) {
		wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
		tg = FIELD_GET(TCR_TG1_MASK, tcr);

		switch (tg << TCR_TG1_SHIFT) {
		case TCR_TG1_4K:
			wi->pgshift = 12;	 break;
		case TCR_TG1_16K:
			wi->pgshift = 14;	 break;
		case TCR_TG1_64K:
		default:	    /* IMPDEF: treat any other value as 64k */
			wi->pgshift = 16;	 break;
		}
	} else {
		wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
		tg = FIELD_GET(TCR_TG0_MASK, tcr);

		switch (tg << TCR_TG0_SHIFT) {
		case TCR_TG0_4K:
			wi->pgshift = 12;	 break;
		case TCR_TG0_16K:
			wi->pgshift = 14;	 break;
		case TCR_TG0_64K:
		default:	    /* IMPDEF: treat any other value as 64k */
			wi->pgshift = 16;	 break;
		}
	}

	/* R_PLCGL, R_YXNYW */
	if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR2_EL1, ST, 48_47)) {
		if (wi->txsz > 39)
			goto transfault_l0;
	} else {
		if (wi->txsz > 48 || (BIT(wi->pgshift) == SZ_64K && wi->txsz > 47))
			goto transfault_l0;
	}

	/* R_GTJBY, R_SXWGM */
	switch (BIT(wi->pgshift)) {
	case SZ_4K:
		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT);
		lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
		break;
	case SZ_16K:
		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT);
		lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
		break;
	case SZ_64K:
		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, VARange, 52);
		break;
	}

	if ((lva && wi->txsz < 12) || (!lva && wi->txsz < 16))
		goto transfault_l0;

	ia_bits = get_ia_size(wi);

	/* R_YYVYV, I_THCZK */
	if ((!va55 && va > GENMASK(ia_bits - 1, 0)) ||
	    (va55 && va < GENMASK(63, ia_bits)))
		goto transfault_l0;

	/* I_ZFSYQ */
	if (wi->regime != TR_EL2 &&
	    (tcr & (va55 ? TCR_EPD1_MASK : TCR_EPD0_MASK)))
		goto transfault_l0;

	/* R_BNDVG and following statements */
	if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, E0PD, IMP) &&
	    as_el0 && (tcr & (va55 ? TCR_E0PD1 : TCR_E0PD0)))
		goto transfault_l0;

	/* AArch64.S1StartLevel() */
	stride = wi->pgshift - 3;
	wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride);

	ps = (wi->regime == TR_EL2 ?
	      FIELD_GET(TCR_EL2_PS_MASK, tcr) : FIELD_GET(TCR_IPS_MASK, tcr));

	wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps));

	/* Compute minimal alignment */
	x = 3 + ia_bits - ((3 - wi->sl) * stride + wi->pgshift);

	wi->baddr = ttbr & TTBRx_EL1_BADDR;

	/* R_VPBBF */
	if (check_output_size(wi->baddr, wi))
		goto addrsz;

	wi->baddr &= GENMASK_ULL(wi->max_oa_bits - 1, x);

	return 0;

addrsz:				/* Address Size Fault level 0 */
	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false, false);
	return -EFAULT;

transfault_l0:			/* Translation Fault level 0 */
	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(0), false, false);
	return -EFAULT;
}

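/*
 * Perform the stage-1 table walk described by @wi, translating each
 * table access through stage-2 when the walk runs under a virtual S2
 * (wi->s2). The output address, final level, descriptor and accumulated
 * hierarchical table bits are returned in @wr.
 *
 * For example, with a 4k granule (stride = 9) and a 48-bit input range,
 * level 0 uses va_bottom = 3 * 9 + 12 = 39, i.e. VA[47:39] indexes the
 * level-0 table.
 */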
static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
		   struct s1_walk_result *wr, u64 va)
{
	u64 va_top, va_bottom, baddr, desc;
	int level, stride, ret;

	level = wi->sl;
	stride = wi->pgshift - 3;
	baddr = wi->baddr;

	va_top = get_ia_size(wi) - 1;

	while (1) {
		u64 index, ipa;

		va_bottom = (3 - level) * stride + wi->pgshift;
		index = (va & GENMASK_ULL(va_top, va_bottom)) >> (va_bottom - 3);

		ipa = baddr | index;

		if (wi->s2) {
			struct kvm_s2_trans s2_trans = {};

			ret = kvm_walk_nested_s2(vcpu, ipa, &s2_trans);
			if (ret) {
				fail_s1_walk(wr,
					     (s2_trans.esr & ~ESR_ELx_FSC_LEVEL) | level,
					     true, true);
				return ret;
			}

			if (!kvm_s2_trans_readable(&s2_trans)) {
				fail_s1_walk(wr, ESR_ELx_FSC_PERM_L(level),
					     true, true);

				return -EPERM;
			}

			ipa = kvm_s2_trans_output(&s2_trans);
		}

		ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc));
		if (ret) {
			fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level),
				     true, false);
			return ret;
		}

		if (wi->be)
			desc = be64_to_cpu((__force __be64)desc);
		else
			desc = le64_to_cpu((__force __le64)desc);

		/* Invalid descriptor */
		if (!(desc & BIT(0)))
			goto transfault;

		/* Block mapping, check validity down the line */
		if (!(desc & BIT(1)))
			break;

		/* Page mapping */
		if (level == 3)
			break;

		/* Table handling */
		if (!wi->hpd) {
			wr->APTable  |= FIELD_GET(S1_TABLE_AP, desc);
			wr->UXNTable |= FIELD_GET(PMD_TABLE_UXN, desc);
			wr->PXNTable |= FIELD_GET(PMD_TABLE_PXN, desc);
		}

		baddr = desc & GENMASK_ULL(47, wi->pgshift);

		/* Check for out-of-range OA */
		if (check_output_size(baddr, wi))
			goto addrsz;

		/* Prepare for next round */
		va_top = va_bottom - 1;
		level++;
	}

	/* Block mapping, check the validity of the level */
	if (!(desc & BIT(1))) {
		bool valid_block = false;

		switch (BIT(wi->pgshift)) {
		case SZ_4K:
			valid_block = level == 1 || level == 2;
			break;
		case SZ_16K:
		case SZ_64K:
			valid_block = level == 2;
			break;
		}

		if (!valid_block)
			goto transfault;
	}

	if (check_output_size(desc & GENMASK(47, va_bottom), wi))
		goto addrsz;

	va_bottom += contiguous_bit_shift(desc, wi, level);

	wr->failed = false;
	wr->level = level;
	wr->desc = desc;
	wr->pa = desc & GENMASK(47, va_bottom);
	wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0);

	return 0;

addrsz:
	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(level), true, false);
	return -EINVAL;
transfault:
	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(level), true, false);
	return -ENOENT;
}

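/*
 * Host translation state that the fast path clobbers while it temporarily
 * loads the guest's S1 registers and stage-2 configuration on the CPU;
 * saved by __mmu_config_save() and put back by __mmu_config_restore().
 */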
struct mmu_config {
	u64	ttbr0;
	u64	ttbr1;
	u64	tcr;
	u64	mair;
	u64	sctlr;
	u64	vttbr;
	u64	vtcr;
	u64	hcr;
};

static void __mmu_config_save(struct mmu_config *config)
{
	config->ttbr0	= read_sysreg_el1(SYS_TTBR0);
	config->ttbr1	= read_sysreg_el1(SYS_TTBR1);
	config->tcr	= read_sysreg_el1(SYS_TCR);
	config->mair	= read_sysreg_el1(SYS_MAIR);
	config->sctlr	= read_sysreg_el1(SYS_SCTLR);
	config->vttbr	= read_sysreg(vttbr_el2);
	config->vtcr	= read_sysreg(vtcr_el2);
	config->hcr	= read_sysreg(hcr_el2);
}

static void __mmu_config_restore(struct mmu_config *config)
{
	write_sysreg(config->hcr, hcr_el2);

	/*
	 * ARM errata 1165522 and 1530923 require TGE to be 1 before
	 * we update the guest state.
	 */
	asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));

	write_sysreg_el1(config->ttbr0, SYS_TTBR0);
	write_sysreg_el1(config->ttbr1, SYS_TTBR1);
	write_sysreg_el1(config->tcr, SYS_TCR);
	write_sysreg_el1(config->mair, SYS_MAIR);
	write_sysreg_el1(config->sctlr, SYS_SCTLR);
	write_sysreg(config->vttbr, vttbr_el2);
	write_sysreg(config->vtcr, vtcr_el2);
}

static bool at_s1e1p_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 host_pan;
	bool fail;

	host_pan = read_sysreg_s(SYS_PSTATE_PAN);
	write_sysreg_s(*vcpu_cpsr(vcpu) & PSTATE_PAN, SYS_PSTATE_PAN);

	switch (op) {
	case OP_AT_S1E1RP:
		fail = __kvm_at(OP_AT_S1E1RP, vaddr);
		break;
	case OP_AT_S1E1WP:
		fail = __kvm_at(OP_AT_S1E1WP, vaddr);
		break;
	}

	write_sysreg_s(host_pan, SYS_PSTATE_PAN);

	return fail;
}

#define MEMATTR(ic, oc)		(MEMATTR_##oc << 4 | MEMATTR_##ic)
#define MEMATTR_NC		0b0100
#define MEMATTR_Wt		0b1000
#define MEMATTR_Wb		0b1100
#define MEMATTR_WbRaWa		0b1111

#define MEMATTR_IS_DEVICE(m)	(((m) & GENMASK(7, 4)) == 0)

static u8 s2_memattr_to_attr(u8 memattr)
{
	memattr &= 0b1111;

	switch (memattr) {
	case 0b0000:
	case 0b0001:
	case 0b0010:
	case 0b0011:
		return memattr << 2;
	case 0b0100:
		return MEMATTR(Wb, Wb);
	case 0b0101:
		return MEMATTR(NC, NC);
	case 0b0110:
		return MEMATTR(Wt, NC);
	case 0b0111:
		return MEMATTR(Wb, NC);
	case 0b1000:
		/* Reserved, assume NC */
		return MEMATTR(NC, NC);
	case 0b1001:
		return MEMATTR(NC, Wt);
	case 0b1010:
		return MEMATTR(Wt, Wt);
	case 0b1011:
		return MEMATTR(Wb, Wt);
	case 0b1100:
		/* Reserved, assume NC */
		return MEMATTR(NC, NC);
	case 0b1101:
		return MEMATTR(NC, Wb);
	case 0b1110:
		return MEMATTR(Wt, Wb);
	case 0b1111:
		return MEMATTR(Wb, Wb);
	default:
		unreachable();
	}
}

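/*
 * Combine one nibble (inner or outer) of the S1 attribute with the
 * corresponding S2 nibble, as per S2CombineS1AttrHints(). Note that the
 * MEMATTR() encoding puts the outer attribute in the top nibble and the
 * inner one in the bottom nibble, e.g. MEMATTR(Wb, NC) is 0b01001100.
 */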
static u8 combine_s1_s2_attr(u8 s1, u8 s2)
{
	bool transient;
	u8 final = 0;

	/* Upgrade transient s1 to non-transient to simplify things */
	switch (s1) {
	case 0b0001 ... 0b0011:	/* Normal, Write-Through Transient */
		transient = true;
		s1 = MEMATTR_Wt | (s1 & GENMASK(1,0));
		break;
	case 0b0101 ... 0b0111:	/* Normal, Write-Back Transient */
		transient = true;
		s1 = MEMATTR_Wb | (s1 & GENMASK(1,0));
		break;
	default:
		transient = false;
	}

	/* S2CombineS1AttrHints() */
	if ((s1 & GENMASK(3, 2)) == MEMATTR_NC ||
	    (s2 & GENMASK(3, 2)) == MEMATTR_NC)
		final = MEMATTR_NC;
	else if ((s1 & GENMASK(3, 2)) == MEMATTR_Wt ||
		 (s2 & GENMASK(3, 2)) == MEMATTR_Wt)
		final = MEMATTR_Wt;
	else
		final = MEMATTR_Wb;

	if (final != MEMATTR_NC) {
		/* Inherit RaWa hints from S1 */
		if (transient) {
			switch (s1 & GENMASK(3, 2)) {
			case MEMATTR_Wt:
				final = 0;
				break;
			case MEMATTR_Wb:
				final = MEMATTR_NC;
				break;
			}
		}

		final |= s1 & GENMASK(1, 0);
	}

	return final;
}

#define ATTR_NSH	0b00
#define ATTR_RSV	0b01
#define ATTR_OSH	0b10
#define ATTR_ISH	0b11

static u8 compute_sh(u8 attr, u64 desc)
{
	u8 sh;

	/* Any form of device, as well as NC has SH[1:0]=0b10 */
	if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC))
		return ATTR_OSH;

	sh = FIELD_GET(PTE_SHARED, desc);
	if (sh == ATTR_RSV)		/* Reserved, mapped to NSH */
		sh = ATTR_NSH;

	return sh;
}

static u8 combine_sh(u8 s1_sh, u8 s2_sh)
{
	if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH)
		return ATTR_OSH;
	if (s1_sh == ATTR_ISH || s2_sh == ATTR_ISH)
		return ATTR_ISH;

	return ATTR_NSH;
}

static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
			   struct kvm_s2_trans *tr)
{
	u8 s1_parattr, s2_memattr, final_attr;
	u64 par;

	/* If S2 has failed to translate, report the damage */
	if (tr->esr) {
		par = SYS_PAR_EL1_RES1;
		par |= SYS_PAR_EL1_F;
		par |= SYS_PAR_EL1_S;
		par |= FIELD_PREP(SYS_PAR_EL1_FST, tr->esr);
		return par;
	}

	s1_parattr = FIELD_GET(SYS_PAR_EL1_ATTR, s1_par);
	s2_memattr = FIELD_GET(GENMASK(5, 2), tr->desc);

	if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_FWB) {
		if (!kvm_has_feat(vcpu->kvm, ID_AA64PFR2_EL1, MTEPERM, IMP))
			s2_memattr &= ~BIT(3);

		/* Combination of R_VRJSW and R_RHWZM */
		switch (s2_memattr) {
		case 0b0101:
			if (MEMATTR_IS_DEVICE(s1_parattr))
				final_attr = s1_parattr;
			else
				final_attr = MEMATTR(NC, NC);
			break;
		case 0b0110:
		case 0b1110:
			final_attr = MEMATTR(WbRaWa, WbRaWa);
			break;
		case 0b0111:
		case 0b1111:
			/* Preserve S1 attribute */
			final_attr = s1_parattr;
			break;
		case 0b0100:
		case 0b1100:
		case 0b1101:
			/* Reserved, do something non-silly */
			final_attr = s1_parattr;
			break;
		default:
			/* MemAttr[2]=0, Device from S2 */
			final_attr = s2_memattr & GENMASK(1,0) << 2;
		}
	} else {
		/* Combination of R_HMNDG, R_TNHFM and R_GQFSF */
		u8 s2_parattr = s2_memattr_to_attr(s2_memattr);

		if (MEMATTR_IS_DEVICE(s1_parattr) ||
		    MEMATTR_IS_DEVICE(s2_parattr)) {
			final_attr = min(s1_parattr, s2_parattr);
		} else {
			/* At this stage, this is memory vs memory */
			final_attr  = combine_s1_s2_attr(s1_parattr & 0xf,
							 s2_parattr & 0xf);
			final_attr |= combine_s1_s2_attr(s1_parattr >> 4,
							 s2_parattr >> 4) << 4;
		}
	}

	if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_CD) &&
	    !MEMATTR_IS_DEVICE(final_attr))
		final_attr = MEMATTR(NC, NC);

	par  = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr);
	par |= tr->output & GENMASK(47, 12);
	par |= FIELD_PREP(SYS_PAR_EL1_SH,
			  combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par),
				     compute_sh(final_attr, tr->desc)));

	return par;
}

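/*
 * Build the PAR_EL1 value for a stage-1-only translation: either the
 * fault syndrome (F, FST, PTW, S), the MMU-off/HCR_EL2.DC case, or the
 * output address together with the attributes and shareability derived
 * from MAIR_ELx and the final descriptor.
 */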
static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr,
			  enum trans_regime regime)
{
	u64 par;

	if (wr->failed) {
		par = SYS_PAR_EL1_RES1;
		par |= SYS_PAR_EL1_F;
		par |= FIELD_PREP(SYS_PAR_EL1_FST, wr->fst);
		par |= wr->ptw ? SYS_PAR_EL1_PTW : 0;
		par |= wr->s2 ? SYS_PAR_EL1_S : 0;
	} else if (wr->level == S1_MMU_DISABLED) {
		/* MMU off or HCR_EL2.DC == 1 */
		par = SYS_PAR_EL1_NSE;
		par |= wr->pa & GENMASK_ULL(47, 12);

		if (regime == TR_EL10 &&
		    (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_DC)) {
			par |= FIELD_PREP(SYS_PAR_EL1_ATTR,
					  MEMATTR(WbRaWa, WbRaWa));
			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_NSH);
		} else {
			par |= FIELD_PREP(SYS_PAR_EL1_ATTR, 0); /* nGnRnE */
			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_OSH);
		}
	} else {
		u64 mair, sctlr;
		u8 sh;

		par = SYS_PAR_EL1_NSE;

		mair = (regime == TR_EL10 ?
			vcpu_read_sys_reg(vcpu, MAIR_EL1) :
			vcpu_read_sys_reg(vcpu, MAIR_EL2));

		mair >>= FIELD_GET(PTE_ATTRINDX_MASK, wr->desc) * 8;
		mair &= 0xff;

		sctlr = (regime == TR_EL10 ?
			 vcpu_read_sys_reg(vcpu, SCTLR_EL1) :
			 vcpu_read_sys_reg(vcpu, SCTLR_EL2));

		/* Force NC for memory if SCTLR_ELx.C is clear */
		if (!(sctlr & SCTLR_EL1_C) && !MEMATTR_IS_DEVICE(mair))
			mair = MEMATTR(NC, NC);

		par |= FIELD_PREP(SYS_PAR_EL1_ATTR, mair);
		par |= wr->pa & GENMASK_ULL(47, 12);

		sh = compute_sh(mair, wr->desc);
		par |= FIELD_PREP(SYS_PAR_EL1_SH, sh);
	}

	return par;
}

static bool pan3_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
{
	u64 sctlr;

	if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, PAN, PAN3))
		return false;

	if (regime == TR_EL10)
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
	else
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);

	return sctlr & SCTLR_EL1_EPAN;
}

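/*
 * Software (slow path) emulation of an AT instruction: set up and run
 * the guest's S1 walk by hand, derive the direct access permissions
 * (including PAN/PAN3 handling for S1E1RP/S1E1WP), and fold the result
 * into a PAR_EL1 value.
 */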
static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	bool perm_fail, ur, uw, ux, pr, pw, px;
	struct s1_walk_result wr = {};
	struct s1_walk_info wi = {};
	int ret, idx;

	ret = setup_s1_walk(vcpu, op, &wi, &wr, vaddr);
	if (ret)
		goto compute_par;

	if (wr.level == S1_MMU_DISABLED)
		goto compute_par;

	idx = srcu_read_lock(&vcpu->kvm->srcu);

	ret = walk_s1(vcpu, &wi, &wr, vaddr);

	srcu_read_unlock(&vcpu->kvm->srcu, idx);

	if (ret)
		goto compute_par;

	/* FIXME: revisit when adding indirect permission support */
	/* AArch64.S1DirectBasePermissions() */
	if (wi.regime != TR_EL2) {
		switch (FIELD_GET(PTE_USER | PTE_RDONLY, wr.desc)) {
		case 0b00:
			pr = pw = true;
			ur = uw = false;
			break;
		case 0b01:
			pr = pw = ur = uw = true;
			break;
		case 0b10:
			pr = true;
			pw = ur = uw = false;
			break;
		case 0b11:
			pr = ur = true;
			pw = uw = false;
			break;
		}

		switch (wr.APTable) {
		case 0b00:
			break;
		case 0b01:
			ur = uw = false;
			break;
		case 0b10:
			pw = uw = false;
			break;
		case 0b11:
			pw = ur = uw = false;
			break;
		}

		/* We don't use px for anything yet, but hey... */
		px = !((wr.desc & PTE_PXN) || wr.PXNTable || uw);
		ux = !((wr.desc & PTE_UXN) || wr.UXNTable);

		if (op == OP_AT_S1E1RP || op == OP_AT_S1E1WP) {
			bool pan;

			pan = *vcpu_cpsr(vcpu) & PSR_PAN_BIT;
			pan &= ur || uw || (pan3_enabled(vcpu, wi.regime) && ux);
			pw &= !pan;
			pr &= !pan;
		}
	} else {
		ur = uw = ux = false;

		if (!(wr.desc & PTE_RDONLY)) {
			pr = pw = true;
		} else {
			pr = true;
			pw = false;
		}

		if (wr.APTable & BIT(1))
			pw = false;

		/* XN maps to UXN */
		px = !((wr.desc & PTE_UXN) || wr.UXNTable);
	}

	perm_fail = false;

	switch (op) {
	case OP_AT_S1E1RP:
	case OP_AT_S1E1R:
	case OP_AT_S1E2R:
		perm_fail = !pr;
		break;
	case OP_AT_S1E1WP:
	case OP_AT_S1E1W:
	case OP_AT_S1E2W:
		perm_fail = !pw;
		break;
	case OP_AT_S1E0R:
		perm_fail = !ur;
		break;
	case OP_AT_S1E0W:
		perm_fail = !uw;
		break;
	case OP_AT_S1E1A:
	case OP_AT_S1E2A:
		break;
	default:
		BUG();
	}

	if (perm_fail)
		fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false, false);

compute_par:
	return compute_par_s1(vcpu, &wr, wi.regime);
}

/*
 * Return the PAR_EL1 value as the result of a valid translation.
 *
 * If the translation is unsuccessful, the value may only contain
 * PAR_EL1.F, and cannot be taken at face value. It isn't an
 * indication of the translation having failed, only that the fast
 * path did not succeed, *unless* it indicates a S1 permission fault.
 */
static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	struct mmu_config config;
	struct kvm_s2_mmu *mmu;
	bool fail;
	u64 par;

	par = SYS_PAR_EL1_F;

	/*
	 * We've trapped, so everything is live on the CPU. As we will
	 * be switching contexts behind everybody's back, disable
	 * interrupts while holding the mmu lock.
	 */
	guard(write_lock_irqsave)(&vcpu->kvm->mmu_lock);

	/*
	 * If HCR_EL2.{E2H,TGE} == {1,1}, the MMU context is already
	 * the right one (as we trapped from vEL2). If not, save the
	 * full MMU context.
	 */
	if (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu))
		goto skip_mmu_switch;

	/*
	 * Obtaining the S2 MMU for a L2 is horribly racy, and we may not
	 * find it (recycled by another vcpu, for example). When this
	 * happens, admit defeat immediately and use the SW (slow) path.
	 */
	mmu = lookup_s2_mmu(vcpu);
	if (!mmu)
		return par;

	__mmu_config_save(&config);

	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR0_EL1), SYS_TTBR0);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR1_EL1), SYS_TTBR1);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR_EL1), SYS_TCR);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, MAIR_EL1), SYS_MAIR);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, SCTLR_EL1), SYS_SCTLR);
	__load_stage2(mmu, mmu->arch);

skip_mmu_switch:
	/* Clear TGE, enable S2 translation, we're rolling */
	write_sysreg((config.hcr & ~HCR_TGE) | HCR_VM, hcr_el2);
	isb();

	switch (op) {
	case OP_AT_S1E1RP:
	case OP_AT_S1E1WP:
		fail = at_s1e1p_fast(vcpu, op, vaddr);
		break;
	case OP_AT_S1E1R:
		fail = __kvm_at(OP_AT_S1E1R, vaddr);
		break;
	case OP_AT_S1E1W:
		fail = __kvm_at(OP_AT_S1E1W, vaddr);
		break;
	case OP_AT_S1E0R:
		fail = __kvm_at(OP_AT_S1E0R, vaddr);
		break;
	case OP_AT_S1E0W:
		fail = __kvm_at(OP_AT_S1E0W, vaddr);
		break;
	case OP_AT_S1E1A:
		fail = __kvm_at(OP_AT_S1E1A, vaddr);
		break;
	default:
		WARN_ON_ONCE(1);
		fail = true;
		break;
	}

	if (!fail)
		par = read_sysreg_par();

	if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)))
		__mmu_config_restore(&config);

	return par;
}

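/*
 * Check whether PAR_EL1 reports a stage-1 permission fault: in that case
 * the hardware walk did reach the final descriptor, so the fast-path
 * result can be used as-is.
 */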
static bool par_check_s1_perm_fault(u64 par)
{
	u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par);

	return ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM &&
		!(par & SYS_PAR_EL1_S));
}

void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr);

	/*
	 * If PAR_EL1 reports that AT failed on a S1 permission fault, we
	 * know for sure that the PTW was able to walk the S1 tables and
	 * there's nothing else to do.
	 *
	 * If AT failed for any other reason, then we must walk the guest S1
	 * to emulate the instruction.
	 */
	if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
		par = handle_at_slow(vcpu, op, vaddr);

	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}

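/*
 * Emulate AT S1E2{R,W,A} on behalf of the guest hypervisor: run the
 * equivalent S1E1* instruction with HCR_EL2.TGE cleared, VM set and
 * (for a non-VHE guest EL2) NV/NV1 set, so that the hardware walk uses
 * the guest's EL2 translation regime. Fall back to the software walker
 * if the fast path fails for anything but a S1 permission fault.
 */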
void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 par;

	/*
	 * We've trapped, so everything is live on the CPU. As we will be
	 * switching context behind everybody's back, disable interrupts...
	 */
	scoped_guard(write_lock_irqsave, &vcpu->kvm->mmu_lock) {
		struct kvm_s2_mmu *mmu;
		u64 val, hcr;
		bool fail;

		mmu = &vcpu->kvm->arch.mmu;

		val = hcr = read_sysreg(hcr_el2);
		val &= ~HCR_TGE;
		val |= HCR_VM;

		if (!vcpu_el2_e2h_is_set(vcpu))
			val |= HCR_NV | HCR_NV1;

		write_sysreg(val, hcr_el2);
		isb();

		par = SYS_PAR_EL1_F;

		switch (op) {
		case OP_AT_S1E2R:
			fail = __kvm_at(OP_AT_S1E1R, vaddr);
			break;
		case OP_AT_S1E2W:
			fail = __kvm_at(OP_AT_S1E1W, vaddr);
			break;
		case OP_AT_S1E2A:
			fail = __kvm_at(OP_AT_S1E1A, vaddr);
			break;
		default:
			WARN_ON_ONCE(1);
			fail = true;
		}

		isb();

		if (!fail)
			par = read_sysreg_par();

		write_sysreg(hcr, hcr_el2);
		isb();
	}

	/* We failed the translation, let's replay it in slow motion */
	if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
		par = handle_at_slow(vcpu, op, vaddr);

	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}

void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	struct kvm_s2_trans out = {};
	u64 ipa, par;
	bool write;
	int ret;

	/* Do the stage-1 translation */
	switch (op) {
	case OP_AT_S12E1R:
		op = OP_AT_S1E1R;
		write = false;
		break;
	case OP_AT_S12E1W:
		op = OP_AT_S1E1W;
		write = true;
		break;
	case OP_AT_S12E0R:
		op = OP_AT_S1E0R;
		write = false;
		break;
	case OP_AT_S12E0W:
		op = OP_AT_S1E0W;
		write = true;
		break;
	default:
		WARN_ON_ONCE(1);
		return;
	}

	__kvm_at_s1e01(vcpu, op, vaddr);
	par = vcpu_read_sys_reg(vcpu, PAR_EL1);
	if (par & SYS_PAR_EL1_F)
		return;

	/*
	 * If we only have a single stage of translation (E2H=0 or
	 * TGE=1), exit early. Same thing if {VM,DC}=={0,0}.
	 */
	if (!vcpu_el2_e2h_is_set(vcpu) || vcpu_el2_tge_is_set(vcpu) ||
	    !(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC)))
		return;

	/* Do the stage-2 translation */
	ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0));
	out.esr = 0;
	ret = kvm_walk_nested_s2(vcpu, ipa, &out);
	if (ret < 0)
		return;

	/* Check the access permission */
	if (!out.esr &&
	    ((!write && !out.readable) || (write && !out.writable)))
		out.esr = ESR_ELx_FSC_PERM_L(out.level & 0x3);

	par = compute_par_s12(vcpu, par, &out);
	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}