// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017 - Linaro Ltd
 * Author: Jintack Lim <jintack.lim@linaro.org>
 */

#include <linux/kvm_host.h>

#include <asm/esr.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>

enum trans_regime {
	TR_EL10,
	TR_EL20,
	TR_EL2,
};

struct s1_walk_info {
	u64 baddr;
	enum trans_regime regime;
	unsigned int max_oa_bits;
	unsigned int pgshift;
	unsigned int txsz;
	int sl;
	bool hpd;
	bool e0poe;
	bool poe;
	bool pan;
	bool be;
	bool s2;
};

struct s1_walk_result {
	union {
		struct {
			u64 desc;
			u64 pa;
			s8 level;
			u8 APTable;
			bool UXNTable;
			bool PXNTable;
			bool uwxn;
			bool uov;
			bool ur;
			bool uw;
			bool ux;
			bool pwxn;
			bool pov;
			bool pr;
			bool pw;
			bool px;
		};
		struct {
			u8 fst;
			bool ptw;
			bool s2;
		};
	};
	bool failed;
};

static void fail_s1_walk(struct s1_walk_result *wr, u8 fst, bool ptw, bool s2)
{
	wr->fst = fst;
	wr->ptw = ptw;
	wr->s2 = s2;
	wr->failed = true;
}

#define S1_MMU_DISABLED	(-127)

static int get_ia_size(struct s1_walk_info *wi)
{
	return 64 - wi->txsz;
}

/* Return true if the IPA is out of the OA range */
static bool check_output_size(u64 ipa, struct s1_walk_info *wi)
{
	return wi->max_oa_bits < 48 && (ipa & GENMASK_ULL(47, wi->max_oa_bits));
}

/* Return the translation regime that applies to an AT instruction */
static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 op)
{
	/*
	 * We only get here from guest EL2, so the translation
	 * regime AT applies to is solely defined by {E2H,TGE}.
	 */
	switch (op) {
	case OP_AT_S1E2R:
	case OP_AT_S1E2W:
	case OP_AT_S1E2A:
		return vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2;
	default:
		return (vcpu_el2_e2h_is_set(vcpu) &&
			vcpu_el2_tge_is_set(vcpu)) ? TR_EL20 : TR_EL10;
	}
}
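
/*
 * FEAT_S1PIE/FEAT_S1POE only come into play when the guest can see the
 * feature *and* the corresponding TCR2_ELx control is set; for the
 * EL1&0 regime, TCR2_EL1 itself is only in effect when HCRX_EL2.TCR2En
 * is set.
 */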
static bool s1pie_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
{
	if (!kvm_has_s1pie(vcpu->kvm))
		return false;

	switch (regime) {
	case TR_EL2:
	case TR_EL20:
		return vcpu_read_sys_reg(vcpu, TCR2_EL2) & TCR2_EL2_PIE;
	case TR_EL10:
		return (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En) &&
		       (__vcpu_sys_reg(vcpu, TCR2_EL1) & TCR2_EL1x_PIE);
	default:
		BUG();
	}
}

static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi)
{
	u64 val;

	if (!kvm_has_s1poe(vcpu->kvm)) {
		wi->poe = wi->e0poe = false;
		return;
	}

	switch (wi->regime) {
	case TR_EL2:
	case TR_EL20:
		val = vcpu_read_sys_reg(vcpu, TCR2_EL2);
		wi->poe = val & TCR2_EL2_POE;
		wi->e0poe = (wi->regime == TR_EL20) && (val & TCR2_EL2_E0POE);
		break;
	case TR_EL10:
		/* TCR2_EL1 is only in effect when HCRX_EL2.TCR2En is set */
		if (!(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En)) {
			wi->poe = wi->e0poe = false;
			return;
		}

		val = __vcpu_sys_reg(vcpu, TCR2_EL1);
		wi->poe = val & TCR2_EL1x_POE;
		wi->e0poe = val & TCR2_EL1x_E0POE;
	}
}

static int setup_s1_walk(struct kvm_vcpu *vcpu, u32 op, struct s1_walk_info *wi,
			 struct s1_walk_result *wr, u64 va)
{
	u64 hcr, sctlr, tcr, tg, ps, ia_bits, ttbr;
	unsigned int stride, x;
	bool va55, tbi, lva, as_el0;

	hcr = __vcpu_sys_reg(vcpu, HCR_EL2);

	wi->regime = compute_translation_regime(vcpu, op);
	as_el0 = (op == OP_AT_S1E0R || op == OP_AT_S1E0W);
	wi->pan = (op == OP_AT_S1E1RP || op == OP_AT_S1E1WP) &&
		  (*vcpu_cpsr(vcpu) & PSR_PAN_BIT);

	va55 = va & BIT(55);

	if (wi->regime == TR_EL2 && va55)
		goto addrsz;

	wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC));

	switch (wi->regime) {
	case TR_EL10:
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
		tcr = vcpu_read_sys_reg(vcpu, TCR_EL1);
		ttbr = (va55 ?
			vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
			vcpu_read_sys_reg(vcpu, TTBR0_EL1));
		break;
	case TR_EL2:
	case TR_EL20:
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);
		tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
		ttbr = (va55 ?
			vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
			vcpu_read_sys_reg(vcpu, TTBR0_EL2));
		break;
	default:
		BUG();
	}

	tbi = (wi->regime == TR_EL2 ?
	       FIELD_GET(TCR_EL2_TBI, tcr) :
	       (va55 ?
		FIELD_GET(TCR_TBI1, tcr) :
		FIELD_GET(TCR_TBI0, tcr)));

	if (!tbi && (u64)sign_extend64(va, 55) != va)
		goto addrsz;

	va = (u64)sign_extend64(va, 55);

	/* Let's put the MMU disabled case aside immediately */
	switch (wi->regime) {
	case TR_EL10:
		/*
		 * If dealing with the EL1&0 translation regime, 3 things
		 * can disable the S1 translation:
		 *
		 * - HCR_EL2.DC = 1
		 * - HCR_EL2.{E2H,TGE} = {0,1}
		 * - SCTLR_EL1.M = 0
		 *
		 * The TGE part is interesting. If we have decided that this
		 * is EL1&0, then it means that either {E2H,TGE} == {1,0} or
		 * {0,x}, and we only need to test for TGE == 1.
		 */
		if (hcr & (HCR_DC | HCR_TGE)) {
			wr->level = S1_MMU_DISABLED;
			break;
		}
		fallthrough;
	case TR_EL2:
	case TR_EL20:
		if (!(sctlr & SCTLR_ELx_M))
			wr->level = S1_MMU_DISABLED;
		break;
	}

	if (wr->level == S1_MMU_DISABLED) {
		if (va >= BIT(kvm_get_pa_bits(vcpu->kvm)))
			goto addrsz;

		wr->pa = va;
		return 0;
	}
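
	/*
	 * From here on, the MMU is enabled: decode the TCR_ELx controls
	 * that shape the walk (endianness, HPD, granule size and TxSZ
	 * limits, 52-bit support, input address range, EPD/E0PD), then
	 * derive the starting level and the table base address.
	 */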
	wi->be = sctlr & SCTLR_ELx_EE;

	wi->hpd = kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, HPDS, IMP);
	wi->hpd &= (wi->regime == TR_EL2 ?
		    FIELD_GET(TCR_EL2_HPD, tcr) :
		    (va55 ?
		     FIELD_GET(TCR_HPD1, tcr) :
		     FIELD_GET(TCR_HPD0, tcr)));
	/* R_JHSVW */
	wi->hpd |= s1pie_enabled(vcpu, wi->regime);

	/* Do we have POE? */
	compute_s1poe(vcpu, wi);

	/* R_BVXDG */
	wi->hpd |= (wi->poe || wi->e0poe);

	/* Someone was silly enough to encode TG0/TG1 differently */
	if (va55) {
		wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
		tg = FIELD_GET(TCR_TG1_MASK, tcr);

		switch (tg << TCR_TG1_SHIFT) {
		case TCR_TG1_4K:
			wi->pgshift = 12; break;
		case TCR_TG1_16K:
			wi->pgshift = 14; break;
		case TCR_TG1_64K:
		default:	/* IMPDEF: treat any other value as 64k */
			wi->pgshift = 16; break;
		}
	} else {
		wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
		tg = FIELD_GET(TCR_TG0_MASK, tcr);

		switch (tg << TCR_TG0_SHIFT) {
		case TCR_TG0_4K:
			wi->pgshift = 12; break;
		case TCR_TG0_16K:
			wi->pgshift = 14; break;
		case TCR_TG0_64K:
		default:	/* IMPDEF: treat any other value as 64k */
			wi->pgshift = 16; break;
		}
	}

	/* R_PLCGL, R_YXNYW */
	if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR2_EL1, ST, 48_47)) {
		if (wi->txsz > 39)
			goto transfault_l0;
	} else {
		if (wi->txsz > 48 || (BIT(wi->pgshift) == SZ_64K && wi->txsz > 47))
			goto transfault_l0;
	}

	/* R_GTJBY, R_SXWGM */
	switch (BIT(wi->pgshift)) {
	case SZ_4K:
		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT);
		lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
		break;
	case SZ_16K:
		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT);
		lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
		break;
	case SZ_64K:
		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, VARange, 52);
		break;
	}

	if ((lva && wi->txsz < 12) || (!lva && wi->txsz < 16))
		goto transfault_l0;

	ia_bits = get_ia_size(wi);

	/* R_YYVYV, I_THCZK */
	if ((!va55 && va > GENMASK(ia_bits - 1, 0)) ||
	    (va55 && va < GENMASK(63, ia_bits)))
		goto transfault_l0;

	/* I_ZFSYQ */
	if (wi->regime != TR_EL2 &&
	    (tcr & (va55 ? TCR_EPD1_MASK : TCR_EPD0_MASK)))
		goto transfault_l0;

	/* R_BNDVG and following statements */
	if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, E0PD, IMP) &&
	    as_el0 && (tcr & (va55 ? TCR_E0PD1 : TCR_E0PD0)))
		goto transfault_l0;
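
	/*
	 * Worked example (illustrative, not from the original source):
	 * with a 4K granule (pgshift = 12, stride = 9) and TxSZ = 16,
	 * ia_bits = 48, so sl = 3 - ((48 - 1 - 12) / 9) = 0 and the
	 * walk starts at level 0.
	 */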
	/* AArch64.S1StartLevel() */
	stride = wi->pgshift - 3;
	wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride);

	ps = (wi->regime == TR_EL2 ?
	      FIELD_GET(TCR_EL2_PS_MASK, tcr) : FIELD_GET(TCR_IPS_MASK, tcr));

	wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps));

	/* Compute minimal alignment */
	x = 3 + ia_bits - ((3 - wi->sl) * stride + wi->pgshift);

	wi->baddr = ttbr & TTBRx_EL1_BADDR;

	/* R_VPBBF */
	if (check_output_size(wi->baddr, wi))
		goto addrsz;

	wi->baddr &= GENMASK_ULL(wi->max_oa_bits - 1, x);

	return 0;

addrsz:		/* Address Size Fault level 0 */
	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false, false);
	return -EFAULT;

transfault_l0:	/* Translation Fault level 0 */
	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(0), false, false);
	return -EFAULT;
}

static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
		   struct s1_walk_result *wr, u64 va)
{
	u64 va_top, va_bottom, baddr, desc;
	int level, stride, ret;

	level = wi->sl;
	stride = wi->pgshift - 3;
	baddr = wi->baddr;

	va_top = get_ia_size(wi) - 1;

	while (1) {
		u64 index, ipa;

		va_bottom = (3 - level) * stride + wi->pgshift;
		index = (va & GENMASK_ULL(va_top, va_bottom)) >> (va_bottom - 3);

		ipa = baddr | index;

		if (wi->s2) {
			struct kvm_s2_trans s2_trans = {};

			ret = kvm_walk_nested_s2(vcpu, ipa, &s2_trans);
			if (ret) {
				fail_s1_walk(wr,
					     (s2_trans.esr & ~ESR_ELx_FSC_LEVEL) | level,
					     true, true);
				return ret;
			}

			if (!kvm_s2_trans_readable(&s2_trans)) {
				fail_s1_walk(wr, ESR_ELx_FSC_PERM_L(level),
					     true, true);

				return -EPERM;
			}

			ipa = kvm_s2_trans_output(&s2_trans);
		}

		ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc));
		if (ret) {
			fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level),
				     true, false);
			return ret;
		}

		if (wi->be)
			desc = be64_to_cpu((__force __be64)desc);
		else
			desc = le64_to_cpu((__force __le64)desc);

		/* Invalid descriptor */
		if (!(desc & BIT(0)))
			goto transfault;

		/* Block mapping, check validity down the line */
		if (!(desc & BIT(1)))
			break;

		/* Page mapping */
		if (level == 3)
			break;

		/* Table handling */
		if (!wi->hpd) {
			wr->APTable |= FIELD_GET(S1_TABLE_AP, desc);
			wr->UXNTable |= FIELD_GET(PMD_TABLE_UXN, desc);
			wr->PXNTable |= FIELD_GET(PMD_TABLE_PXN, desc);
		}

		baddr = desc & GENMASK_ULL(47, wi->pgshift);

		/* Check for out-of-range OA */
		if (check_output_size(baddr, wi))
			goto addrsz;

		/* Prepare for next round */
		va_top = va_bottom - 1;
		level++;
	}

	/* Block mapping, check the validity of the level */
	if (!(desc & BIT(1))) {
		bool valid_block = false;

		switch (BIT(wi->pgshift)) {
		case SZ_4K:
			valid_block = level == 1 || level == 2;
			break;
		case SZ_16K:
		case SZ_64K:
			valid_block = level == 2;
			break;
		}

		if (!valid_block)
			goto transfault;
	}

	if (check_output_size(desc & GENMASK(47, va_bottom), wi))
		goto addrsz;

	va_bottom += contiguous_bit_shift(desc, wi, level);

	wr->failed = false;
	wr->level = level;
	wr->desc = desc;
	wr->pa = desc & GENMASK(47, va_bottom);
	wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0);

	return 0;

addrsz:
	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(level), true, false);
	return -EINVAL;
transfault:
	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(level), true, false);
	return -ENOENT;
}
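
/*
 * MMU context that the AT fast path saves and restores around
 * temporarily loading the guest's EL1&0 stage-1 registers and shadow
 * stage-2 (see __kvm_at_s1e01_fast()).
 */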
struct mmu_config {
	u64 ttbr0;
	u64 ttbr1;
	u64 tcr;
	u64 mair;
	u64 tcr2;
	u64 pir;
	u64 pire0;
	u64 por_el0;
	u64 por_el1;
	u64 sctlr;
	u64 vttbr;
	u64 vtcr;
	u64 hcr;
};

static void __mmu_config_save(struct mmu_config *config)
{
	config->ttbr0 = read_sysreg_el1(SYS_TTBR0);
	config->ttbr1 = read_sysreg_el1(SYS_TTBR1);
	config->tcr = read_sysreg_el1(SYS_TCR);
	config->mair = read_sysreg_el1(SYS_MAIR);
	if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
		config->tcr2 = read_sysreg_el1(SYS_TCR2);
		if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
			config->pir = read_sysreg_el1(SYS_PIR);
			config->pire0 = read_sysreg_el1(SYS_PIRE0);
		}
		if (system_supports_poe()) {
			config->por_el1 = read_sysreg_el1(SYS_POR);
			config->por_el0 = read_sysreg_s(SYS_POR_EL0);
		}
	}
	config->sctlr = read_sysreg_el1(SYS_SCTLR);
	config->vttbr = read_sysreg(vttbr_el2);
	config->vtcr = read_sysreg(vtcr_el2);
	config->hcr = read_sysreg(hcr_el2);
}

static void __mmu_config_restore(struct mmu_config *config)
{
	write_sysreg(config->hcr, hcr_el2);

	/*
	 * ARM errata 1165522 and 1530923 require TGE to be 1 before
	 * we update the guest state.
	 */
	asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));

	write_sysreg_el1(config->ttbr0, SYS_TTBR0);
	write_sysreg_el1(config->ttbr1, SYS_TTBR1);
	write_sysreg_el1(config->tcr, SYS_TCR);
	write_sysreg_el1(config->mair, SYS_MAIR);
	if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
		write_sysreg_el1(config->tcr2, SYS_TCR2);
		if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
			write_sysreg_el1(config->pir, SYS_PIR);
			write_sysreg_el1(config->pire0, SYS_PIRE0);
		}
		if (system_supports_poe()) {
			write_sysreg_el1(config->por_el1, SYS_POR);
			write_sysreg_s(config->por_el0, SYS_POR_EL0);
		}
	}
	write_sysreg_el1(config->sctlr, SYS_SCTLR);
	write_sysreg(config->vttbr, vttbr_el2);
	write_sysreg(config->vtcr, vtcr_el2);
}

static bool at_s1e1p_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 host_pan;
	bool fail;

	host_pan = read_sysreg_s(SYS_PSTATE_PAN);
	write_sysreg_s(*vcpu_cpsr(vcpu) & PSTATE_PAN, SYS_PSTATE_PAN);

	switch (op) {
	case OP_AT_S1E1RP:
		fail = __kvm_at(OP_AT_S1E1RP, vaddr);
		break;
	case OP_AT_S1E1WP:
		fail = __kvm_at(OP_AT_S1E1WP, vaddr);
		break;
	}

	write_sysreg_s(host_pan, SYS_PSTATE_PAN);

	return fail;
}

#define MEMATTR(ic, oc)		(MEMATTR_##oc << 4 | MEMATTR_##ic)
#define MEMATTR_NC		0b0100
#define MEMATTR_Wt		0b1000
#define MEMATTR_Wb		0b1100
#define MEMATTR_WbRaWa		0b1111

#define MEMATTR_IS_DEVICE(m)	(((m) & GENMASK(7, 4)) == 0)

static u8 s2_memattr_to_attr(u8 memattr)
{
	memattr &= 0b1111;

	switch (memattr) {
	case 0b0000:
	case 0b0001:
	case 0b0010:
	case 0b0011:
		return memattr << 2;
	case 0b0100:
		return MEMATTR(Wb, Wb);
	case 0b0101:
		return MEMATTR(NC, NC);
	case 0b0110:
		return MEMATTR(Wt, NC);
	case 0b0111:
		return MEMATTR(Wb, NC);
	case 0b1000:
		/* Reserved, assume NC */
		return MEMATTR(NC, NC);
	case 0b1001:
		return MEMATTR(NC, Wt);
	case 0b1010:
		return MEMATTR(Wt, Wt);
	case 0b1011:
		return MEMATTR(Wb, Wt);
	case 0b1100:
		/* Reserved, assume NC */
		return MEMATTR(NC, NC);
	case 0b1101:
		return MEMATTR(NC, Wb);
	case 0b1110:
		return MEMATTR(Wt, Wb);
	case 0b1111:
		return MEMATTR(Wb, Wb);
	default:
		unreachable();
	}
}
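
/*
 * S1/S2 cacheability is combined as in S2CombineS1AttrHints(): the
 * weaker type wins (NC over WT, WT over WB), with transient S1 types
 * first upgraded to non-transient and the allocation hints taken from
 * S1.
 */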
static u8 combine_s1_s2_attr(u8 s1, u8 s2)
{
	bool transient;
	u8 final = 0;

	/* Upgrade transient s1 to non-transient to simplify things */
	switch (s1) {
	case 0b0001 ... 0b0011:	/* Normal, Write-Through Transient */
		transient = true;
		s1 = MEMATTR_Wt | (s1 & GENMASK(1,0));
		break;
	case 0b0101 ... 0b0111:	/* Normal, Write-Back Transient */
		transient = true;
		s1 = MEMATTR_Wb | (s1 & GENMASK(1,0));
		break;
	default:
		transient = false;
	}

	/* S2CombineS1AttrHints() */
	if ((s1 & GENMASK(3, 2)) == MEMATTR_NC ||
	    (s2 & GENMASK(3, 2)) == MEMATTR_NC)
		final = MEMATTR_NC;
	else if ((s1 & GENMASK(3, 2)) == MEMATTR_Wt ||
		 (s2 & GENMASK(3, 2)) == MEMATTR_Wt)
		final = MEMATTR_Wt;
	else
		final = MEMATTR_Wb;

	if (final != MEMATTR_NC) {
		/* Inherit RaWa hints from S1 */
		if (transient) {
			switch (s1 & GENMASK(3, 2)) {
			case MEMATTR_Wt:
				final = 0;
				break;
			case MEMATTR_Wb:
				final = MEMATTR_NC;
				break;
			}
		}

		final |= s1 & GENMASK(1, 0);
	}

	return final;
}

#define ATTR_NSH	0b00
#define ATTR_RSV	0b01
#define ATTR_OSH	0b10
#define ATTR_ISH	0b11

static u8 compute_sh(u8 attr, u64 desc)
{
	u8 sh;

	/* Any form of device, as well as NC has SH[1:0]=0b10 */
	if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC))
		return ATTR_OSH;

	sh = FIELD_GET(PTE_SHARED, desc);
	if (sh == ATTR_RSV)	/* Reserved, mapped to NSH */
		sh = ATTR_NSH;

	return sh;
}

static u8 combine_sh(u8 s1_sh, u8 s2_sh)
{
	if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH)
		return ATTR_OSH;
	if (s1_sh == ATTR_ISH || s2_sh == ATTR_ISH)
		return ATTR_ISH;

	return ATTR_NSH;
}

static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
			   struct kvm_s2_trans *tr)
{
	u8 s1_parattr, s2_memattr, final_attr;
	u64 par;

	/* If S2 has failed to translate, report the damage */
	if (tr->esr) {
		par = SYS_PAR_EL1_RES1;
		par |= SYS_PAR_EL1_F;
		par |= SYS_PAR_EL1_S;
		par |= FIELD_PREP(SYS_PAR_EL1_FST, tr->esr);
		return par;
	}

	s1_parattr = FIELD_GET(SYS_PAR_EL1_ATTR, s1_par);
	s2_memattr = FIELD_GET(GENMASK(5, 2), tr->desc);

	if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_FWB) {
		if (!kvm_has_feat(vcpu->kvm, ID_AA64PFR2_EL1, MTEPERM, IMP))
			s2_memattr &= ~BIT(3);

		/* Combination of R_VRJSW and R_RHWZM */
		switch (s2_memattr) {
		case 0b0101:
			if (MEMATTR_IS_DEVICE(s1_parattr))
				final_attr = s1_parattr;
			else
				final_attr = MEMATTR(NC, NC);
			break;
		case 0b0110:
		case 0b1110:
			final_attr = MEMATTR(WbRaWa, WbRaWa);
			break;
		case 0b0111:
		case 0b1111:
			/* Preserve S1 attribute */
			final_attr = s1_parattr;
			break;
		case 0b0100:
		case 0b1100:
		case 0b1101:
			/* Reserved, do something non-silly */
			final_attr = s1_parattr;
			break;
		default:
			/*
			 * MemAttr[2]=0, Device from S2.
			 *
			 * FWB does not influence the way that stage 1
			 * memory types and attributes are combined
			 * with stage 2 Device type and attributes.
			 */
			final_attr = min(s2_memattr_to_attr(s2_memattr),
					 s1_parattr);
		}
	} else {
		/* Combination of R_HMNDG, R_TNHFM and R_GQFSF */
		u8 s2_parattr = s2_memattr_to_attr(s2_memattr);

		if (MEMATTR_IS_DEVICE(s1_parattr) ||
		    MEMATTR_IS_DEVICE(s2_parattr)) {
			final_attr = min(s1_parattr, s2_parattr);
		} else {
			/* At this stage, this is memory vs memory */
			final_attr = combine_s1_s2_attr(s1_parattr & 0xf,
							s2_parattr & 0xf);
			final_attr |= combine_s1_s2_attr(s1_parattr >> 4,
							 s2_parattr >> 4) << 4;
		}
	}

	if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_CD) &&
	    !MEMATTR_IS_DEVICE(final_attr))
		final_attr = MEMATTR(NC, NC);

	par = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr);
	par |= tr->output & GENMASK(47, 12);
	par |= FIELD_PREP(SYS_PAR_EL1_SH,
			  combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par),
				     compute_sh(final_attr, tr->desc)));

	return par;
}
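
/*
 * Encode a stage-1-only PAR_EL1 value: F/FST/PTW/S on failure, or the
 * output address plus the memory attributes and shareability derived
 * from MAIR_ELx, SCTLR_ELx.C and the descriptor on success.
 */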
static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr,
			  enum trans_regime regime)
{
	u64 par;

	if (wr->failed) {
		par = SYS_PAR_EL1_RES1;
		par |= SYS_PAR_EL1_F;
		par |= FIELD_PREP(SYS_PAR_EL1_FST, wr->fst);
		par |= wr->ptw ? SYS_PAR_EL1_PTW : 0;
		par |= wr->s2 ? SYS_PAR_EL1_S : 0;
	} else if (wr->level == S1_MMU_DISABLED) {
		/* MMU off or HCR_EL2.DC == 1 */
		par = SYS_PAR_EL1_NSE;
		par |= wr->pa & GENMASK_ULL(47, 12);

		if (regime == TR_EL10 &&
		    (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_DC)) {
			par |= FIELD_PREP(SYS_PAR_EL1_ATTR,
					  MEMATTR(WbRaWa, WbRaWa));
			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_NSH);
		} else {
			par |= FIELD_PREP(SYS_PAR_EL1_ATTR, 0); /* nGnRnE */
			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_OSH);
		}
	} else {
		u64 mair, sctlr;
		u8 sh;

		par = SYS_PAR_EL1_NSE;

		mair = (regime == TR_EL10 ?
			vcpu_read_sys_reg(vcpu, MAIR_EL1) :
			vcpu_read_sys_reg(vcpu, MAIR_EL2));

		mair >>= FIELD_GET(PTE_ATTRINDX_MASK, wr->desc) * 8;
		mair &= 0xff;

		sctlr = (regime == TR_EL10 ?
			 vcpu_read_sys_reg(vcpu, SCTLR_EL1) :
			 vcpu_read_sys_reg(vcpu, SCTLR_EL2));

		/* Force NC for memory if SCTLR_ELx.C is clear */
		if (!(sctlr & SCTLR_EL1_C) && !MEMATTR_IS_DEVICE(mair))
			mair = MEMATTR(NC, NC);

		par |= FIELD_PREP(SYS_PAR_EL1_ATTR, mair);
		par |= wr->pa & GENMASK_ULL(47, 12);

		sh = compute_sh(mair, wr->desc);
		par |= FIELD_PREP(SYS_PAR_EL1_SH, sh);
	}

	return par;
}

static bool pan3_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
{
	u64 sctlr;

	if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, PAN, PAN3))
		return false;

	if (s1pie_enabled(vcpu, regime))
		return true;

	if (regime == TR_EL10)
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
	else
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);

	return sctlr & SCTLR_EL1_EPAN;
}
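
/*
 * Two permission models can apply to the final descriptor: the
 * "direct" VMSAv8-64 scheme (AP bits plus UXN/PXN, possibly further
 * restricted by the APTable/XNTable bits accumulated during the walk)
 * and the "indirect" S1PIE scheme, where the descriptor only carries
 * an index into PIR(E0)_ELx.
 */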
static void compute_s1_direct_permissions(struct kvm_vcpu *vcpu,
					  struct s1_walk_info *wi,
					  struct s1_walk_result *wr)
{
	bool wxn;

	/* Non-hierarchical part of AArch64.S1DirectBasePermissions() */
	if (wi->regime != TR_EL2) {
		switch (FIELD_GET(PTE_USER | PTE_RDONLY, wr->desc)) {
		case 0b00:
			wr->pr = wr->pw = true;
			wr->ur = wr->uw = false;
			break;
		case 0b01:
			wr->pr = wr->pw = wr->ur = wr->uw = true;
			break;
		case 0b10:
			wr->pr = true;
			wr->pw = wr->ur = wr->uw = false;
			break;
		case 0b11:
			wr->pr = wr->ur = true;
			wr->pw = wr->uw = false;
			break;
		}

		/* We don't use px for anything yet, but hey... */
		wr->px = !((wr->desc & PTE_PXN) || wr->uw);
		wr->ux = !(wr->desc & PTE_UXN);
	} else {
		wr->ur = wr->uw = wr->ux = false;

		if (!(wr->desc & PTE_RDONLY)) {
			wr->pr = wr->pw = true;
		} else {
			wr->pr = true;
			wr->pw = false;
		}

		/* XN maps to UXN */
		wr->px = !(wr->desc & PTE_UXN);
	}

	switch (wi->regime) {
	case TR_EL2:
	case TR_EL20:
		wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_WXN);
		break;
	case TR_EL10:
		wxn = (__vcpu_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN);
		break;
	}

	wr->pwxn = wr->uwxn = wxn;
	wr->pov = wi->poe;
	wr->uov = wi->e0poe;
}

static void compute_s1_hierarchical_permissions(struct kvm_vcpu *vcpu,
						struct s1_walk_info *wi,
						struct s1_walk_result *wr)
{
	/* Hierarchical part of AArch64.S1DirectBasePermissions() */
	if (wi->regime != TR_EL2) {
		switch (wr->APTable) {
		case 0b00:
			break;
		case 0b01:
			wr->ur = wr->uw = false;
			break;
		case 0b10:
			wr->pw = wr->uw = false;
			break;
		case 0b11:
			wr->pw = wr->ur = wr->uw = false;
			break;
		}

		wr->px &= !wr->PXNTable;
		wr->ux &= !wr->UXNTable;
	} else {
		if (wr->APTable & BIT(1))
			wr->pw = false;

		/* XN maps to UXN */
		wr->px &= !wr->UXNTable;
	}
}

#define perm_idx(v, r, i)	((vcpu_read_sys_reg((v), (r)) >> ((i) * 4)) & 0xf)

#define set_priv_perms(wr, r, w, x)	\
	do {				\
		(wr)->pr = (r);		\
		(wr)->pw = (w);		\
		(wr)->px = (x);		\
	} while (0)

#define set_unpriv_perms(wr, r, w, x)	\
	do {				\
		(wr)->ur = (r);		\
		(wr)->uw = (w);		\
		(wr)->ux = (x);		\
	} while (0)

#define set_priv_wxn(wr, v)		\
	do {				\
		(wr)->pwxn = (v);	\
	} while (0)

#define set_unpriv_wxn(wr, v)		\
	do {				\
		(wr)->uwxn = (v);	\
	} while (0)

/* Similar to AArch64.S1IndirectBasePermissions(), without GCS */
#define set_perms(w, wr, ip)						\
	do {								\
		/* R_LLZDZ */						\
		switch ((ip)) {						\
		case 0b0000:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		case 0b0001:						\
			set_ ## w ## _perms((wr), true , false, false);	\
			break;						\
		case 0b0010:						\
			set_ ## w ## _perms((wr), false, false, true );	\
			break;						\
		case 0b0011:						\
			set_ ## w ## _perms((wr), true , false, true );	\
			break;						\
		case 0b0100:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		case 0b0101:						\
			set_ ## w ## _perms((wr), true , true , false);	\
			break;						\
		case 0b0110:						\
			set_ ## w ## _perms((wr), true , true , true );	\
			break;						\
		case 0b0111:						\
			set_ ## w ## _perms((wr), true , true , true );	\
			break;						\
		case 0b1000:						\
			set_ ## w ## _perms((wr), true , false, false);	\
			break;						\
		case 0b1001:						\
			set_ ## w ## _perms((wr), true , false, false);	\
			break;						\
		case 0b1010:						\
			set_ ## w ## _perms((wr), true , false, true );	\
			break;						\
		case 0b1011:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		case 0b1100:						\
			set_ ## w ## _perms((wr), true , true , false);	\
			break;						\
		case 0b1101:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		case 0b1110:						\
			set_ ## w ## _perms((wr), true , true , true );	\
			break;						\
		case 0b1111:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		}							\
									\
		/* R_HJYGR */						\
		set_ ## w ## _wxn((wr), ((ip) == 0b0110));		\
									\
	} while (0)

static void compute_s1_indirect_permissions(struct kvm_vcpu *vcpu,
					    struct s1_walk_info *wi,
					    struct s1_walk_result *wr)
{
	u8 up, pp, idx;

	idx = pte_pi_index(wr->desc);

	switch (wi->regime) {
	case TR_EL10:
		pp = perm_idx(vcpu, PIR_EL1, idx);
		up = perm_idx(vcpu, PIRE0_EL1, idx);
		break;
	case TR_EL20:
		pp = perm_idx(vcpu, PIR_EL2, idx);
		up = perm_idx(vcpu, PIRE0_EL2, idx);
		break;
	case TR_EL2:
		pp = perm_idx(vcpu, PIR_EL2, idx);
		up = 0;
		break;
	}

	set_perms(priv, wr, pp);

	if (wi->regime != TR_EL2)
		set_perms(unpriv, wr, up);
	else
		set_unpriv_perms(wr, false, false, false);

	wr->pov = wi->poe && !(pp & BIT(3));
	wr->uov = wi->e0poe && !(up & BIT(3));

	/* R_VFPJF */
	if (wr->px && wr->uw) {
		set_priv_perms(wr, false, false, false);
		set_unpriv_perms(wr, false, false, false);
	}
}

static void compute_s1_overlay_permissions(struct kvm_vcpu *vcpu,
					   struct s1_walk_info *wi,
					   struct s1_walk_result *wr)
{
	u8 idx, pov_perms, uov_perms;

	idx = FIELD_GET(PTE_PO_IDX_MASK, wr->desc);

	switch (wi->regime) {
	case TR_EL10:
		pov_perms = perm_idx(vcpu, POR_EL1, idx);
		uov_perms = perm_idx(vcpu, POR_EL0, idx);
		break;
	case TR_EL20:
		pov_perms = perm_idx(vcpu, POR_EL2, idx);
		uov_perms = perm_idx(vcpu, POR_EL0, idx);
		break;
	case TR_EL2:
		pov_perms = perm_idx(vcpu, POR_EL2, idx);
		uov_perms = 0;
		break;
	}

	if (pov_perms & ~POE_RXW)
		pov_perms = POE_NONE;

	if (wi->poe && wr->pov) {
		wr->pr &= pov_perms & POE_R;
		wr->px &= pov_perms & POE_X;
		wr->pw &= pov_perms & POE_W;
	}

	if (uov_perms & ~POE_RXW)
		uov_perms = POE_NONE;

	if (wi->e0poe && wr->uov) {
		wr->ur &= uov_perms & POE_R;
		wr->ux &= uov_perms & POE_X;
		wr->uw &= uov_perms & POE_W;
	}
}
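
/*
 * Assemble the final S1 permissions: base permissions first (direct or
 * S1PIE indirect), then the hierarchical and overlay (POE)
 * restrictions, and finally the WXN and PAN adjustments (R_QXXPC,
 * R_NPBXC).
 */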
static void compute_s1_permissions(struct kvm_vcpu *vcpu,
				   struct s1_walk_info *wi,
				   struct s1_walk_result *wr)
{
	bool pan;

	if (!s1pie_enabled(vcpu, wi->regime))
		compute_s1_direct_permissions(vcpu, wi, wr);
	else
		compute_s1_indirect_permissions(vcpu, wi, wr);

	if (!wi->hpd)
		compute_s1_hierarchical_permissions(vcpu, wi, wr);

	if (wi->poe || wi->e0poe)
		compute_s1_overlay_permissions(vcpu, wi, wr);

	/* R_QXXPC */
	if (wr->pwxn) {
		if (!wr->pov && wr->pw)
			wr->px = false;
		if (wr->pov && wr->px)
			wr->pw = false;
	}

	/* R_NPBXC */
	if (wr->uwxn) {
		if (!wr->uov && wr->uw)
			wr->ux = false;
		if (wr->uov && wr->ux)
			wr->uw = false;
	}

	pan = wi->pan && (wr->ur || wr->uw ||
			  (pan3_enabled(vcpu, wi->regime) && wr->ux));
	wr->pw &= !pan;
	wr->pr &= !pan;
}

static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	struct s1_walk_result wr = {};
	struct s1_walk_info wi = {};
	bool perm_fail = false;
	int ret, idx;

	ret = setup_s1_walk(vcpu, op, &wi, &wr, vaddr);
	if (ret)
		goto compute_par;

	if (wr.level == S1_MMU_DISABLED)
		goto compute_par;

	idx = srcu_read_lock(&vcpu->kvm->srcu);

	ret = walk_s1(vcpu, &wi, &wr, vaddr);

	srcu_read_unlock(&vcpu->kvm->srcu, idx);

	if (ret)
		goto compute_par;

	compute_s1_permissions(vcpu, &wi, &wr);

	switch (op) {
	case OP_AT_S1E1RP:
	case OP_AT_S1E1R:
	case OP_AT_S1E2R:
		perm_fail = !wr.pr;
		break;
	case OP_AT_S1E1WP:
	case OP_AT_S1E1W:
	case OP_AT_S1E2W:
		perm_fail = !wr.pw;
		break;
	case OP_AT_S1E0R:
		perm_fail = !wr.ur;
		break;
	case OP_AT_S1E0W:
		perm_fail = !wr.uw;
		break;
	case OP_AT_S1E1A:
	case OP_AT_S1E2A:
		break;
	default:
		BUG();
	}

	if (perm_fail)
		fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false, false);

compute_par:
	return compute_par_s1(vcpu, &wr, wi.regime);
}

/*
 * Return the PAR_EL1 value as the result of a valid translation.
 *
 * If the translation is unsuccessful, the value may only contain
 * PAR_EL1.F, and cannot be taken at face value. It isn't an
 * indication of the translation having failed, only that the fast
 * path did not succeed, *unless* it indicates a S1 permission fault.
 */
static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	struct mmu_config config;
	struct kvm_s2_mmu *mmu;
	bool fail;
	u64 par;

	par = SYS_PAR_EL1_F;

	/*
	 * We've trapped, so everything is live on the CPU. As we will
	 * be switching contexts behind everybody's back, disable
	 * interrupts while holding the mmu lock.
	 */
	guard(write_lock_irqsave)(&vcpu->kvm->mmu_lock);

	/*
	 * If HCR_EL2.{E2H,TGE} == {1,1}, the MMU context is already
	 * the right one (as we trapped from vEL2). If not, save the
	 * full MMU context.
	 */
	if (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu))
		goto skip_mmu_switch;

	/*
	 * Obtaining the S2 MMU for a L2 is horribly racy, and we may not
	 * find it (recycled by another vcpu, for example). When this
	 * happens, admit defeat immediately and use the SW (slow) path.
	 */
	mmu = lookup_s2_mmu(vcpu);
	if (!mmu)
		return par;
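
	/*
	 * Save the MMU context that is currently live on the CPU, then
	 * load the guest's EL1&0 stage-1 registers and its shadow
	 * stage-2 before issuing the AT instruction.
	 */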
	__mmu_config_save(&config);

	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR0_EL1), SYS_TTBR0);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR1_EL1), SYS_TTBR1);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR_EL1), SYS_TCR);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, MAIR_EL1), SYS_MAIR);
	if (kvm_has_tcr2(vcpu->kvm)) {
		write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR2_EL1), SYS_TCR2);
		if (kvm_has_s1pie(vcpu->kvm)) {
			write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIR_EL1), SYS_PIR);
			write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIRE0_EL1), SYS_PIRE0);
		}
		if (kvm_has_s1poe(vcpu->kvm)) {
			write_sysreg_el1(vcpu_read_sys_reg(vcpu, POR_EL1), SYS_POR);
			write_sysreg_s(vcpu_read_sys_reg(vcpu, POR_EL0), SYS_POR_EL0);
		}
	}
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, SCTLR_EL1), SYS_SCTLR);
	__load_stage2(mmu, mmu->arch);

skip_mmu_switch:
	/* Clear TGE, enable S2 translation, we're rolling */
	write_sysreg((config.hcr & ~HCR_TGE) | HCR_VM, hcr_el2);
	isb();

	switch (op) {
	case OP_AT_S1E1RP:
	case OP_AT_S1E1WP:
		fail = at_s1e1p_fast(vcpu, op, vaddr);
		break;
	case OP_AT_S1E1R:
		fail = __kvm_at(OP_AT_S1E1R, vaddr);
		break;
	case OP_AT_S1E1W:
		fail = __kvm_at(OP_AT_S1E1W, vaddr);
		break;
	case OP_AT_S1E0R:
		fail = __kvm_at(OP_AT_S1E0R, vaddr);
		break;
	case OP_AT_S1E0W:
		fail = __kvm_at(OP_AT_S1E0W, vaddr);
		break;
	case OP_AT_S1E1A:
		fail = __kvm_at(OP_AT_S1E1A, vaddr);
		break;
	default:
		WARN_ON_ONCE(1);
		fail = true;
		break;
	}

	if (!fail)
		par = read_sysreg_par();

	if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)))
		__mmu_config_restore(&config);

	return par;
}

static bool par_check_s1_perm_fault(u64 par)
{
	u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par);

	return ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM &&
		!(par & SYS_PAR_EL1_S));
}

void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr);

	/*
	 * If PAR_EL1 reports that AT failed on a S1 permission fault, we
	 * know for sure that the PTW was able to walk the S1 tables and
	 * there's nothing else to do.
	 *
	 * If AT failed for any other reason, then we must walk the guest S1
	 * to emulate the instruction.
	 */
	if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
		par = handle_at_slow(vcpu, op, vaddr);

	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}

void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 par;

	/*
	 * We've trapped, so everything is live on the CPU. As we will be
	 * switching context behind everybody's back, disable interrupts...
	 */
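	/*
	 * Note: for a non-VHE guest hypervisor (E2H=0), HCR_EL2.{NV,NV1}
	 * are set below so that, with FEAT_NV1, the hardware EL1&0 regime
	 * interprets the guest's EL2 tables with EL2-like semantics while
	 * the S1E1* instruction stands in for the emulated S1E2* one.
	 */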
	scoped_guard(write_lock_irqsave, &vcpu->kvm->mmu_lock) {
		u64 val, hcr;
		bool fail;

		val = hcr = read_sysreg(hcr_el2);
		val &= ~HCR_TGE;
		val |= HCR_VM;

		if (!vcpu_el2_e2h_is_set(vcpu))
			val |= HCR_NV | HCR_NV1;

		write_sysreg(val, hcr_el2);
		isb();

		par = SYS_PAR_EL1_F;

		switch (op) {
		case OP_AT_S1E2R:
			fail = __kvm_at(OP_AT_S1E1R, vaddr);
			break;
		case OP_AT_S1E2W:
			fail = __kvm_at(OP_AT_S1E1W, vaddr);
			break;
		case OP_AT_S1E2A:
			fail = __kvm_at(OP_AT_S1E1A, vaddr);
			break;
		default:
			WARN_ON_ONCE(1);
			fail = true;
		}

		isb();

		if (!fail)
			par = read_sysreg_par();

		write_sysreg(hcr, hcr_el2);
		isb();
	}

	/* We failed the translation, let's replay it in slow motion */
	if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
		par = handle_at_slow(vcpu, op, vaddr);

	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}

void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	struct kvm_s2_trans out = {};
	u64 ipa, par;
	bool write;
	int ret;

	/* Do the stage-1 translation */
	switch (op) {
	case OP_AT_S12E1R:
		op = OP_AT_S1E1R;
		write = false;
		break;
	case OP_AT_S12E1W:
		op = OP_AT_S1E1W;
		write = true;
		break;
	case OP_AT_S12E0R:
		op = OP_AT_S1E0R;
		write = false;
		break;
	case OP_AT_S12E0W:
		op = OP_AT_S1E0W;
		write = true;
		break;
	default:
		WARN_ON_ONCE(1);
		return;
	}

	__kvm_at_s1e01(vcpu, op, vaddr);
	par = vcpu_read_sys_reg(vcpu, PAR_EL1);
	if (par & SYS_PAR_EL1_F)
		return;

	/*
	 * If we only have a single stage of translation (E2H=0 or
	 * TGE=1), exit early. Same thing if {VM,DC}=={0,0}.
	 */
	if (!vcpu_el2_e2h_is_set(vcpu) || vcpu_el2_tge_is_set(vcpu) ||
	    !(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC)))
		return;

	/* Do the stage-2 translation */
	ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0));
	out.esr = 0;
	ret = kvm_walk_nested_s2(vcpu, ipa, &out);
	if (ret < 0)
		return;

	/* Check the access permission */
	if (!out.esr &&
	    ((!write && !out.readable) || (write && !out.writable)))
		out.esr = ESR_ELx_FSC_PERM_L(out.level & 0x3);

	par = compute_par_s12(vcpu, par, &out);
	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}