// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017 - Linaro Ltd
 * Author: Jintack Lim <jintack.lim@linaro.org>
 */

#include <linux/kvm_host.h>

#include <asm/esr.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>

enum trans_regime {
	TR_EL10,
	TR_EL20,
	TR_EL2,
};

struct s1_walk_info {
	u64			baddr;
	enum trans_regime	regime;
	unsigned int		max_oa_bits;
	unsigned int		pgshift;
	unsigned int		txsz;
	int			sl;
	bool			hpd;
	bool			e0poe;
	bool			poe;
	bool			pan;
	bool			be;
	bool			s2;
};

struct s1_walk_result {
	union {
		struct {
			u64	desc;
			u64	pa;
			s8	level;
			u8	APTable;
			bool	UXNTable;
			bool	PXNTable;
			bool	uwxn;
			bool	uov;
			bool	ur;
			bool	uw;
			bool	ux;
			bool	pwxn;
			bool	pov;
			bool	pr;
			bool	pw;
			bool	px;
		};
		struct {
			u8	fst;
			bool	ptw;
			bool	s2;
		};
	};
	bool	failed;
};

static void fail_s1_walk(struct s1_walk_result *wr, u8 fst, bool ptw, bool s2)
{
	wr->fst		= fst;
	wr->ptw		= ptw;
	wr->s2		= s2;
	wr->failed	= true;
}

#define S1_MMU_DISABLED		(-127)

static int get_ia_size(struct s1_walk_info *wi)
{
	return 64 - wi->txsz;
}

/* Return true if the IPA is out of the OA range */
static bool check_output_size(u64 ipa, struct s1_walk_info *wi)
{
	return wi->max_oa_bits < 48 && (ipa & GENMASK_ULL(47, wi->max_oa_bits));
}

/* Return the translation regime that applies to an AT instruction */
static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 op)
{
	/*
	 * We only get here from guest EL2, so the translation
	 * regime AT applies to is solely defined by {E2H,TGE}.
	 */
	switch (op) {
	case OP_AT_S1E2R:
	case OP_AT_S1E2W:
	case OP_AT_S1E2A:
		return vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2;
	default:
		return (vcpu_el2_e2h_is_set(vcpu) &&
			vcpu_el2_tge_is_set(vcpu)) ? TR_EL20 : TR_EL10;
	}
}

static bool s1pie_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
{
	if (!kvm_has_s1pie(vcpu->kvm))
		return false;

	switch (regime) {
	case TR_EL2:
	case TR_EL20:
		return vcpu_read_sys_reg(vcpu, TCR2_EL2) & TCR2_EL2_PIE;
	case TR_EL10:
		return (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En) &&
		       (__vcpu_sys_reg(vcpu, TCR2_EL1) & TCR2_EL1x_PIE);
	default:
		BUG();
	}
}

static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi)
{
	u64 val;

	if (!kvm_has_s1poe(vcpu->kvm)) {
		wi->poe = wi->e0poe = false;
		return;
	}

	switch (wi->regime) {
	case TR_EL2:
	case TR_EL20:
		val = vcpu_read_sys_reg(vcpu, TCR2_EL2);
		wi->poe = val & TCR2_EL2_POE;
		wi->e0poe = (wi->regime == TR_EL20) && (val & TCR2_EL2_E0POE);
		break;
	case TR_EL10:
		if (!(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En)) {
			wi->poe = wi->e0poe = false;
			return;
		}

		val = __vcpu_sys_reg(vcpu, TCR2_EL1);
		wi->poe = val & TCR2_EL1x_POE;
		wi->e0poe = val & TCR2_EL1x_E0POE;
	}
}
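
/*
 * Decode the translation controls (SCTLR/TCR/TTBR of the relevant
 * regime) and validate the faulting VA, filling in @wi for the
 * upcoming walk. With 4k pages and TxSZ=25, for instance, this yields
 * a 39bit input address and a level-1 starting point. On error, @wr is
 * stamped with a level-0 Address Size or Translation fault.
 */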
static int setup_s1_walk(struct kvm_vcpu *vcpu, u32 op, struct s1_walk_info *wi,
			 struct s1_walk_result *wr, u64 va)
{
	u64 hcr, sctlr, tcr, tg, ps, ia_bits, ttbr;
	unsigned int stride, x;
	bool va55, tbi, lva, as_el0;

	hcr = __vcpu_sys_reg(vcpu, HCR_EL2);

	wi->regime = compute_translation_regime(vcpu, op);
	as_el0 = (op == OP_AT_S1E0R || op == OP_AT_S1E0W);
	wi->pan = (op == OP_AT_S1E1RP || op == OP_AT_S1E1WP) &&
		  (*vcpu_cpsr(vcpu) & PSR_PAN_BIT);

	va55 = va & BIT(55);

	if (wi->regime == TR_EL2 && va55)
		goto addrsz;

	wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC));

	switch (wi->regime) {
	case TR_EL10:
		sctlr	= vcpu_read_sys_reg(vcpu, SCTLR_EL1);
		tcr	= vcpu_read_sys_reg(vcpu, TCR_EL1);
		ttbr	= (va55 ?
			   vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
			   vcpu_read_sys_reg(vcpu, TTBR0_EL1));
		break;
	case TR_EL2:
	case TR_EL20:
		sctlr	= vcpu_read_sys_reg(vcpu, SCTLR_EL2);
		tcr	= vcpu_read_sys_reg(vcpu, TCR_EL2);
		ttbr	= (va55 ?
			   vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
			   vcpu_read_sys_reg(vcpu, TTBR0_EL2));
		break;
	default:
		BUG();
	}

	tbi = (wi->regime == TR_EL2 ?
	       FIELD_GET(TCR_EL2_TBI, tcr) :
	       (va55 ?
		FIELD_GET(TCR_TBI1, tcr) :
		FIELD_GET(TCR_TBI0, tcr)));

	if (!tbi && (u64)sign_extend64(va, 55) != va)
		goto addrsz;

	va = (u64)sign_extend64(va, 55);

	/* Let's put the MMU disabled case aside immediately */
	switch (wi->regime) {
	case TR_EL10:
		/*
		 * If dealing with the EL1&0 translation regime, 3 things
		 * can disable the S1 translation:
		 *
		 * - HCR_EL2.DC = 1
		 * - HCR_EL2.{E2H,TGE} = {0,1}
		 * - SCTLR_EL1.M = 0
		 *
		 * The TGE part is interesting. If we have decided that this
		 * is EL1&0, then it means that either {E2H,TGE} == {1,0} or
		 * {0,x}, and we only need to test for TGE == 1.
		 */
		if (hcr & (HCR_DC | HCR_TGE)) {
			wr->level = S1_MMU_DISABLED;
			break;
		}
		fallthrough;
	case TR_EL2:
	case TR_EL20:
		if (!(sctlr & SCTLR_ELx_M))
			wr->level = S1_MMU_DISABLED;
		break;
	}

	if (wr->level == S1_MMU_DISABLED) {
		if (va >= BIT(kvm_get_pa_bits(vcpu->kvm)))
			goto addrsz;

		wr->pa = va;
		return 0;
	}

	wi->be = sctlr & SCTLR_ELx_EE;

	wi->hpd = kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, HPDS, IMP);
	wi->hpd &= (wi->regime == TR_EL2 ?
		    FIELD_GET(TCR_EL2_HPD, tcr) :
		    (va55 ?
		     FIELD_GET(TCR_HPD1, tcr) :
		     FIELD_GET(TCR_HPD0, tcr)));
	/* R_JHSVW */
	wi->hpd |= s1pie_enabled(vcpu, wi->regime);

	/* Do we have POE? */
	compute_s1poe(vcpu, wi);

	/* R_BVXDG */
	wi->hpd |= (wi->poe || wi->e0poe);

	/* Someone was silly enough to encode TG0/TG1 differently */
	if (va55) {
		wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
		tg = FIELD_GET(TCR_TG1_MASK, tcr);

		switch (tg << TCR_TG1_SHIFT) {
		case TCR_TG1_4K:
			wi->pgshift = 12;
			break;
		case TCR_TG1_16K:
			wi->pgshift = 14;
			break;
		case TCR_TG1_64K:
		default:	/* IMPDEF: treat any other value as 64k */
			wi->pgshift = 16;
			break;
		}
	} else {
		wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
		tg = FIELD_GET(TCR_TG0_MASK, tcr);

		switch (tg << TCR_TG0_SHIFT) {
		case TCR_TG0_4K:
			wi->pgshift = 12;
			break;
		case TCR_TG0_16K:
			wi->pgshift = 14;
			break;
		case TCR_TG0_64K:
		default:	/* IMPDEF: treat any other value as 64k */
			wi->pgshift = 16;
			break;
		}
	}

	/* R_PLCGL, R_YXNYW */
	if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR2_EL1, ST, 48_47)) {
		if (wi->txsz > 39)
			goto transfault_l0;
	} else {
		if (wi->txsz > 48 || (BIT(wi->pgshift) == SZ_64K && wi->txsz > 47))
			goto transfault_l0;
	}

	/* R_GTJBY, R_SXWGM */
	switch (BIT(wi->pgshift)) {
	case SZ_4K:
		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT);
		lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
		break;
	case SZ_16K:
		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT);
		lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
		break;
	case SZ_64K:
		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, VARange, 52);
		break;
	}

	if ((lva && wi->txsz < 12) || (!lva && wi->txsz < 16))
		goto transfault_l0;

	ia_bits = get_ia_size(wi);

	/* R_YYVYV, I_THCZK */
	if ((!va55 && va > GENMASK(ia_bits - 1, 0)) ||
	    (va55 && va < GENMASK(63, ia_bits)))
		goto transfault_l0;

	/* I_ZFSYQ */
	if (wi->regime != TR_EL2 &&
	    (tcr & (va55 ? TCR_EPD1_MASK : TCR_EPD0_MASK)))
		goto transfault_l0;

	/* R_BNDVG and following statements */
	if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, E0PD, IMP) &&
	    as_el0 && (tcr & (va55 ? TCR_E0PD1 : TCR_E0PD0)))
		goto transfault_l0;

	/* AArch64.S1StartLevel() */
	stride = wi->pgshift - 3;
	wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride);

	ps = (wi->regime == TR_EL2 ?
	      FIELD_GET(TCR_EL2_PS_MASK, tcr) : FIELD_GET(TCR_IPS_MASK, tcr));

	wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps));

	/* Compute minimal alignment */
	x = 3 + ia_bits - ((3 - wi->sl) * stride + wi->pgshift);

	wi->baddr = ttbr & TTBRx_EL1_BADDR;

	/* R_VPBBF */
	if (check_output_size(wi->baddr, wi))
		goto addrsz;

	wi->baddr &= GENMASK_ULL(wi->max_oa_bits - 1, x);

	return 0;

addrsz:			/* Address Size Fault level 0 */
	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false, false);
	return -EFAULT;

transfault_l0:		/* Translation Fault level 0 */
	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(0), false, false);
	return -EFAULT;
}
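
/*
 * Perform the actual table walk using guest memory: at each level the
 * descriptor address is an IPA that must first go through the virtual
 * stage-2 (when enabled) before being read with kvm_read_guest().
 * Hierarchical attributes are accumulated in @wr unless HPD applies.
 */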
static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
		   struct s1_walk_result *wr, u64 va)
{
	u64 va_top, va_bottom, baddr, desc;
	int level, stride, ret;

	level = wi->sl;
	stride = wi->pgshift - 3;
	baddr = wi->baddr;

	va_top = get_ia_size(wi) - 1;

	while (1) {
		u64 index, ipa;

		va_bottom = (3 - level) * stride + wi->pgshift;
		index = (va & GENMASK_ULL(va_top, va_bottom)) >> (va_bottom - 3);

		ipa = baddr | index;

		if (wi->s2) {
			struct kvm_s2_trans s2_trans = {};

			ret = kvm_walk_nested_s2(vcpu, ipa, &s2_trans);
			if (ret) {
				fail_s1_walk(wr,
					     (s2_trans.esr & ~ESR_ELx_FSC_LEVEL) | level,
					     true, true);
				return ret;
			}

			if (!kvm_s2_trans_readable(&s2_trans)) {
				fail_s1_walk(wr, ESR_ELx_FSC_PERM_L(level),
					     true, true);

				return -EPERM;
			}

			ipa = kvm_s2_trans_output(&s2_trans);
		}

		ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc));
		if (ret) {
			fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level),
				     true, false);
			return ret;
		}

		if (wi->be)
			desc = be64_to_cpu((__force __be64)desc);
		else
			desc = le64_to_cpu((__force __le64)desc);

		/* Invalid descriptor */
		if (!(desc & BIT(0)))
			goto transfault;

		/* Block mapping, check validity down the line */
		if (!(desc & BIT(1)))
			break;

		/* Page mapping */
		if (level == 3)
			break;

		/* Table handling */
		if (!wi->hpd) {
			wr->APTable  |= FIELD_GET(S1_TABLE_AP, desc);
			wr->UXNTable |= FIELD_GET(PMD_TABLE_UXN, desc);
			wr->PXNTable |= FIELD_GET(PMD_TABLE_PXN, desc);
		}

		baddr = desc & GENMASK_ULL(47, wi->pgshift);

		/* Check for out-of-range OA */
		if (check_output_size(baddr, wi))
			goto addrsz;

		/* Prepare for next round */
		va_top = va_bottom - 1;
		level++;
	}

	/* Block mapping, check the validity of the level */
	if (!(desc & BIT(1))) {
		bool valid_block = false;

		switch (BIT(wi->pgshift)) {
		case SZ_4K:
			valid_block = level == 1 || level == 2;
			break;
		case SZ_16K:
		case SZ_64K:
			valid_block = level == 2;
			break;
		}

		if (!valid_block)
			goto transfault;
	}

	if (check_output_size(desc & GENMASK(47, va_bottom), wi))
		goto addrsz;

	va_bottom += contiguous_bit_shift(desc, wi, level);

	wr->failed = false;
	wr->level = level;
	wr->desc = desc;
	wr->pa = desc & GENMASK(47, va_bottom);
	wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0);

	return 0;

addrsz:
	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(level), true, false);
	return -EINVAL;
transfault:
	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(level), true, false);
	return -ENOENT;
}
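
/*
 * Snapshot of the MMU state live on the CPU, so that the HW-assisted
 * AT path can temporarily install the guest's EL1&0 context and put
 * everything back afterwards.
 */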
struct mmu_config {
	u64	ttbr0;
	u64	ttbr1;
	u64	tcr;
	u64	mair;
	u64	tcr2;
	u64	pir;
	u64	pire0;
	u64	por_el0;
	u64	por_el1;
	u64	sctlr;
	u64	vttbr;
	u64	vtcr;
	u64	hcr;
};

static void __mmu_config_save(struct mmu_config *config)
{
	config->ttbr0	= read_sysreg_el1(SYS_TTBR0);
	config->ttbr1	= read_sysreg_el1(SYS_TTBR1);
	config->tcr	= read_sysreg_el1(SYS_TCR);
	config->mair	= read_sysreg_el1(SYS_MAIR);
	if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
		config->tcr2	= read_sysreg_el1(SYS_TCR2);
		if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
			config->pir	= read_sysreg_el1(SYS_PIR);
			config->pire0	= read_sysreg_el1(SYS_PIRE0);
		}
		if (system_supports_poe()) {
			config->por_el1	= read_sysreg_el1(SYS_POR);
			config->por_el0	= read_sysreg_s(SYS_POR_EL0);
		}
	}
	config->sctlr	= read_sysreg_el1(SYS_SCTLR);
	config->vttbr	= read_sysreg(vttbr_el2);
	config->vtcr	= read_sysreg(vtcr_el2);
	config->hcr	= read_sysreg(hcr_el2);
}

static void __mmu_config_restore(struct mmu_config *config)
{
	write_sysreg(config->hcr, hcr_el2);

	/*
	 * ARM errata 1165522 and 1530923 require TGE to be 1 before
	 * we update the guest state.
	 */
	asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));

	write_sysreg_el1(config->ttbr0,	SYS_TTBR0);
	write_sysreg_el1(config->ttbr1,	SYS_TTBR1);
	write_sysreg_el1(config->tcr,	SYS_TCR);
	write_sysreg_el1(config->mair,	SYS_MAIR);
	if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
		write_sysreg_el1(config->tcr2, SYS_TCR2);
		if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
			write_sysreg_el1(config->pir, SYS_PIR);
			write_sysreg_el1(config->pire0, SYS_PIRE0);
		}
		if (system_supports_poe()) {
			write_sysreg_el1(config->por_el1, SYS_POR);
			write_sysreg_s(config->por_el0, SYS_POR_EL0);
		}
	}
	write_sysreg_el1(config->sctlr,	SYS_SCTLR);
	write_sysreg(config->vttbr, vttbr_el2);
	write_sysreg(config->vtcr, vtcr_el2);
}

static bool at_s1e1p_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 host_pan;
	bool fail;

	host_pan = read_sysreg_s(SYS_PSTATE_PAN);
	write_sysreg_s(*vcpu_cpsr(vcpu) & PSTATE_PAN, SYS_PSTATE_PAN);

	switch (op) {
	case OP_AT_S1E1RP:
		fail = __kvm_at(OP_AT_S1E1RP, vaddr);
		break;
	case OP_AT_S1E1WP:
		fail = __kvm_at(OP_AT_S1E1WP, vaddr);
		break;
	}

	write_sysreg_s(host_pan, SYS_PSTATE_PAN);

	return fail;
}

#define MEMATTR(ic, oc)		(MEMATTR_##oc << 4 | MEMATTR_##ic)
#define MEMATTR_NC		0b0100
#define MEMATTR_Wt		0b1000
#define MEMATTR_Wb		0b1100
#define MEMATTR_WbRaWa		0b1111

#define MEMATTR_IS_DEVICE(m)	(((m) & GENMASK(7, 4)) == 0)

static u8 s2_memattr_to_attr(u8 memattr)
{
	memattr &= 0b1111;

	switch (memattr) {
	case 0b0000:
	case 0b0001:
	case 0b0010:
	case 0b0011:
		return memattr << 2;
	case 0b0100:
		return MEMATTR(Wb, Wb);
	case 0b0101:
		return MEMATTR(NC, NC);
	case 0b0110:
		return MEMATTR(Wt, NC);
	case 0b0111:
		return MEMATTR(Wb, NC);
	case 0b1000:
		/* Reserved, assume NC */
		return MEMATTR(NC, NC);
	case 0b1001:
		return MEMATTR(NC, Wt);
	case 0b1010:
		return MEMATTR(Wt, Wt);
	case 0b1011:
		return MEMATTR(Wb, Wt);
	case 0b1100:
		/* Reserved, assume NC */
		return MEMATTR(NC, NC);
	case 0b1101:
		return MEMATTR(NC, Wb);
	case 0b1110:
		return MEMATTR(Wt, Wb);
	case 0b1111:
		return MEMATTR(Wb, Wb);
	default:
		unreachable();
	}
}
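
/*
 * Combine the S1 and S2 cacheability of a single (inner or outer)
 * attribute nibble, following S2CombineS1AttrHints(): NC wins over WT,
 * which wins over WB, the allocation hints come from S1, and S1
 * transientness is preserved in the resulting encoding.
 */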
static u8 combine_s1_s2_attr(u8 s1, u8 s2)
{
	bool transient;
	u8 final = 0;

	/* Upgrade transient s1 to non-transient to simplify things */
	switch (s1) {
	case 0b0001 ... 0b0011:	/* Normal, Write-Through Transient */
		transient = true;
		s1 = MEMATTR_Wt | (s1 & GENMASK(1, 0));
		break;
	case 0b0101 ... 0b0111:	/* Normal, Write-Back Transient */
		transient = true;
		s1 = MEMATTR_Wb | (s1 & GENMASK(1, 0));
		break;
	default:
		transient = false;
	}

	/* S2CombineS1AttrHints() */
	if ((s1 & GENMASK(3, 2)) == MEMATTR_NC ||
	    (s2 & GENMASK(3, 2)) == MEMATTR_NC)
		final = MEMATTR_NC;
	else if ((s1 & GENMASK(3, 2)) == MEMATTR_Wt ||
		 (s2 & GENMASK(3, 2)) == MEMATTR_Wt)
		final = MEMATTR_Wt;
	else
		final = MEMATTR_Wb;

	if (final != MEMATTR_NC) {
		/* Inherit RaWa hints from S1 */
		if (transient) {
			switch (s1 & GENMASK(3, 2)) {
			case MEMATTR_Wt:
				final = 0;
				break;
			case MEMATTR_Wb:
				final = MEMATTR_NC;
				break;
			}
		}

		final |= s1 & GENMASK(1, 0);
	}

	return final;
}

#define ATTR_NSH	0b00
#define ATTR_RSV	0b01
#define ATTR_OSH	0b10
#define ATTR_ISH	0b11

static u8 compute_sh(u8 attr, u64 desc)
{
	u8 sh;

	/* Any form of device, as well as NC has SH[1:0]=0b10 */
	if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC))
		return ATTR_OSH;

	sh = FIELD_GET(PTE_SHARED, desc);
	if (sh == ATTR_RSV)	/* Reserved, mapped to NSH */
		sh = ATTR_NSH;

	return sh;
}

static u8 combine_sh(u8 s1_sh, u8 s2_sh)
{
	if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH)
		return ATTR_OSH;
	if (s1_sh == ATTR_ISH || s2_sh == ATTR_ISH)
		return ATTR_ISH;

	return ATTR_NSH;
}
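
/*
 * Fold the result of a stage-2 walk into a stage-1 PAR_EL1 value,
 * combining the memory attributes and shareability of both stages
 * (or reporting the S2 fault if the walk failed).
 */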
static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
			   struct kvm_s2_trans *tr)
{
	u8 s1_parattr, s2_memattr, final_attr;
	u64 par;

	/* If S2 has failed to translate, report the damage */
	if (tr->esr) {
		par = SYS_PAR_EL1_RES1;
		par |= SYS_PAR_EL1_F;
		par |= SYS_PAR_EL1_S;
		par |= FIELD_PREP(SYS_PAR_EL1_FST, tr->esr);
		return par;
	}

	s1_parattr = FIELD_GET(SYS_PAR_EL1_ATTR, s1_par);
	s2_memattr = FIELD_GET(GENMASK(5, 2), tr->desc);

	if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_FWB) {
		if (!kvm_has_feat(vcpu->kvm, ID_AA64PFR2_EL1, MTEPERM, IMP))
			s2_memattr &= ~BIT(3);

		/* Combination of R_VRJSW and R_RHWZM */
		switch (s2_memattr) {
		case 0b0101:
			if (MEMATTR_IS_DEVICE(s1_parattr))
				final_attr = s1_parattr;
			else
				final_attr = MEMATTR(NC, NC);
			break;
		case 0b0110:
		case 0b1110:
			final_attr = MEMATTR(WbRaWa, WbRaWa);
			break;
		case 0b0111:
		case 0b1111:
			/* Preserve S1 attribute */
			final_attr = s1_parattr;
			break;
		case 0b0100:
		case 0b1100:
		case 0b1101:
			/* Reserved, do something non-silly */
			final_attr = s1_parattr;
			break;
		default:
			/* MemAttr[2]=0, Device from S2 */
			final_attr = (s2_memattr & GENMASK(1, 0)) << 2;
		}
	} else {
		/* Combination of R_HMNDG, R_TNHFM and R_GQFSF */
		u8 s2_parattr = s2_memattr_to_attr(s2_memattr);

		if (MEMATTR_IS_DEVICE(s1_parattr) ||
		    MEMATTR_IS_DEVICE(s2_parattr)) {
			final_attr = min(s1_parattr, s2_parattr);
		} else {
			/* At this stage, this is memory vs memory */
			final_attr  = combine_s1_s2_attr(s1_parattr & 0xf,
							 s2_parattr & 0xf);
			final_attr |= combine_s1_s2_attr(s1_parattr >> 4,
							 s2_parattr >> 4) << 4;
		}
	}

	if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_CD) &&
	    !MEMATTR_IS_DEVICE(final_attr))
		final_attr = MEMATTR(NC, NC);

	par  = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr);
	par |= tr->output & GENMASK(47, 12);
	par |= FIELD_PREP(SYS_PAR_EL1_SH,
			  combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par),
				     compute_sh(final_attr, tr->desc)));

	return par;
}
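
/*
 * Assemble PAR_EL1 for a stage-1-only translation: either the fault
 * syndrome, the MMU-off flat mapping, or the output address plus the
 * MAIR/shareability attributes of the final mapping.
 */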
static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr,
			  enum trans_regime regime)
{
	u64 par;

	if (wr->failed) {
		par = SYS_PAR_EL1_RES1;
		par |= SYS_PAR_EL1_F;
		par |= FIELD_PREP(SYS_PAR_EL1_FST, wr->fst);
		par |= wr->ptw ? SYS_PAR_EL1_PTW : 0;
		par |= wr->s2 ? SYS_PAR_EL1_S : 0;
	} else if (wr->level == S1_MMU_DISABLED) {
		/* MMU off or HCR_EL2.DC == 1 */
		par  = SYS_PAR_EL1_NSE;
		par |= wr->pa & GENMASK_ULL(47, 12);

		if (regime == TR_EL10 &&
		    (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_DC)) {
			par |= FIELD_PREP(SYS_PAR_EL1_ATTR,
					  MEMATTR(WbRaWa, WbRaWa));
			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_NSH);
		} else {
			par |= FIELD_PREP(SYS_PAR_EL1_ATTR, 0); /* nGnRnE */
			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_OSH);
		}
	} else {
		u64 mair, sctlr;
		u8 sh;

		par = SYS_PAR_EL1_NSE;

		mair = (regime == TR_EL10 ?
			vcpu_read_sys_reg(vcpu, MAIR_EL1) :
			vcpu_read_sys_reg(vcpu, MAIR_EL2));

		mair >>= FIELD_GET(PTE_ATTRINDX_MASK, wr->desc) * 8;
		mair &= 0xff;

		sctlr = (regime == TR_EL10 ?
			 vcpu_read_sys_reg(vcpu, SCTLR_EL1) :
			 vcpu_read_sys_reg(vcpu, SCTLR_EL2));

		/* Force NC for memory if SCTLR_ELx.C is clear */
		if (!(sctlr & SCTLR_EL1_C) && !MEMATTR_IS_DEVICE(mair))
			mair = MEMATTR(NC, NC);

		par |= FIELD_PREP(SYS_PAR_EL1_ATTR, mair);
		par |= wr->pa & GENMASK_ULL(47, 12);

		sh = compute_sh(mair, wr->desc);
		par |= FIELD_PREP(SYS_PAR_EL1_SH, sh);
	}

	return par;
}

static bool pan3_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
{
	u64 sctlr;

	if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, PAN, PAN3))
		return false;

	if (s1pie_enabled(vcpu, regime))
		return true;

	if (regime == TR_EL10)
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
	else
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);

	return sctlr & SCTLR_EL1_EPAN;
}

static void compute_s1_direct_permissions(struct kvm_vcpu *vcpu,
					  struct s1_walk_info *wi,
					  struct s1_walk_result *wr)
{
	bool wxn;

	/* Non-hierarchical part of AArch64.S1DirectBasePermissions() */
	if (wi->regime != TR_EL2) {
		switch (FIELD_GET(PTE_USER | PTE_RDONLY, wr->desc)) {
		case 0b00:
			wr->pr = wr->pw = true;
			wr->ur = wr->uw = false;
			break;
		case 0b01:
			wr->pr = wr->pw = wr->ur = wr->uw = true;
			break;
		case 0b10:
			wr->pr = true;
			wr->pw = wr->ur = wr->uw = false;
			break;
		case 0b11:
			wr->pr = wr->ur = true;
			wr->pw = wr->uw = false;
			break;
		}

		/* We don't use px for anything yet, but hey... */
		wr->px = !((wr->desc & PTE_PXN) || wr->uw);
		wr->ux = !(wr->desc & PTE_UXN);
	} else {
		wr->ur = wr->uw = wr->ux = false;

		if (!(wr->desc & PTE_RDONLY)) {
			wr->pr = wr->pw = true;
		} else {
			wr->pr = true;
			wr->pw = false;
		}

		/* XN maps to UXN */
		wr->px = !(wr->desc & PTE_UXN);
	}

	switch (wi->regime) {
	case TR_EL2:
	case TR_EL20:
		wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_WXN);
		break;
	case TR_EL10:
		wxn = (__vcpu_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN);
		break;
	}

	wr->pwxn = wr->uwxn = wxn;
	wr->pov = wi->poe;
	wr->uov = wi->e0poe;
}
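
/* Apply the APTable/UXNTable/PXNTable restrictions accumulated during the walk */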
static void compute_s1_hierarchical_permissions(struct kvm_vcpu *vcpu,
						struct s1_walk_info *wi,
						struct s1_walk_result *wr)
{
	/* Hierarchical part of AArch64.S1DirectBasePermissions() */
	if (wi->regime != TR_EL2) {
		switch (wr->APTable) {
		case 0b00:
			break;
		case 0b01:
			wr->ur = wr->uw = false;
			break;
		case 0b10:
			wr->pw = wr->uw = false;
			break;
		case 0b11:
			wr->pw = wr->ur = wr->uw = false;
			break;
		}

		wr->px &= !wr->PXNTable;
		wr->ux &= !wr->UXNTable;
	} else {
		if (wr->APTable & BIT(1))
			wr->pw = false;

		/* XN maps to UXN */
		wr->px &= !wr->UXNTable;
	}
}

#define perm_idx(v, r, i)	((vcpu_read_sys_reg((v), (r)) >> ((i) * 4)) & 0xf)

#define set_priv_perms(wr, r, w, x)	\
	do {				\
		(wr)->pr = (r);		\
		(wr)->pw = (w);		\
		(wr)->px = (x);		\
	} while (0)

#define set_unpriv_perms(wr, r, w, x)	\
	do {				\
		(wr)->ur = (r);		\
		(wr)->uw = (w);		\
		(wr)->ux = (x);		\
	} while (0)

#define set_priv_wxn(wr, v)		\
	do {				\
		(wr)->pwxn = (v);	\
	} while (0)

#define set_unpriv_wxn(wr, v)		\
	do {				\
		(wr)->uwxn = (v);	\
	} while (0)

/* Similar to AArch64.S1IndirectBasePermissions(), without GCS */
#define set_perms(w, wr, ip)						\
	do {								\
		/* R_LLZDZ */						\
		switch ((ip)) {						\
		case 0b0000:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		case 0b0001:						\
			set_ ## w ## _perms((wr), true , false, false);	\
			break;						\
		case 0b0010:						\
			set_ ## w ## _perms((wr), false, false, true );	\
			break;						\
		case 0b0011:						\
			set_ ## w ## _perms((wr), true , false, true );	\
			break;						\
		case 0b0100:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		case 0b0101:						\
			set_ ## w ## _perms((wr), true , true , false);	\
			break;						\
		case 0b0110:						\
			set_ ## w ## _perms((wr), true , true , true );	\
			break;						\
		case 0b0111:						\
			set_ ## w ## _perms((wr), true , true , true );	\
			break;						\
		case 0b1000:						\
			set_ ## w ## _perms((wr), true , false, false);	\
			break;						\
		case 0b1001:						\
			set_ ## w ## _perms((wr), true , false, false);	\
			break;						\
		case 0b1010:						\
			set_ ## w ## _perms((wr), true , false, true );	\
			break;						\
		case 0b1011:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		case 0b1100:						\
			set_ ## w ## _perms((wr), true , true , false);	\
			break;						\
		case 0b1101:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		case 0b1110:						\
			set_ ## w ## _perms((wr), true , true , true );	\
			break;						\
		case 0b1111:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		}							\
									\
		/* R_HJYGR */						\
		set_ ## w ## _wxn((wr), ((ip) == 0b0110));		\
									\
	} while (0)
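
/*
 * FEAT_S1PIE: the descriptor's permission index selects a 4-bit
 * encoding from PIR_ELx (privileged) and PIRE0_ELx (unprivileged),
 * which is then turned into R/W/X, WXN and overlay state.
 */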
static void compute_s1_indirect_permissions(struct kvm_vcpu *vcpu,
					    struct s1_walk_info *wi,
					    struct s1_walk_result *wr)
{
	u8 up, pp, idx;

	idx = pte_pi_index(wr->desc);

	switch (wi->regime) {
	case TR_EL10:
		pp = perm_idx(vcpu, PIR_EL1, idx);
		up = perm_idx(vcpu, PIRE0_EL1, idx);
		break;
	case TR_EL20:
		pp = perm_idx(vcpu, PIR_EL2, idx);
		up = perm_idx(vcpu, PIRE0_EL2, idx);
		break;
	case TR_EL2:
		pp = perm_idx(vcpu, PIR_EL2, idx);
		up = 0;
		break;
	}

	set_perms(priv, wr, pp);

	if (wi->regime != TR_EL2)
		set_perms(unpriv, wr, up);
	else
		set_unpriv_perms(wr, false, false, false);

	wr->pov = wi->poe && !(pp & BIT(3));
	wr->uov = wi->e0poe && !(up & BIT(3));

	/* R_VFPJF */
	if (wr->px && wr->uw) {
		set_priv_perms(wr, false, false, false);
		set_unpriv_perms(wr, false, false, false);
	}
}

static void compute_s1_overlay_permissions(struct kvm_vcpu *vcpu,
					   struct s1_walk_info *wi,
					   struct s1_walk_result *wr)
{
	u8 idx, pov_perms, uov_perms;

	idx = FIELD_GET(PTE_PO_IDX_MASK, wr->desc);

	switch (wi->regime) {
	case TR_EL10:
		pov_perms = perm_idx(vcpu, POR_EL1, idx);
		uov_perms = perm_idx(vcpu, POR_EL0, idx);
		break;
	case TR_EL20:
		pov_perms = perm_idx(vcpu, POR_EL2, idx);
		uov_perms = perm_idx(vcpu, POR_EL0, idx);
		break;
	case TR_EL2:
		pov_perms = perm_idx(vcpu, POR_EL2, idx);
		uov_perms = 0;
		break;
	}

	if (pov_perms & ~POE_RXW)
		pov_perms = POE_NONE;

	if (wi->poe && wr->pov) {
		wr->pr &= pov_perms & POE_R;
		wr->px &= pov_perms & POE_X;
		wr->pw &= pov_perms & POE_W;
	}

	if (uov_perms & ~POE_RXW)
		uov_perms = POE_NONE;

	if (wi->e0poe && wr->uov) {
		wr->ur &= uov_perms & POE_R;
		wr->ux &= uov_perms & POE_X;
		wr->uw &= uov_perms & POE_W;
	}
}

static void compute_s1_permissions(struct kvm_vcpu *vcpu,
				   struct s1_walk_info *wi,
				   struct s1_walk_result *wr)
{
	bool pan;

	if (!s1pie_enabled(vcpu, wi->regime))
		compute_s1_direct_permissions(vcpu, wi, wr);
	else
		compute_s1_indirect_permissions(vcpu, wi, wr);

	if (!wi->hpd)
		compute_s1_hierarchical_permissions(vcpu, wi, wr);

	if (wi->poe || wi->e0poe)
		compute_s1_overlay_permissions(vcpu, wi, wr);

	/* R_QXXPC */
	if (wr->pwxn) {
		if (!wr->pov && wr->pw)
			wr->px = false;
		if (wr->pov && wr->px)
			wr->pw = false;
	}

	/* R_NPBXC */
	if (wr->uwxn) {
		if (!wr->uov && wr->uw)
			wr->ux = false;
		if (wr->uov && wr->ux)
			wr->uw = false;
	}

	pan = wi->pan && (wr->ur || wr->uw ||
			  (pan3_enabled(vcpu, wi->regime) && wr->ux));
	wr->pw &= !pan;
	wr->pr &= !pan;
}
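
/*
 * Emulate the AT instruction entirely in software: set up and run the
 * S1 walk, compute the resulting permissions, and fold everything into
 * a PAR_EL1 value.
 */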
static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	struct s1_walk_result wr = {};
	struct s1_walk_info wi = {};
	bool perm_fail = false;
	int ret, idx;

	ret = setup_s1_walk(vcpu, op, &wi, &wr, vaddr);
	if (ret)
		goto compute_par;

	if (wr.level == S1_MMU_DISABLED)
		goto compute_par;

	idx = srcu_read_lock(&vcpu->kvm->srcu);

	ret = walk_s1(vcpu, &wi, &wr, vaddr);

	srcu_read_unlock(&vcpu->kvm->srcu, idx);

	if (ret)
		goto compute_par;

	compute_s1_permissions(vcpu, &wi, &wr);

	switch (op) {
	case OP_AT_S1E1RP:
	case OP_AT_S1E1R:
	case OP_AT_S1E2R:
		perm_fail = !wr.pr;
		break;
	case OP_AT_S1E1WP:
	case OP_AT_S1E1W:
	case OP_AT_S1E2W:
		perm_fail = !wr.pw;
		break;
	case OP_AT_S1E0R:
		perm_fail = !wr.ur;
		break;
	case OP_AT_S1E0W:
		perm_fail = !wr.uw;
		break;
	case OP_AT_S1E1A:
	case OP_AT_S1E2A:
		break;
	default:
		BUG();
	}

	if (perm_fail)
		fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false, false);

compute_par:
	return compute_par_s1(vcpu, &wr, wi.regime);
}

/*
 * Return the PAR_EL1 value as the result of a valid translation.
 *
 * If the translation is unsuccessful, the value may only contain
 * PAR_EL1.F, and cannot be taken at face value. It isn't an
 * indication of the translation having failed, only that the fast
 * path did not succeed, *unless* it indicates a S1 permission fault.
 */
static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	struct mmu_config config;
	struct kvm_s2_mmu *mmu;
	bool fail;
	u64 par;

	par = SYS_PAR_EL1_F;

	/*
	 * We've trapped, so everything is live on the CPU. As we will
	 * be switching contexts behind everybody's back, disable
	 * interrupts while holding the mmu lock.
	 */
	guard(write_lock_irqsave)(&vcpu->kvm->mmu_lock);

	/*
	 * If HCR_EL2.{E2H,TGE} == {1,1}, the MMU context is already
	 * the right one (as we trapped from vEL2). If not, save the
	 * full MMU context.
	 */
	if (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu))
		goto skip_mmu_switch;

	/*
	 * Obtaining the S2 MMU for a L2 is horribly racy, and we may not
	 * find it (recycled by another vcpu, for example). When this
	 * happens, admit defeat immediately and use the SW (slow) path.
	 */
	mmu = lookup_s2_mmu(vcpu);
	if (!mmu)
		return par;

	__mmu_config_save(&config);

	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR0_EL1), SYS_TTBR0);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR1_EL1), SYS_TTBR1);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR_EL1), SYS_TCR);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, MAIR_EL1), SYS_MAIR);
	if (kvm_has_tcr2(vcpu->kvm)) {
		write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR2_EL1), SYS_TCR2);
		if (kvm_has_s1pie(vcpu->kvm)) {
			write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIR_EL1), SYS_PIR);
			write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIRE0_EL1), SYS_PIRE0);
		}
		if (kvm_has_s1poe(vcpu->kvm)) {
			write_sysreg_el1(vcpu_read_sys_reg(vcpu, POR_EL1), SYS_POR);
			write_sysreg_s(vcpu_read_sys_reg(vcpu, POR_EL0), SYS_POR_EL0);
		}
	}
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, SCTLR_EL1), SYS_SCTLR);
	__load_stage2(mmu, mmu->arch);

skip_mmu_switch:
	/* Clear TGE, enable S2 translation, we're rolling */
	write_sysreg((read_sysreg(hcr_el2) & ~HCR_TGE) | HCR_VM, hcr_el2);
	isb();

	switch (op) {
	case OP_AT_S1E1RP:
	case OP_AT_S1E1WP:
		fail = at_s1e1p_fast(vcpu, op, vaddr);
		break;
	case OP_AT_S1E1R:
		fail = __kvm_at(OP_AT_S1E1R, vaddr);
		break;
	case OP_AT_S1E1W:
		fail = __kvm_at(OP_AT_S1E1W, vaddr);
		break;
	case OP_AT_S1E0R:
		fail = __kvm_at(OP_AT_S1E0R, vaddr);
		break;
	case OP_AT_S1E0W:
		fail = __kvm_at(OP_AT_S1E0W, vaddr);
		break;
	case OP_AT_S1E1A:
		fail = __kvm_at(OP_AT_S1E1A, vaddr);
		break;
	default:
		WARN_ON_ONCE(1);
		fail = true;
		break;
	}

	if (!fail)
		par = read_sysreg_par();

	if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)))
		__mmu_config_restore(&config);

	return par;
}

static bool par_check_s1_perm_fault(u64 par)
{
	u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par);

	return ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM &&
		!(par & SYS_PAR_EL1_S));
}
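
/*
 * AT S1E{0,1} emulation: try the HW-assisted fast path first, and only
 * fall back to the SW walker when PAR_EL1 reports something other than
 * a S1 permission fault.
 */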
void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr);

	/*
	 * If PAR_EL1 reports that AT failed on a S1 permission fault, we
	 * know for sure that the PTW was able to walk the S1 tables and
	 * there's nothing else to do.
	 *
	 * If AT failed for any other reason, then we must walk the guest S1
	 * to emulate the instruction.
	 */
	if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
		par = handle_at_slow(vcpu, op, vaddr);

	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}

void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 par;

	/*
	 * We've trapped, so everything is live on the CPU. As we will be
	 * switching context behind everybody's back, disable interrupts...
	 */
	scoped_guard(write_lock_irqsave, &vcpu->kvm->mmu_lock) {
		u64 val, hcr;
		bool fail;

		val = hcr = read_sysreg(hcr_el2);
		val &= ~HCR_TGE;
		val |= HCR_VM;

		if (!vcpu_el2_e2h_is_set(vcpu))
			val |= HCR_NV | HCR_NV1;

		write_sysreg(val, hcr_el2);
		isb();

		par = SYS_PAR_EL1_F;

		switch (op) {
		case OP_AT_S1E2R:
			fail = __kvm_at(OP_AT_S1E1R, vaddr);
			break;
		case OP_AT_S1E2W:
			fail = __kvm_at(OP_AT_S1E1W, vaddr);
			break;
		case OP_AT_S1E2A:
			fail = __kvm_at(OP_AT_S1E1A, vaddr);
			break;
		default:
			WARN_ON_ONCE(1);
			fail = true;
		}

		isb();

		if (!fail)
			par = read_sysreg_par();

		write_sysreg(hcr, hcr_el2);
		isb();
	}

	/* We failed the translation, let's replay it in slow motion */
	if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
		par = handle_at_slow(vcpu, op, vaddr);

	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}
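
/*
 * AT S12E{0,1} emulation: run the S1 part through __kvm_at_s1e01(), and
 * if a stage-2 is in effect, walk the virtual S2 and combine both
 * results into the final PAR_EL1.
 */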
void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	struct kvm_s2_trans out = {};
	u64 ipa, par;
	bool write;
	int ret;

	/* Do the stage-1 translation */
	switch (op) {
	case OP_AT_S12E1R:
		op = OP_AT_S1E1R;
		write = false;
		break;
	case OP_AT_S12E1W:
		op = OP_AT_S1E1W;
		write = true;
		break;
	case OP_AT_S12E0R:
		op = OP_AT_S1E0R;
		write = false;
		break;
	case OP_AT_S12E0W:
		op = OP_AT_S1E0W;
		write = true;
		break;
	default:
		WARN_ON_ONCE(1);
		return;
	}

	__kvm_at_s1e01(vcpu, op, vaddr);
	par = vcpu_read_sys_reg(vcpu, PAR_EL1);
	if (par & SYS_PAR_EL1_F)
		return;

	/*
	 * If we only have a single stage of translation (E2H=0 or
	 * TGE=1), exit early. Same thing if {VM,DC}=={0,0}.
	 */
	if (!vcpu_el2_e2h_is_set(vcpu) || vcpu_el2_tge_is_set(vcpu) ||
	    !(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC)))
		return;

	/* Do the stage-2 translation */
	ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0));
	out.esr = 0;
	ret = kvm_walk_nested_s2(vcpu, ipa, &out);
	if (ret < 0)
		return;

	/* Check the access permission */
	if (!out.esr &&
	    ((!write && !out.readable) || (write && !out.writable)))
		out.esr = ESR_ELx_FSC_PERM_L(out.level & 0x3);

	par = compute_par_s12(vcpu, par, &out);
	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}