// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017 - Linaro Ltd
 * Author: Jintack Lim <jintack.lim@linaro.org>
 */

#include <linux/kvm_host.h>

#include <asm/esr.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>

static void fail_s1_walk(struct s1_walk_result *wr, u8 fst, bool s1ptw)
{
	wr->fst = fst;
	wr->ptw = s1ptw;
	wr->s2 = s1ptw;
	wr->failed = true;
}

#define S1_MMU_DISABLED		(-127)

static int get_ia_size(struct s1_walk_info *wi)
{
	return 64 - wi->txsz;
}

/* Return true if the IPA is out of the OA range */
static bool check_output_size(u64 ipa, struct s1_walk_info *wi)
{
	if (wi->pa52bit)
		return wi->max_oa_bits < 52 && (ipa & GENMASK_ULL(51, wi->max_oa_bits));
	return wi->max_oa_bits < 48 && (ipa & GENMASK_ULL(47, wi->max_oa_bits));
}

static bool has_52bit_pa(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, u64 tcr)
{
	switch (BIT(wi->pgshift)) {
	case SZ_64K:
	default:	/* IMPDEF: treat any other value as 64k */
		if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, PARANGE, 52))
			return false;
		return ((wi->regime == TR_EL2 ?
			 FIELD_GET(TCR_EL2_PS_MASK, tcr) :
			 FIELD_GET(TCR_IPS_MASK, tcr)) == 0b0110);
	case SZ_16K:
		if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT))
			return false;
		break;
	case SZ_4K:
		if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT))
			return false;
		break;
	}

	return (tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS));
}

static u64 desc_to_oa(struct s1_walk_info *wi, u64 desc)
{
	u64 addr;

	if (!wi->pa52bit)
		return desc & GENMASK_ULL(47, wi->pgshift);

	switch (BIT(wi->pgshift)) {
	case SZ_4K:
	case SZ_16K:
		addr = desc & GENMASK_ULL(49, wi->pgshift);
		addr |= FIELD_GET(KVM_PTE_ADDR_51_50_LPA2, desc) << 50;
		break;
	case SZ_64K:
	default:	/* IMPDEF: treat any other value as 64k */
		addr = desc & GENMASK_ULL(47, wi->pgshift);
		addr |= FIELD_GET(KVM_PTE_ADDR_51_48, desc) << 48;
		break;
	}

	return addr;
}

/* Return the translation regime that applies to an AT instruction */
static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 op)
{
	/*
	 * We only get here from guest EL2, so the translation
	 * regime AT applies to is solely defined by {E2H,TGE}.
	 */
	switch (op) {
	case OP_AT_S1E2R:
	case OP_AT_S1E2W:
	case OP_AT_S1E2A:
		return vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2;
		break;
	default:
		return (vcpu_el2_e2h_is_set(vcpu) &&
			vcpu_el2_tge_is_set(vcpu)) ?
			TR_EL20 : TR_EL10;
	}
}

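/*
 * Return the effective TCR2_ELx value for the given translation regime,
 * with TCR2_EL1 reading as 0 for the EL1&0 regime when the guest
 * hypervisor has disabled it via HCRX_EL2.TCR2En.
 */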
static u64 effective_tcr2(struct kvm_vcpu *vcpu, enum trans_regime regime)
{
	if (regime == TR_EL10) {
		if (vcpu_has_nv(vcpu) &&
		    !(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En))
			return 0;

		return vcpu_read_sys_reg(vcpu, TCR2_EL1);
	}

	return vcpu_read_sys_reg(vcpu, TCR2_EL2);
}

static bool s1pie_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
{
	if (!kvm_has_s1pie(vcpu->kvm))
		return false;

	/* Abuse TCR2_EL1_PIE and use it for EL2 as well */
	return effective_tcr2(vcpu, regime) & TCR2_EL1_PIE;
}

static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi)
{
	u64 val;

	if (!kvm_has_s1poe(vcpu->kvm)) {
		wi->poe = wi->e0poe = false;
		return;
	}

	val = effective_tcr2(vcpu, wi->regime);

	/* Abuse TCR2_EL1_* for EL2 */
	wi->poe = val & TCR2_EL1_POE;
	wi->e0poe = (wi->regime != TR_EL2) && (val & TCR2_EL1_E0POE);
}

static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
			 struct s1_walk_result *wr, u64 va)
{
	u64 hcr, sctlr, tcr, tg, ps, ia_bits, ttbr;
	unsigned int stride, x;
	bool va55, tbi, lva;

	va55 = va & BIT(55);

	if (vcpu_has_nv(vcpu)) {
		hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
		wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC));
	} else {
		WARN_ON_ONCE(wi->regime != TR_EL10);
		wi->s2 = false;
		hcr = 0;
	}

	switch (wi->regime) {
	case TR_EL10:
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
		tcr = vcpu_read_sys_reg(vcpu, TCR_EL1);
		ttbr = (va55 ?
			vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
			vcpu_read_sys_reg(vcpu, TTBR0_EL1));
		break;
	case TR_EL2:
	case TR_EL20:
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);
		tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
		ttbr = (va55 ?
			vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
			vcpu_read_sys_reg(vcpu, TTBR0_EL2));
		break;
	default:
		BUG();
	}

	/* Someone was silly enough to encode TG0/TG1 differently */
	if (va55 && wi->regime != TR_EL2) {
		wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
		tg = FIELD_GET(TCR_TG1_MASK, tcr);

		switch (tg << TCR_TG1_SHIFT) {
		case TCR_TG1_4K:
			wi->pgshift = 12; break;
		case TCR_TG1_16K:
			wi->pgshift = 14; break;
		case TCR_TG1_64K:
		default:	/* IMPDEF: treat any other value as 64k */
			wi->pgshift = 16; break;
		}
	} else {
		wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
		tg = FIELD_GET(TCR_TG0_MASK, tcr);

		switch (tg << TCR_TG0_SHIFT) {
		case TCR_TG0_4K:
			wi->pgshift = 12; break;
		case TCR_TG0_16K:
			wi->pgshift = 14; break;
		case TCR_TG0_64K:
		default:	/* IMPDEF: treat any other value as 64k */
			wi->pgshift = 16; break;
		}
	}

	wi->pa52bit = has_52bit_pa(vcpu, wi, tcr);

	ia_bits = get_ia_size(wi);

	/* AArch64.S1StartLevel() */
	stride = wi->pgshift - 3;
	wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride);

	if (wi->regime == TR_EL2 && va55)
		goto addrsz;

	tbi = (wi->regime == TR_EL2 ?
	       FIELD_GET(TCR_EL2_TBI, tcr) :
	       (va55 ?
		FIELD_GET(TCR_TBI1, tcr) :
		FIELD_GET(TCR_TBI0, tcr)));

	if (!tbi && (u64)sign_extend64(va, 55) != va)
		goto addrsz;

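	/*
	 * Record the TCR shareability field: with LPA2, the SH bits in the
	 * descriptor are repurposed for OA[51:50], and compute_s1_sh()
	 * falls back to this value instead.
	 */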
	wi->sh = (wi->regime == TR_EL2 ?
		  FIELD_GET(TCR_EL2_SH0_MASK, tcr) :
		  (va55 ?
		   FIELD_GET(TCR_SH1_MASK, tcr) :
		   FIELD_GET(TCR_SH0_MASK, tcr)));

	va = (u64)sign_extend64(va, 55);

	/* Let's put the MMU disabled case aside immediately */
	switch (wi->regime) {
	case TR_EL10:
		/*
		 * If dealing with the EL1&0 translation regime, 3 things
		 * can disable the S1 translation:
		 *
		 * - HCR_EL2.DC = 1
		 * - HCR_EL2.{E2H,TGE} = {0,1}
		 * - SCTLR_EL1.M = 0
		 *
		 * The TGE part is interesting. If we have decided that this
		 * is EL1&0, then it means that either {E2H,TGE} == {1,0} or
		 * {0,x}, and we only need to test for TGE == 1.
		 */
		if (hcr & (HCR_DC | HCR_TGE)) {
			wr->level = S1_MMU_DISABLED;
			break;
		}
		fallthrough;
	case TR_EL2:
	case TR_EL20:
		if (!(sctlr & SCTLR_ELx_M))
			wr->level = S1_MMU_DISABLED;
		break;
	}

	if (wr->level == S1_MMU_DISABLED) {
		if (va >= BIT(kvm_get_pa_bits(vcpu->kvm)))
			goto addrsz;

		wr->pa = va;
		return 0;
	}

	wi->be = sctlr & SCTLR_ELx_EE;

	wi->hpd = kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, HPDS, IMP);
	wi->hpd &= (wi->regime == TR_EL2 ?
		    FIELD_GET(TCR_EL2_HPD, tcr) :
		    (va55 ?
		     FIELD_GET(TCR_HPD1, tcr) :
		     FIELD_GET(TCR_HPD0, tcr)));
	/* R_JHSVW */
	wi->hpd |= s1pie_enabled(vcpu, wi->regime);

	/* Do we have POE? */
	compute_s1poe(vcpu, wi);

	/* R_BVXDG */
	wi->hpd |= (wi->poe || wi->e0poe);

	/* R_PLCGL, R_YXNYW */
	if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR2_EL1, ST, 48_47)) {
		if (wi->txsz > 39)
			goto transfault;
	} else {
		if (wi->txsz > 48 || (BIT(wi->pgshift) == SZ_64K && wi->txsz > 47))
			goto transfault;
	}

	/* R_GTJBY, R_SXWGM */
	switch (BIT(wi->pgshift)) {
	case SZ_4K:
	case SZ_16K:
		lva = wi->pa52bit;
		break;
	case SZ_64K:
		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, VARange, 52);
		break;
	}

	if ((lva && wi->txsz < 12) || (!lva && wi->txsz < 16))
		goto transfault;

	/* R_YYVYV, I_THCZK */
	if ((!va55 && va > GENMASK(ia_bits - 1, 0)) ||
	    (va55 && va < GENMASK(63, ia_bits)))
		goto transfault;

	/* I_ZFSYQ */
	if (wi->regime != TR_EL2 &&
	    (tcr & (va55 ? TCR_EPD1_MASK : TCR_EPD0_MASK)))
		goto transfault;

	/* R_BNDVG and following statements */
	if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, E0PD, IMP) &&
	    wi->as_el0 && (tcr & (va55 ? TCR_E0PD1 : TCR_E0PD0)))
		goto transfault;

	ps = (wi->regime == TR_EL2 ?
	      FIELD_GET(TCR_EL2_PS_MASK, tcr) : FIELD_GET(TCR_IPS_MASK, tcr));

	wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps, wi->pa52bit));

	/* Compute minimal alignment */
	x = 3 + ia_bits - ((3 - wi->sl) * stride + wi->pgshift);

	wi->baddr = ttbr & TTBRx_EL1_BADDR;
	if (wi->pa52bit) {
		/*
		 * Force the alignment to 64 bytes for top-level tables
		 * smaller than 8 entries, since TTBR.BADDR[5:2] are used to
		 * store bits [51:48] of the first level of lookup.
		 */
		x = max(x, 6);

		wi->baddr |= FIELD_GET(GENMASK_ULL(5, 2), ttbr) << 48;
	}

	/* R_VPBBF */
	if (check_output_size(wi->baddr, wi))
		goto addrsz;

	wi->baddr &= GENMASK_ULL(wi->max_oa_bits - 1, x);

	return 0;

addrsz:
	/*
	 * Address Size Fault level 0 to indicate it comes from TTBR.
	 * yes, this is an oddity.
	 */
	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false);
	return -EFAULT;

transfault:
	/* Translation Fault on start level */
	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(wi->sl), false);
	return -EFAULT;
}

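/*
 * Walk the stage-1 page tables for @va as configured by setup_s1_walk().
 * Each table access is translated through stage-2 when S2 is enabled and
 * is offered to the optional walk filter. On success, @wr holds the final
 * descriptor, output address and accumulated hierarchical attributes.
 */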
static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
		   struct s1_walk_result *wr, u64 va)
{
	u64 va_top, va_bottom, baddr, desc;
	int level, stride, ret;

	level = wi->sl;
	stride = wi->pgshift - 3;
	baddr = wi->baddr;

	va_top = get_ia_size(wi) - 1;

	while (1) {
		u64 index, ipa;

		va_bottom = (3 - level) * stride + wi->pgshift;
		index = (va & GENMASK_ULL(va_top, va_bottom)) >> (va_bottom - 3);

		ipa = baddr | index;

		if (wi->s2) {
			struct kvm_s2_trans s2_trans = {};

			ret = kvm_walk_nested_s2(vcpu, ipa, &s2_trans);
			if (ret) {
				fail_s1_walk(wr,
					     (s2_trans.esr & ~ESR_ELx_FSC_LEVEL) | level,
					     true);
				return ret;
			}

			if (!kvm_s2_trans_readable(&s2_trans)) {
				fail_s1_walk(wr, ESR_ELx_FSC_PERM_L(level),
					     true);

				return -EPERM;
			}

			ipa = kvm_s2_trans_output(&s2_trans);
		}

		if (wi->filter) {
			ret = wi->filter->fn(&(struct s1_walk_context)
					     {
						.wi = wi,
						.table_ipa = baddr,
						.level = level,
					     }, wi->filter->priv);
			if (ret)
				return ret;
		}

		ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc));
		if (ret) {
			fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), false);
			return ret;
		}

		if (wi->be)
			desc = be64_to_cpu((__force __be64)desc);
		else
			desc = le64_to_cpu((__force __le64)desc);

		/* Invalid descriptor */
		if (!(desc & BIT(0)))
			goto transfault;

		/* Block mapping, check validity down the line */
		if (!(desc & BIT(1)))
			break;

		/* Page mapping */
		if (level == 3)
			break;

		/* Table handling */
		if (!wi->hpd) {
			wr->APTable |= FIELD_GET(S1_TABLE_AP, desc);
			wr->UXNTable |= FIELD_GET(PMD_TABLE_UXN, desc);
			wr->PXNTable |= FIELD_GET(PMD_TABLE_PXN, desc);
		}

		baddr = desc_to_oa(wi, desc);

		/* Check for out-of-range OA */
		if (check_output_size(baddr, wi))
			goto addrsz;

		/* Prepare for next round */
		va_top = va_bottom - 1;
		level++;
	}

	/* Block mapping, check the validity of the level */
	if (!(desc & BIT(1))) {
		bool valid_block = false;

		switch (BIT(wi->pgshift)) {
		case SZ_4K:
			valid_block = level == 1 || level == 2 || (wi->pa52bit && level == 0);
			break;
		case SZ_16K:
		case SZ_64K:
			valid_block = level == 2 || (wi->pa52bit && level == 1);
			break;
		}

		if (!valid_block)
			goto transfault;
	}

	baddr = desc_to_oa(wi, desc);
	if (check_output_size(baddr & GENMASK(52, va_bottom), wi))
		goto addrsz;

	if (!(desc & PTE_AF)) {
		fail_s1_walk(wr, ESR_ELx_FSC_ACCESS_L(level), false);
		return -EACCES;
	}

	va_bottom += contiguous_bit_shift(desc, wi, level);

	wr->failed = false;
	wr->level = level;
	wr->desc = desc;
	wr->pa = baddr & GENMASK(52, va_bottom);
	wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0);

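	/*
	 * For a non-global (nG) mapping, record the ASID that tags it:
	 * TCR_ELx.A1 selects the TTBR providing the ASID, which is then
	 * truncated to 8 bits unless 16-bit ASIDs are both implemented
	 * and enabled.
	 */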
	wr->nG = (wi->regime != TR_EL2) && (desc & PTE_NG);
	if (wr->nG) {
		u64 asid_ttbr, tcr;

		switch (wi->regime) {
		case TR_EL10:
			tcr = vcpu_read_sys_reg(vcpu, TCR_EL1);
			asid_ttbr = ((tcr & TCR_A1) ?
				     vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
				     vcpu_read_sys_reg(vcpu, TTBR0_EL1));
			break;
		case TR_EL20:
			tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
			asid_ttbr = ((tcr & TCR_A1) ?
				     vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
				     vcpu_read_sys_reg(vcpu, TTBR0_EL2));
			break;
		default:
			BUG();
		}

		wr->asid = FIELD_GET(TTBR_ASID_MASK, asid_ttbr);
		if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) ||
		    !(tcr & TCR_ASID16))
			wr->asid &= GENMASK(7, 0);
	}

	return 0;

addrsz:
	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(level), false);
	return -EINVAL;
transfault:
	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(level), false);
	return -ENOENT;
}

struct mmu_config {
	u64	ttbr0;
	u64	ttbr1;
	u64	tcr;
	u64	mair;
	u64	tcr2;
	u64	pir;
	u64	pire0;
	u64	por_el0;
	u64	por_el1;
	u64	sctlr;
	u64	vttbr;
	u64	vtcr;
};

static void __mmu_config_save(struct mmu_config *config)
{
	config->ttbr0 = read_sysreg_el1(SYS_TTBR0);
	config->ttbr1 = read_sysreg_el1(SYS_TTBR1);
	config->tcr = read_sysreg_el1(SYS_TCR);
	config->mair = read_sysreg_el1(SYS_MAIR);
	if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
		config->tcr2 = read_sysreg_el1(SYS_TCR2);
		if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
			config->pir = read_sysreg_el1(SYS_PIR);
			config->pire0 = read_sysreg_el1(SYS_PIRE0);
		}
		if (system_supports_poe()) {
			config->por_el1 = read_sysreg_el1(SYS_POR);
			config->por_el0 = read_sysreg_s(SYS_POR_EL0);
		}
	}
	config->sctlr = read_sysreg_el1(SYS_SCTLR);
	config->vttbr = read_sysreg(vttbr_el2);
	config->vtcr = read_sysreg(vtcr_el2);
}

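/* Restore the MMU context saved by __mmu_config_save() */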
static void __mmu_config_restore(struct mmu_config *config)
{
	/*
	 * ARM errata 1165522 and 1530923 require TGE to be 1 before
	 * we update the guest state.
	 */
	asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));

	write_sysreg_el1(config->ttbr0, SYS_TTBR0);
	write_sysreg_el1(config->ttbr1, SYS_TTBR1);
	write_sysreg_el1(config->tcr, SYS_TCR);
	write_sysreg_el1(config->mair, SYS_MAIR);
	if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
		write_sysreg_el1(config->tcr2, SYS_TCR2);
		if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
			write_sysreg_el1(config->pir, SYS_PIR);
			write_sysreg_el1(config->pire0, SYS_PIRE0);
		}
		if (system_supports_poe()) {
			write_sysreg_el1(config->por_el1, SYS_POR);
			write_sysreg_s(config->por_el0, SYS_POR_EL0);
		}
	}
	write_sysreg_el1(config->sctlr, SYS_SCTLR);
	write_sysreg(config->vttbr, vttbr_el2);
	write_sysreg(config->vtcr, vtcr_el2);
}

static bool at_s1e1p_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 host_pan;
	bool fail;

	host_pan = read_sysreg_s(SYS_PSTATE_PAN);
	write_sysreg_s(*vcpu_cpsr(vcpu) & PSTATE_PAN, SYS_PSTATE_PAN);

	switch (op) {
	case OP_AT_S1E1RP:
		fail = __kvm_at(OP_AT_S1E1RP, vaddr);
		break;
	case OP_AT_S1E1WP:
		fail = __kvm_at(OP_AT_S1E1WP, vaddr);
		break;
	}

	write_sysreg_s(host_pan, SYS_PSTATE_PAN);

	return fail;
}

#define MEMATTR(ic, oc)		(MEMATTR_##oc << 4 | MEMATTR_##ic)
#define MEMATTR_NC		0b0100
#define MEMATTR_Wt		0b1000
#define MEMATTR_Wb		0b1100
#define MEMATTR_WbRaWa		0b1111

#define MEMATTR_IS_DEVICE(m)	(((m) & GENMASK(7, 4)) == 0)

static u8 s2_memattr_to_attr(u8 memattr)
{
	memattr &= 0b1111;

	switch (memattr) {
	case 0b0000:
	case 0b0001:
	case 0b0010:
	case 0b0011:
		return memattr << 2;
	case 0b0100:
		return MEMATTR(Wb, Wb);
	case 0b0101:
		return MEMATTR(NC, NC);
	case 0b0110:
		return MEMATTR(Wt, NC);
	case 0b0111:
		return MEMATTR(Wb, NC);
	case 0b1000:
		/* Reserved, assume NC */
		return MEMATTR(NC, NC);
	case 0b1001:
		return MEMATTR(NC, Wt);
	case 0b1010:
		return MEMATTR(Wt, Wt);
	case 0b1011:
		return MEMATTR(Wb, Wt);
	case 0b1100:
		/* Reserved, assume NC */
		return MEMATTR(NC, NC);
	case 0b1101:
		return MEMATTR(NC, Wb);
	case 0b1110:
		return MEMATTR(Wt, Wb);
	case 0b1111:
		return MEMATTR(Wb, Wb);
	default:
		unreachable();
	}
}

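/*
 * Combine the S1 and S2 Normal memory attributes for a single (inner or
 * outer) cacheability nibble, following S2CombineS1AttrHints(): NC wins
 * over Write-Through, which wins over Write-Back, and the allocation
 * hints are inherited from S1.
 */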
static u8 combine_s1_s2_attr(u8 s1, u8 s2)
{
	bool transient;
	u8 final = 0;

	/* Upgrade transient s1 to non-transient to simplify things */
	switch (s1) {
	case 0b0001 ... 0b0011:	/* Normal, Write-Through Transient */
		transient = true;
		s1 = MEMATTR_Wt | (s1 & GENMASK(1, 0));
		break;
	case 0b0101 ... 0b0111:	/* Normal, Write-Back Transient */
		transient = true;
		s1 = MEMATTR_Wb | (s1 & GENMASK(1, 0));
		break;
	default:
		transient = false;
	}

	/* S2CombineS1AttrHints() */
	if ((s1 & GENMASK(3, 2)) == MEMATTR_NC ||
	    (s2 & GENMASK(3, 2)) == MEMATTR_NC)
		final = MEMATTR_NC;
	else if ((s1 & GENMASK(3, 2)) == MEMATTR_Wt ||
		 (s2 & GENMASK(3, 2)) == MEMATTR_Wt)
		final = MEMATTR_Wt;
	else
		final = MEMATTR_Wb;

	if (final != MEMATTR_NC) {
		/* Inherit RaWa hints from S1 */
		if (transient) {
			switch (s1 & GENMASK(3, 2)) {
			case MEMATTR_Wt:
				final = 0;
				break;
			case MEMATTR_Wb:
				final = MEMATTR_NC;
				break;
			}
		}

		final |= s1 & GENMASK(1, 0);
	}

	return final;
}

#define ATTR_NSH	0b00
#define ATTR_RSV	0b01
#define ATTR_OSH	0b10
#define ATTR_ISH	0b11

static u8 compute_final_sh(u8 attr, u8 sh)
{
	/* Any form of device, as well as NC has SH[1:0]=0b10 */
	if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC))
		return ATTR_OSH;

	if (sh == ATTR_RSV)	/* Reserved, mapped to NSH */
		sh = ATTR_NSH;

	return sh;
}

static u8 compute_s1_sh(struct s1_walk_info *wi, struct s1_walk_result *wr,
			u8 attr)
{
	u8 sh;

	/*
	 * non-52bit and LPA have their basic shareability described in the
	 * descriptor. LPA2 gets it from the corresponding field in TCR,
	 * conveniently recorded in the walk info.
	 */
	if (!wi->pa52bit || BIT(wi->pgshift) == SZ_64K)
		sh = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S1_SH, wr->desc);
	else
		sh = wi->sh;

	return compute_final_sh(attr, sh);
}

static u8 combine_sh(u8 s1_sh, u8 s2_sh)
{
	if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH)
		return ATTR_OSH;
	if (s1_sh == ATTR_ISH || s2_sh == ATTR_ISH)
		return ATTR_ISH;

	return ATTR_NSH;
}

static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
			   struct kvm_s2_trans *tr)
{
	u8 s1_parattr, s2_memattr, final_attr, s2_sh;
	u64 par;

	/* If S2 has failed to translate, report the damage */
	if (tr->esr) {
		par = SYS_PAR_EL1_RES1;
		par |= SYS_PAR_EL1_F;
		par |= SYS_PAR_EL1_S;
		par |= FIELD_PREP(SYS_PAR_EL1_FST, tr->esr);
		return par;
	}

	s1_parattr = FIELD_GET(SYS_PAR_EL1_ATTR, s1_par);
	s2_memattr = FIELD_GET(GENMASK(5, 2), tr->desc);

	if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_FWB) {
		if (!kvm_has_feat(vcpu->kvm, ID_AA64PFR2_EL1, MTEPERM, IMP))
			s2_memattr &= ~BIT(3);

		/* Combination of R_VRJSW and R_RHWZM */
		switch (s2_memattr) {
		case 0b0101:
			if (MEMATTR_IS_DEVICE(s1_parattr))
				final_attr = s1_parattr;
			else
				final_attr = MEMATTR(NC, NC);
			break;
		case 0b0110:
		case 0b1110:
			final_attr = MEMATTR(WbRaWa, WbRaWa);
			break;
		case 0b0111:
		case 0b1111:
			/* Preserve S1 attribute */
			final_attr = s1_parattr;
			break;
		case 0b0100:
		case 0b1100:
		case 0b1101:
			/* Reserved, do something non-silly */
			final_attr = s1_parattr;
			break;
		default:
			/*
			 * MemAttr[2]=0, Device from S2.
			 *
			 * FWB does not influence the way that stage 1
			 * memory types and attributes are combined
			 * with stage 2 Device type and attributes.
			 */
			final_attr = min(s2_memattr_to_attr(s2_memattr),
					 s1_parattr);
		}
	} else {
		/* Combination of R_HMNDG, R_TNHFM and R_GQFSF */
		u8 s2_parattr = s2_memattr_to_attr(s2_memattr);

		if (MEMATTR_IS_DEVICE(s1_parattr) ||
		    MEMATTR_IS_DEVICE(s2_parattr)) {
			final_attr = min(s1_parattr, s2_parattr);
		} else {
			/* At this stage, this is memory vs memory */
			final_attr = combine_s1_s2_attr(s1_parattr & 0xf,
							s2_parattr & 0xf);
			final_attr |= combine_s1_s2_attr(s1_parattr >> 4,
							 s2_parattr >> 4) << 4;
		}
	}

	if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_CD) &&
	    !MEMATTR_IS_DEVICE(final_attr))
		final_attr = MEMATTR(NC, NC);

	s2_sh = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S2_SH, tr->desc);

	par = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr);
	par |= tr->output & GENMASK(47, 12);
	par |= FIELD_PREP(SYS_PAR_EL1_SH,
			  combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par),
				     compute_final_sh(final_attr, s2_sh)));

	return par;
}

static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
			  struct s1_walk_result *wr)
{
	u64 par;

	if (wr->failed) {
		par = SYS_PAR_EL1_RES1;
		par |= SYS_PAR_EL1_F;
		par |= FIELD_PREP(SYS_PAR_EL1_FST, wr->fst);
		par |= wr->ptw ? SYS_PAR_EL1_PTW : 0;
		par |= wr->s2 ? SYS_PAR_EL1_S : 0;
	} else if (wr->level == S1_MMU_DISABLED) {
		/* MMU off or HCR_EL2.DC == 1 */
		par = SYS_PAR_EL1_NSE;
		par |= wr->pa & SYS_PAR_EL1_PA;

		if (wi->regime == TR_EL10 && vcpu_has_nv(vcpu) &&
		    (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_DC)) {
			par |= FIELD_PREP(SYS_PAR_EL1_ATTR,
					  MEMATTR(WbRaWa, WbRaWa));
			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_NSH);
		} else {
			par |= FIELD_PREP(SYS_PAR_EL1_ATTR, 0); /* nGnRnE */
			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_OSH);
		}
	} else {
		u64 mair, sctlr;
		u8 sh;

		par = SYS_PAR_EL1_NSE;

		mair = (wi->regime == TR_EL10 ?
			vcpu_read_sys_reg(vcpu, MAIR_EL1) :
			vcpu_read_sys_reg(vcpu, MAIR_EL2));

		mair >>= FIELD_GET(PTE_ATTRINDX_MASK, wr->desc) * 8;
		mair &= 0xff;

		sctlr = (wi->regime == TR_EL10 ?
			 vcpu_read_sys_reg(vcpu, SCTLR_EL1) :
			 vcpu_read_sys_reg(vcpu, SCTLR_EL2));

		/* Force NC for memory if SCTLR_ELx.C is clear */
		if (!(sctlr & SCTLR_EL1_C) && !MEMATTR_IS_DEVICE(mair))
			mair = MEMATTR(NC, NC);

		par |= FIELD_PREP(SYS_PAR_EL1_ATTR, mair);
		par |= wr->pa & SYS_PAR_EL1_PA;

		sh = compute_s1_sh(wi, wr, mair);
		par |= FIELD_PREP(SYS_PAR_EL1_SH, sh);
	}

	return par;
}

static bool pan3_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
{
	u64 sctlr;

	if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, PAN, PAN3))
		return false;

	if (s1pie_enabled(vcpu, regime))
		return true;

	if (regime == TR_EL10)
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
	else
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);

	return sctlr & SCTLR_EL1_EPAN;
}

static void compute_s1_direct_permissions(struct kvm_vcpu *vcpu,
					  struct s1_walk_info *wi,
					  struct s1_walk_result *wr)
{
	bool wxn;

	/* Non-hierarchical part of AArch64.S1DirectBasePermissions() */
	if (wi->regime != TR_EL2) {
		switch (FIELD_GET(PTE_USER | PTE_RDONLY, wr->desc)) {
		case 0b00:
			wr->pr = wr->pw = true;
			wr->ur = wr->uw = false;
			break;
		case 0b01:
			wr->pr = wr->pw = wr->ur = wr->uw = true;
			break;
		case 0b10:
			wr->pr = true;
			wr->pw = wr->ur = wr->uw = false;
			break;
		case 0b11:
			wr->pr = wr->ur = true;
			wr->pw = wr->uw = false;
			break;
		}

		/* We don't use px for anything yet, but hey... */
		wr->px = !((wr->desc & PTE_PXN) || wr->uw);
		wr->ux = !(wr->desc & PTE_UXN);
	} else {
		wr->ur = wr->uw = wr->ux = false;

		if (!(wr->desc & PTE_RDONLY)) {
			wr->pr = wr->pw = true;
		} else {
			wr->pr = true;
			wr->pw = false;
		}

		/* XN maps to UXN */
		wr->px = !(wr->desc & PTE_UXN);
	}

	switch (wi->regime) {
	case TR_EL2:
	case TR_EL20:
		wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_WXN);
		break;
	case TR_EL10:
		wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN);
		break;
	}

	wr->pwxn = wr->uwxn = wxn;
	wr->pov = wi->poe;
	wr->uov = wi->e0poe;
}

static void compute_s1_hierarchical_permissions(struct kvm_vcpu *vcpu,
						struct s1_walk_info *wi,
						struct s1_walk_result *wr)
{
	/* Hierarchical part of AArch64.S1DirectBasePermissions() */
	if (wi->regime != TR_EL2) {
		switch (wr->APTable) {
		case 0b00:
			break;
		case 0b01:
			wr->ur = wr->uw = false;
			break;
		case 0b10:
			wr->pw = wr->uw = false;
			break;
		case 0b11:
			wr->pw = wr->ur = wr->uw = false;
			break;
		}

		wr->px &= !wr->PXNTable;
		wr->ux &= !wr->UXNTable;
	} else {
		if (wr->APTable & BIT(1))
			wr->pw = false;

		/* XN maps to UXN */
		wr->px &= !wr->UXNTable;
	}
}

#define perm_idx(v, r, i)	((vcpu_read_sys_reg((v), (r)) >> ((i) * 4)) & 0xf)

#define set_priv_perms(wr, r, w, x)	\
	do {				\
		(wr)->pr = (r);		\
		(wr)->pw = (w);		\
		(wr)->px = (x);		\
	} while (0)

#define set_unpriv_perms(wr, r, w, x)	\
	do {				\
		(wr)->ur = (r);		\
		(wr)->uw = (w);		\
		(wr)->ux = (x);		\
	} while (0)

#define set_priv_wxn(wr, v)		\
	do {				\
		(wr)->pwxn = (v);	\
	} while (0)

#define set_unpriv_wxn(wr, v)		\
	do {				\
		(wr)->uwxn = (v);	\
	} while (0)

/* Similar to AArch64.S1IndirectBasePermissions(), without GCS */
#define set_perms(w, wr, ip)						\
	do {								\
		/* R_LLZDZ */						\
		switch ((ip)) {						\
		case 0b0000:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		case 0b0001:						\
			set_ ## w ## _perms((wr), true , false, false);	\
			break;						\
		case 0b0010:						\
			set_ ## w ## _perms((wr), false, false, true );	\
			break;						\
		case 0b0011:						\
			set_ ## w ## _perms((wr), true , false, true );	\
			break;						\
		case 0b0100:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		case 0b0101:						\
			set_ ## w ## _perms((wr), true , true , false);	\
			break;						\
		case 0b0110:						\
			set_ ## w ## _perms((wr), true , true , true );	\
			break;						\
		case 0b0111:						\
			set_ ## w ## _perms((wr), true , true , true );	\
			break;						\
		case 0b1000:						\
			set_ ## w ## _perms((wr), true , false, false);	\
			break;						\
		case 0b1001:						\
			set_ ## w ## _perms((wr), true , false, false);	\
			break;						\
		case 0b1010:						\
			set_ ## w ## _perms((wr), true , false, true );	\
			break;						\
		case 0b1011:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		case 0b1100:						\
			set_ ## w ## _perms((wr), true , true , false);	\
			break;						\
		case 0b1101:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		case 0b1110:						\
			set_ ## w ## _perms((wr), true , true , true );	\
			break;						\
		case 0b1111:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		}							\
									\
		/* R_HJYGR */						\
		set_ ## w ## _wxn((wr), ((ip) == 0b0110));		\
									\
	} while (0)

static void compute_s1_indirect_permissions(struct kvm_vcpu *vcpu,
					    struct s1_walk_info *wi,
					    struct s1_walk_result *wr)
{
	u8 up, pp, idx;

	idx = pte_pi_index(wr->desc);

	switch (wi->regime) {
	case TR_EL10:
		pp = perm_idx(vcpu, PIR_EL1, idx);
		up = perm_idx(vcpu, PIRE0_EL1, idx);
		break;
	case TR_EL20:
		pp = perm_idx(vcpu, PIR_EL2, idx);
		up = perm_idx(vcpu, PIRE0_EL2, idx);
		break;
	case TR_EL2:
		pp = perm_idx(vcpu, PIR_EL2, idx);
		up = 0;
		break;
	}

	set_perms(priv, wr, pp);

	if (wi->regime != TR_EL2)
		set_perms(unpriv, wr, up);
	else
		set_unpriv_perms(wr, false, false, false);

	wr->pov = wi->poe && !(pp & BIT(3));
	wr->uov = wi->e0poe && !(up & BIT(3));

	/* R_VFPJF */
	if (wr->px && wr->uw) {
		set_priv_perms(wr, false, false, false);
		set_unpriv_perms(wr, false, false, false);
	}
}

static void compute_s1_overlay_permissions(struct kvm_vcpu *vcpu,
					   struct s1_walk_info *wi,
					   struct s1_walk_result *wr)
{
	u8 idx, pov_perms, uov_perms;

	idx = FIELD_GET(PTE_PO_IDX_MASK, wr->desc);

	if (wr->pov) {
		switch (wi->regime) {
		case TR_EL10:
			pov_perms = perm_idx(vcpu, POR_EL1, idx);
			break;
		case TR_EL20:
			pov_perms = perm_idx(vcpu, POR_EL2, idx);
			break;
		case TR_EL2:
			pov_perms = perm_idx(vcpu, POR_EL2, idx);
			break;
		}

		if (pov_perms & ~POE_RWX)
			pov_perms = POE_NONE;

		/* R_QXXPC, S1PrivOverlay enabled */
		if (wr->pwxn && (pov_perms & POE_X))
			pov_perms &= ~POE_W;

		wr->pr &= pov_perms & POE_R;
		wr->pw &= pov_perms & POE_W;
		wr->px &= pov_perms & POE_X;
	}

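	/* Same thing for the unprivileged overlay, this time from POR_EL0 */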
	if (wr->uov) {
		switch (wi->regime) {
		case TR_EL10:
			uov_perms = perm_idx(vcpu, POR_EL0, idx);
			break;
		case TR_EL20:
			uov_perms = perm_idx(vcpu, POR_EL0, idx);
			break;
		case TR_EL2:
			uov_perms = 0;
			break;
		}

		if (uov_perms & ~POE_RWX)
			uov_perms = POE_NONE;

		/* R_NPBXC, S1UnprivOverlay enabled */
		if (wr->uwxn && (uov_perms & POE_X))
			uov_perms &= ~POE_W;

		wr->ur &= uov_perms & POE_R;
		wr->uw &= uov_perms & POE_W;
		wr->ux &= uov_perms & POE_X;
	}
}

static void compute_s1_permissions(struct kvm_vcpu *vcpu,
				   struct s1_walk_info *wi,
				   struct s1_walk_result *wr)
{
	bool pan;

	if (!s1pie_enabled(vcpu, wi->regime))
		compute_s1_direct_permissions(vcpu, wi, wr);
	else
		compute_s1_indirect_permissions(vcpu, wi, wr);

	if (!wi->hpd)
		compute_s1_hierarchical_permissions(vcpu, wi, wr);

	compute_s1_overlay_permissions(vcpu, wi, wr);

	/* R_QXXPC, S1PrivOverlay disabled */
	if (!wr->pov)
		wr->px &= !(wr->pwxn && wr->pw);

	/* R_NPBXC, S1UnprivOverlay disabled */
	if (!wr->uov)
		wr->ux &= !(wr->uwxn && wr->uw);

	pan = wi->pan && (wr->ur || wr->uw ||
			  (pan3_enabled(vcpu, wi->regime) && wr->ux));
	wr->pw &= !pan;
	wr->pr &= !pan;
}

static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	struct s1_walk_result wr = {};
	struct s1_walk_info wi = {};
	bool perm_fail = false;
	int ret, idx;

	wi.regime = compute_translation_regime(vcpu, op);
	wi.as_el0 = (op == OP_AT_S1E0R || op == OP_AT_S1E0W);
	wi.pan = (op == OP_AT_S1E1RP || op == OP_AT_S1E1WP) &&
		 (*vcpu_cpsr(vcpu) & PSR_PAN_BIT);

	ret = setup_s1_walk(vcpu, &wi, &wr, vaddr);
	if (ret)
		goto compute_par;

	if (wr.level == S1_MMU_DISABLED)
		goto compute_par;

	idx = srcu_read_lock(&vcpu->kvm->srcu);

	ret = walk_s1(vcpu, &wi, &wr, vaddr);

	srcu_read_unlock(&vcpu->kvm->srcu, idx);

	if (ret)
		goto compute_par;

	compute_s1_permissions(vcpu, &wi, &wr);

	switch (op) {
	case OP_AT_S1E1RP:
	case OP_AT_S1E1R:
	case OP_AT_S1E2R:
		perm_fail = !wr.pr;
		break;
	case OP_AT_S1E1WP:
	case OP_AT_S1E1W:
	case OP_AT_S1E2W:
		perm_fail = !wr.pw;
		break;
	case OP_AT_S1E0R:
		perm_fail = !wr.ur;
		break;
	case OP_AT_S1E0W:
		perm_fail = !wr.uw;
		break;
	case OP_AT_S1E1A:
	case OP_AT_S1E2A:
		break;
	default:
		BUG();
	}

	if (perm_fail)
		fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false);

compute_par:
	return compute_par_s1(vcpu, &wi, &wr);
}

/*
 * Return the PAR_EL1 value as the result of a valid translation.
 *
 * If the translation is unsuccessful, the value may only contain
 * PAR_EL1.F, and cannot be taken at face value. It isn't an
 * indication of the translation having failed, only that the fast
 * path did not succeed, *unless* it indicates an S1 permission or
 * access fault.
 */
static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	struct mmu_config config;
	struct kvm_s2_mmu *mmu;
	bool fail, mmu_cs;
	u64 par;

	par = SYS_PAR_EL1_F;

	/*
	 * We've trapped, so everything is live on the CPU. As we will
	 * be switching contexts behind everybody's back, disable
	 * interrupts while holding the mmu lock.
	 */
	guard(write_lock_irqsave)(&vcpu->kvm->mmu_lock);

	/*
	 * If HCR_EL2.{E2H,TGE} == {1,1}, the MMU context is already
	 * the right one (as we trapped from vEL2). If not, save the
	 * full MMU context.
	 *
	 * We are also guaranteed to be in the correct context if
	 * we're not in a nested VM.
	 */
	mmu_cs = (vcpu_has_nv(vcpu) &&
		  !(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)));
	if (!mmu_cs)
		goto skip_mmu_switch;

	/*
	 * Obtaining the S2 MMU for an L2 is horribly racy, and we may not
	 * find it (recycled by another vcpu, for example). When this
	 * happens, admit defeat immediately and use the SW (slow) path.
	 */
	mmu = lookup_s2_mmu(vcpu);
	if (!mmu)
		return par;

	__mmu_config_save(&config);

	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR0_EL1), SYS_TTBR0);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR1_EL1), SYS_TTBR1);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR_EL1), SYS_TCR);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, MAIR_EL1), SYS_MAIR);
	if (kvm_has_tcr2(vcpu->kvm)) {
		write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR2_EL1), SYS_TCR2);
		if (kvm_has_s1pie(vcpu->kvm)) {
			write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIR_EL1), SYS_PIR);
			write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIRE0_EL1), SYS_PIRE0);
		}
		if (kvm_has_s1poe(vcpu->kvm)) {
			write_sysreg_el1(vcpu_read_sys_reg(vcpu, POR_EL1), SYS_POR);
			write_sysreg_s(vcpu_read_sys_reg(vcpu, POR_EL0), SYS_POR_EL0);
		}
	}
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, SCTLR_EL1), SYS_SCTLR);
	__load_stage2(mmu, mmu->arch);

skip_mmu_switch:
	/* Temporarily switch back to guest context */
	write_sysreg_hcr(vcpu->arch.hcr_el2);
	isb();

	switch (op) {
	case OP_AT_S1E1RP:
	case OP_AT_S1E1WP:
		fail = at_s1e1p_fast(vcpu, op, vaddr);
		break;
	case OP_AT_S1E1R:
		fail = __kvm_at(OP_AT_S1E1R, vaddr);
		break;
	case OP_AT_S1E1W:
		fail = __kvm_at(OP_AT_S1E1W, vaddr);
		break;
	case OP_AT_S1E0R:
		fail = __kvm_at(OP_AT_S1E0R, vaddr);
		break;
	case OP_AT_S1E0W:
		fail = __kvm_at(OP_AT_S1E0W, vaddr);
		break;
	case OP_AT_S1E1A:
		fail = __kvm_at(OP_AT_S1E1A, vaddr);
		break;
	default:
		WARN_ON_ONCE(1);
		fail = true;
		break;
	}

	if (!fail)
		par = read_sysreg_par();

	write_sysreg_hcr(HCR_HOST_VHE_FLAGS);

	if (mmu_cs)
		__mmu_config_restore(&config);

	return par;
}

static bool par_check_s1_perm_fault(u64 par)
{
	u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par);

	return ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM &&
		!(par & SYS_PAR_EL1_S));
}

static bool par_check_s1_access_fault(u64 par)
{
	u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par);

	return ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_ACCESS &&
		!(par & SYS_PAR_EL1_S));
}

void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr);

	/*
	 * If PAR_EL1 reports that AT failed on an S1 permission or access
	 * fault, we know for sure that the PTW was able to walk the S1
	 * tables and there's nothing else to do.
	 *
	 * If AT failed for any other reason, then we must walk the guest S1
	 * to emulate the instruction.
	 */
	if ((par & SYS_PAR_EL1_F) &&
	    !par_check_s1_perm_fault(par) &&
	    !par_check_s1_access_fault(par))
		par = handle_at_slow(vcpu, op, vaddr);

	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}

void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 par;

	/*
	 * We've trapped, so everything is live on the CPU. As we will be
	 * switching context behind everybody's back, disable interrupts...
	 */
	scoped_guard(write_lock_irqsave, &vcpu->kvm->mmu_lock) {
		u64 val, hcr;
		bool fail;

		val = hcr = read_sysreg(hcr_el2);
		val &= ~HCR_TGE;
		val |= HCR_VM;

		if (!vcpu_el2_e2h_is_set(vcpu))
			val |= HCR_NV | HCR_NV1;

		write_sysreg_hcr(val);
		isb();

		par = SYS_PAR_EL1_F;

		switch (op) {
		case OP_AT_S1E2R:
			fail = __kvm_at(OP_AT_S1E1R, vaddr);
			break;
		case OP_AT_S1E2W:
			fail = __kvm_at(OP_AT_S1E1W, vaddr);
			break;
		case OP_AT_S1E2A:
			fail = __kvm_at(OP_AT_S1E1A, vaddr);
			break;
		default:
			WARN_ON_ONCE(1);
			fail = true;
		}

		isb();

		if (!fail)
			par = read_sysreg_par();

		write_sysreg_hcr(hcr);
		isb();
	}

	/* We failed the translation, let's replay it in slow motion */
	if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
		par = handle_at_slow(vcpu, op, vaddr);

	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}

void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	struct kvm_s2_trans out = {};
	u64 ipa, par;
	bool write;
	int ret;

	/* Do the stage-1 translation */
	switch (op) {
	case OP_AT_S12E1R:
		op = OP_AT_S1E1R;
		write = false;
		break;
	case OP_AT_S12E1W:
		op = OP_AT_S1E1W;
		write = true;
		break;
	case OP_AT_S12E0R:
		op = OP_AT_S1E0R;
		write = false;
		break;
	case OP_AT_S12E0W:
		op = OP_AT_S1E0W;
		write = true;
		break;
	default:
		WARN_ON_ONCE(1);
		return;
	}

	__kvm_at_s1e01(vcpu, op, vaddr);
	par = vcpu_read_sys_reg(vcpu, PAR_EL1);
	if (par & SYS_PAR_EL1_F)
		return;

	/*
	 * If we only have a single stage of translation (EL2&0), exit
	 * early. Same thing if {VM,DC}=={0,0}.
	 */
	if (compute_translation_regime(vcpu, op) == TR_EL20 ||
	    !(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC)))
		return;

	/* Do the stage-2 translation */
	ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0));
	out.esr = 0;
	ret = kvm_walk_nested_s2(vcpu, ipa, &out);
	if (ret < 0)
		return;

	/* Check the access permission */
	if (!out.esr &&
	    ((!write && !out.readable) || (write && !out.writable)))
		out.esr = ESR_ELx_FSC_PERM_L(out.level & 0x3);

	par = compute_par_s12(vcpu, par, &out);
	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}

/*
 * Translate a VA for a given EL in a given translation regime, with
 * or without PAN. This requires wi->{regime, as_el0, pan} to be
 * set. The rest of the wi and wr should be 0-initialised.
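 *
 * Returns 0 if the walk completed (with @wr describing the translation,
 * or granting full permissions when the S1 MMU is disabled), or a
 * negative error code if the walk failed.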
 */
int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
		       struct s1_walk_result *wr, u64 va)
{
	int ret;

	ret = setup_s1_walk(vcpu, wi, wr, va);
	if (ret)
		return ret;

	if (wr->level == S1_MMU_DISABLED) {
		wr->ur = wr->uw = wr->ux = true;
		wr->pr = wr->pw = wr->px = true;
	} else {
		ret = walk_s1(vcpu, wi, wr, va);
		if (ret)
			return ret;

		compute_s1_permissions(vcpu, wi, wr);
	}

	return 0;
}

struct desc_match {
	u64 ipa;
	int level;
};

static int match_s1_desc(struct s1_walk_context *ctxt, void *priv)
{
	struct desc_match *dm = priv;
	u64 ipa = dm->ipa;

	/* Use S1 granule alignment */
	ipa &= GENMASK(51, ctxt->wi->pgshift);

	/* Not the IPA we're looking for? Continue. */
	if (ipa != ctxt->table_ipa)
		return 0;

	/* Note the level and interrupt the walk */
	dm->level = ctxt->level;
	return -EINTR;
}

int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, int *level)
{
	struct desc_match dm = {
		.ipa = ipa,
	};
	struct s1_walk_info wi = {
		.filter = &(struct s1_walk_filter){
			.fn = match_s1_desc,
			.priv = &dm,
		},
		.regime = TR_EL10,
		.as_el0 = false,
		.pan = false,
	};
	struct s1_walk_result wr = {};
	int ret;

	ret = setup_s1_walk(vcpu, &wi, &wr, va);
	if (ret)
		return ret;

	/* We really expect the S1 MMU to be on here... */
	if (WARN_ON_ONCE(wr.level == S1_MMU_DISABLED)) {
		*level = 0;
		return 0;
	}

	/* Walk the guest's PT, looking for a match along the way */
	ret = walk_s1(vcpu, &wi, &wr, va);
	switch (ret) {
	case -EINTR:
		/* We interrupted the walk on a match, return the level */
		*level = dm.level;
		return 0;
	case 0:
		/* The walk completed, we failed to find the entry */
		return -ENOENT;
	default:
		/* Any other error... */
		return ret;
	}
}