// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017 - Linaro Ltd
 * Author: Jintack Lim <jintack.lim@linaro.org>
 */

#include <linux/kvm_host.h>

#include <asm/esr.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>

static void fail_s1_walk(struct s1_walk_result *wr, u8 fst, bool s1ptw)
{
	wr->fst = fst;
	wr->ptw = s1ptw;
	wr->s2 = s1ptw;
	wr->failed = true;
}

#define S1_MMU_DISABLED		(-127)

static int get_ia_size(struct s1_walk_info *wi)
{
	return 64 - wi->txsz;
}

/* Return true if the IPA is out of the OA range */
static bool check_output_size(u64 ipa, struct s1_walk_info *wi)
{
	if (wi->pa52bit)
		return wi->max_oa_bits < 52 && (ipa & GENMASK_ULL(51, wi->max_oa_bits));
	return wi->max_oa_bits < 48 && (ipa & GENMASK_ULL(47, wi->max_oa_bits));
}

static bool has_52bit_pa(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, u64 tcr)
{
	switch (BIT(wi->pgshift)) {
	case SZ_64K:
	default:	/* IMPDEF: treat any other value as 64k */
		if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, PARANGE, 52))
			return false;
		return ((wi->regime == TR_EL2 ?
			 FIELD_GET(TCR_EL2_PS_MASK, tcr) :
			 FIELD_GET(TCR_IPS_MASK, tcr)) == 0b0110);
	case SZ_16K:
		if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT))
			return false;
		break;
	case SZ_4K:
		if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT))
			return false;
		break;
	}

	return (tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS));
}

static u64 desc_to_oa(struct s1_walk_info *wi, u64 desc)
{
	u64 addr;

	if (!wi->pa52bit)
		return desc & GENMASK_ULL(47, wi->pgshift);

	switch (BIT(wi->pgshift)) {
	case SZ_4K:
	case SZ_16K:
		addr = desc & GENMASK_ULL(49, wi->pgshift);
		addr |= FIELD_GET(KVM_PTE_ADDR_51_50_LPA2, desc) << 50;
		break;
	case SZ_64K:
	default:	/* IMPDEF: treat any other value as 64k */
		addr = desc & GENMASK_ULL(47, wi->pgshift);
		addr |= FIELD_GET(KVM_PTE_ADDR_51_48, desc) << 48;
		break;
	}

	return addr;
}

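/*
 * Example of the composition above: with a 64KB granule and 52-bit
 * PAs, a descriptor carries OA[47:pgshift] in place and OA[51:48] in
 * the KVM_PTE_ADDR_51_48 field, and desc_to_oa() stitches the two
 * pieces back together. The 4KB/16KB (LPA2) case works the same way,
 * with OA[49:pgshift] taken as-is and OA[51:50] coming from
 * KVM_PTE_ADDR_51_50_LPA2.
 */
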
/* Return the translation regime that applies to an AT instruction */
static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 op)
{
	/*
	 * We only get here from guest EL2, so the translation
	 * regime AT applies to is solely defined by {E2H,TGE}.
	 */
	switch (op) {
	case OP_AT_S1E2R:
	case OP_AT_S1E2W:
	case OP_AT_S1E2A:
		return vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2;
	default:
		return (vcpu_el2_e2h_is_set(vcpu) &&
			vcpu_el2_tge_is_set(vcpu)) ? TR_EL20 : TR_EL10;
	}
}

static u64 effective_tcr2(struct kvm_vcpu *vcpu, enum trans_regime regime)
{
	if (regime == TR_EL10) {
		if (vcpu_has_nv(vcpu) &&
		    !(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En))
			return 0;

		return vcpu_read_sys_reg(vcpu, TCR2_EL1);
	}

	return vcpu_read_sys_reg(vcpu, TCR2_EL2);
}

static bool s1pie_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
{
	if (!kvm_has_s1pie(vcpu->kvm))
		return false;

	/* Abuse TCR2_EL1_PIE and use it for EL2 as well */
	return effective_tcr2(vcpu, regime) & TCR2_EL1_PIE;
}

static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi)
{
	u64 val;

	if (!kvm_has_s1poe(vcpu->kvm)) {
		wi->poe = wi->e0poe = false;
		return;
	}

	val = effective_tcr2(vcpu, wi->regime);

	/* Abuse TCR2_EL1_* for EL2 */
	wi->poe = val & TCR2_EL1_POE;
	wi->e0poe = (wi->regime != TR_EL2) && (val & TCR2_EL1_E0POE);
}

static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
			 struct s1_walk_result *wr, u64 va)
{
	u64 hcr, sctlr, tcr, tg, ps, ia_bits, ttbr;
	unsigned int stride, x;
	bool va55, tbi, lva;

	va55 = va & BIT(55);

	if (vcpu_has_nv(vcpu)) {
		hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
		wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC));
	} else {
		WARN_ON_ONCE(wi->regime != TR_EL10);
		wi->s2 = false;
		hcr = 0;
	}

	switch (wi->regime) {
	case TR_EL10:
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
		tcr = vcpu_read_sys_reg(vcpu, TCR_EL1);
		ttbr = (va55 ?
			vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
			vcpu_read_sys_reg(vcpu, TTBR0_EL1));
		break;
	case TR_EL2:
	case TR_EL20:
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);
		tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
		ttbr = (va55 ?
			vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
			vcpu_read_sys_reg(vcpu, TTBR0_EL2));
		break;
	default:
		BUG();
	}

	/* Someone was silly enough to encode TG0/TG1 differently */
	if (va55 && wi->regime != TR_EL2) {
		wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
		tg = FIELD_GET(TCR_TG1_MASK, tcr);

		switch (tg << TCR_TG1_SHIFT) {
		case TCR_TG1_4K:
			wi->pgshift = 12; break;
		case TCR_TG1_16K:
			wi->pgshift = 14; break;
		case TCR_TG1_64K:
		default:	/* IMPDEF: treat any other value as 64k */
			wi->pgshift = 16; break;
		}
	} else {
		wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
		tg = FIELD_GET(TCR_TG0_MASK, tcr);

		switch (tg << TCR_TG0_SHIFT) {
		case TCR_TG0_4K:
			wi->pgshift = 12; break;
		case TCR_TG0_16K:
			wi->pgshift = 14; break;
		case TCR_TG0_64K:
		default:	/* IMPDEF: treat any other value as 64k */
			wi->pgshift = 16; break;
		}
	}

	wi->pa52bit = has_52bit_pa(vcpu, wi, tcr);

	ia_bits = get_ia_size(wi);

	/* AArch64.S1StartLevel() */
	stride = wi->pgshift - 3;
	wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride);

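	/*
	 * For example, a 4KB granule gives pgshift = 12 and a 9-bit
	 * stride per level; with TxSZ = 25, ia_bits = 39, so
	 * sl = 3 - ((38 - 12) / 9) = 1 and the walk starts at level 1.
	 */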
	if (wi->regime == TR_EL2 && va55)
		goto addrsz;

	tbi = (wi->regime == TR_EL2 ?
	       FIELD_GET(TCR_EL2_TBI, tcr) :
	       (va55 ?
		FIELD_GET(TCR_TBI1, tcr) :
		FIELD_GET(TCR_TBI0, tcr)));

	if (!tbi && (u64)sign_extend64(va, 55) != va)
		goto addrsz;

	wi->sh = (wi->regime == TR_EL2 ?
		  FIELD_GET(TCR_EL2_SH0_MASK, tcr) :
		  (va55 ?
		   FIELD_GET(TCR_SH1_MASK, tcr) :
		   FIELD_GET(TCR_SH0_MASK, tcr)));

	va = (u64)sign_extend64(va, 55);

	/* Let's put the MMU disabled case aside immediately */
	switch (wi->regime) {
	case TR_EL10:
		/*
		 * If dealing with the EL1&0 translation regime, 3 things
		 * can disable the S1 translation:
		 *
		 * - HCR_EL2.DC = 1
		 * - HCR_EL2.{E2H,TGE} = {0,1}
		 * - SCTLR_EL1.M = 0
		 *
		 * The TGE part is interesting. If we have decided that this
		 * is EL1&0, then it means that either {E2H,TGE} == {1,0} or
		 * {0,x}, and we only need to test for TGE == 1.
		 */
		if (hcr & (HCR_DC | HCR_TGE)) {
			wr->level = S1_MMU_DISABLED;
			break;
		}
		fallthrough;
	case TR_EL2:
	case TR_EL20:
		if (!(sctlr & SCTLR_ELx_M))
			wr->level = S1_MMU_DISABLED;
		break;
	}

	if (wr->level == S1_MMU_DISABLED) {
		if (va >= BIT(kvm_get_pa_bits(vcpu->kvm)))
			goto addrsz;

		wr->pa = va;
		return 0;
	}

	wi->be = sctlr & SCTLR_ELx_EE;

	wi->hpd = kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, HPDS, IMP);
	wi->hpd &= (wi->regime == TR_EL2 ?
		    FIELD_GET(TCR_EL2_HPD, tcr) :
		    (va55 ?
		     FIELD_GET(TCR_HPD1, tcr) :
		     FIELD_GET(TCR_HPD0, tcr)));
	/* R_JHSVW */
	wi->hpd |= s1pie_enabled(vcpu, wi->regime);

	/* Do we have POE? */
	compute_s1poe(vcpu, wi);

	/* R_BVXDG */
	wi->hpd |= (wi->poe || wi->e0poe);

	/* R_PLCGL, R_YXNYW */
	if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR2_EL1, ST, 48_47)) {
		if (wi->txsz > 39)
			goto transfault;
	} else {
		if (wi->txsz > 48 || (BIT(wi->pgshift) == SZ_64K && wi->txsz > 47))
			goto transfault;
	}

	/* R_GTJBY, R_SXWGM */
	switch (BIT(wi->pgshift)) {
	case SZ_4K:
	case SZ_16K:
		lva = wi->pa52bit;
		break;
	case SZ_64K:
		lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, VARange, 52);
		break;
	}

	if ((lva && wi->txsz < 12) || (!lva && wi->txsz < 16))
		goto transfault;

	/* R_YYVYV, I_THCZK */
	if ((!va55 && va > GENMASK(ia_bits - 1, 0)) ||
	    (va55 && va < GENMASK(63, ia_bits)))
		goto transfault;

	/* I_ZFSYQ */
	if (wi->regime != TR_EL2 &&
	    (tcr & (va55 ? TCR_EPD1_MASK : TCR_EPD0_MASK)))
		goto transfault;

	/* R_BNDVG and following statements */
	if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, E0PD, IMP) &&
	    wi->as_el0 && (tcr & (va55 ? TCR_E0PD1 : TCR_E0PD0)))
		goto transfault;

	ps = (wi->regime == TR_EL2 ?
	      FIELD_GET(TCR_EL2_PS_MASK, tcr) : FIELD_GET(TCR_IPS_MASK, tcr));

	wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps, wi->pa52bit));

	/* Compute minimal alignment */
	x = 3 + ia_bits - ((3 - wi->sl) * stride + wi->pgshift);

	wi->baddr = ttbr & TTBRx_EL1_BADDR;
	if (wi->pa52bit) {
		/*
		 * Force the alignment on 64 bytes for top-level tables
		 * smaller than 8 entries, since TTBR.BADDR[5:2] are used to
		 * store bits [51:48] of the first level of lookup.
		 */
		x = max(x, 6);

		wi->baddr |= FIELD_GET(GENMASK_ULL(5, 2), ttbr) << 48;
	}

	/* R_VPBBF */
	if (check_output_size(wi->baddr, wi))
		goto addrsz;

	wi->baddr &= GENMASK_ULL(wi->max_oa_bits - 1, x);

	return 0;

addrsz:
	/*
	 * Address Size Fault level 0 to indicate it comes from TTBR.
	 * yes, this is an oddity.
	 */
	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false);
	return -EFAULT;

transfault:
	/* Translation Fault on start level */
	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(wi->sl), false);
	return -EFAULT;
}

static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
		   struct s1_walk_result *wr, u64 va)
{
	u64 va_top, va_bottom, baddr, desc;
	int level, stride, ret;

	level = wi->sl;
	stride = wi->pgshift - 3;
	baddr = wi->baddr;

	va_top = get_ia_size(wi) - 1;

	while (1) {
		u64 index, ipa;

		va_bottom = (3 - level) * stride + wi->pgshift;
		index = (va & GENMASK_ULL(va_top, va_bottom)) >> (va_bottom - 3);

		ipa = baddr | index;

		if (wi->s2) {
			struct kvm_s2_trans s2_trans = {};

			ret = kvm_walk_nested_s2(vcpu, ipa, &s2_trans);
			if (ret) {
				fail_s1_walk(wr,
					     (s2_trans.esr & ~ESR_ELx_FSC_LEVEL) | level,
					     true);
				return ret;
			}

			if (!kvm_s2_trans_readable(&s2_trans)) {
				fail_s1_walk(wr, ESR_ELx_FSC_PERM_L(level),
					     true);

				return -EPERM;
			}

			ipa = kvm_s2_trans_output(&s2_trans);
		}

		if (wi->filter) {
			ret = wi->filter->fn(&(struct s1_walk_context)
					     {
						     .wi = wi,
						     .table_ipa = baddr,
						     .level = level,
					     }, wi->filter->priv);
			if (ret)
				return ret;
		}

		ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc));
		if (ret) {
			fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), false);
			return ret;
		}

		if (wi->be)
			desc = be64_to_cpu((__force __be64)desc);
		else
			desc = le64_to_cpu((__force __le64)desc);

		/* Invalid descriptor */
		if (!(desc & BIT(0)))
			goto transfault;

		/* Block mapping, check validity down the line */
		if (!(desc & BIT(1)))
			break;

		/* Page mapping */
		if (level == 3)
			break;

		/* Table handling */
		if (!wi->hpd) {
			wr->APTable |= FIELD_GET(S1_TABLE_AP, desc);
			wr->UXNTable |= FIELD_GET(PMD_TABLE_UXN, desc);
			wr->PXNTable |= FIELD_GET(PMD_TABLE_PXN, desc);
		}

		baddr = desc_to_oa(wi, desc);

		/* Check for out-of-range OA */
		if (check_output_size(baddr, wi))
			goto addrsz;

		/* Prepare for next round */
		va_top = va_bottom - 1;
		level++;
	}

	/* Block mapping, check the validity of the level */
	if (!(desc & BIT(1))) {
		bool valid_block = false;

		switch (BIT(wi->pgshift)) {
		case SZ_4K:
			valid_block = level == 1 || level == 2 || (wi->pa52bit && level == 0);
			break;
		case SZ_16K:
		case SZ_64K:
			valid_block = level == 2 || (wi->pa52bit && level == 1);
			break;
		}

		if (!valid_block)
			goto transfault;
	}

	baddr = desc_to_oa(wi, desc);
	if (check_output_size(baddr & GENMASK(52, va_bottom), wi))
		goto addrsz;

	if (!(desc & PTE_AF)) {
		fail_s1_walk(wr, ESR_ELx_FSC_ACCESS_L(level), false);
		return -EACCES;
	}

	va_bottom += contiguous_bit_shift(desc, wi, level);

	wr->failed = false;
	wr->level = level;
	wr->desc = desc;
	wr->pa = baddr & GENMASK(52, va_bottom);
	wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0);

	wr->nG = (wi->regime != TR_EL2) && (desc & PTE_NG);
	if (wr->nG) {
		u64 asid_ttbr, tcr;

		switch (wi->regime) {
		case TR_EL10:
			tcr = vcpu_read_sys_reg(vcpu, TCR_EL1);
			asid_ttbr = ((tcr & TCR_A1) ?
				     vcpu_read_sys_reg(vcpu, TTBR1_EL1) :
				     vcpu_read_sys_reg(vcpu, TTBR0_EL1));
			break;
		case TR_EL20:
			tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
			asid_ttbr = ((tcr & TCR_A1) ?
				     vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
				     vcpu_read_sys_reg(vcpu, TTBR0_EL2));
			break;
		default:
			BUG();
		}

		wr->asid = FIELD_GET(TTBR_ASID_MASK, asid_ttbr);
		if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) ||
		    !(tcr & TCR_ASID16))
			wr->asid &= GENMASK(7, 0);
	}

	return 0;

addrsz:
	fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(level), false);
	return -EINVAL;
transfault:
	fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(level), false);
	return -ENOENT;
}

struct mmu_config {
	u64	ttbr0;
	u64	ttbr1;
	u64	tcr;
	u64	mair;
	u64	tcr2;
	u64	pir;
	u64	pire0;
	u64	por_el0;
	u64	por_el1;
	u64	sctlr;
	u64	vttbr;
	u64	vtcr;
};

static void __mmu_config_save(struct mmu_config *config)
{
	config->ttbr0 = read_sysreg_el1(SYS_TTBR0);
	config->ttbr1 = read_sysreg_el1(SYS_TTBR1);
	config->tcr = read_sysreg_el1(SYS_TCR);
	config->mair = read_sysreg_el1(SYS_MAIR);
	if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
		config->tcr2 = read_sysreg_el1(SYS_TCR2);
		if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
			config->pir = read_sysreg_el1(SYS_PIR);
			config->pire0 = read_sysreg_el1(SYS_PIRE0);
		}
		if (system_supports_poe()) {
			config->por_el1 = read_sysreg_el1(SYS_POR);
			config->por_el0 = read_sysreg_s(SYS_POR_EL0);
		}
	}
	config->sctlr = read_sysreg_el1(SYS_SCTLR);
	config->vttbr = read_sysreg(vttbr_el2);
	config->vtcr = read_sysreg(vtcr_el2);
}

static void __mmu_config_restore(struct mmu_config *config)
{
	/*
	 * ARM errata 1165522 and 1530923 require TGE to be 1 before
	 * we update the guest state.
	 */
	asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));

	write_sysreg_el1(config->ttbr0, SYS_TTBR0);
	write_sysreg_el1(config->ttbr1, SYS_TTBR1);
	write_sysreg_el1(config->tcr, SYS_TCR);
	write_sysreg_el1(config->mair, SYS_MAIR);
	if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
		write_sysreg_el1(config->tcr2, SYS_TCR2);
		if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
			write_sysreg_el1(config->pir, SYS_PIR);
			write_sysreg_el1(config->pire0, SYS_PIRE0);
		}
		if (system_supports_poe()) {
			write_sysreg_el1(config->por_el1, SYS_POR);
			write_sysreg_s(config->por_el0, SYS_POR_EL0);
		}
	}
	write_sysreg_el1(config->sctlr, SYS_SCTLR);
	write_sysreg(config->vttbr, vttbr_el2);
	write_sysreg(config->vtcr, vtcr_el2);
}

static bool at_s1e1p_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 host_pan;
	bool fail;

	host_pan = read_sysreg_s(SYS_PSTATE_PAN);
	write_sysreg_s(*vcpu_cpsr(vcpu) & PSTATE_PAN, SYS_PSTATE_PAN);

	switch (op) {
	case OP_AT_S1E1RP:
		fail = __kvm_at(OP_AT_S1E1RP, vaddr);
		break;
	case OP_AT_S1E1WP:
		fail = __kvm_at(OP_AT_S1E1WP, vaddr);
		break;
	}

	write_sysreg_s(host_pan, SYS_PSTATE_PAN);

	return fail;
}

#define MEMATTR(ic, oc)		(MEMATTR_##oc << 4 | MEMATTR_##ic)
#define MEMATTR_NC		0b0100
#define MEMATTR_Wt		0b1000
#define MEMATTR_Wb		0b1100
#define MEMATTR_WbRaWa		0b1111

#define MEMATTR_IS_DEVICE(m)	(((m) & GENMASK(7, 4)) == 0)

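/*
 * The MEMATTR() encoding mirrors a MAIR_ELx attribute byte: the outer
 * attribute lands in bits [7:4] and the inner attribute in bits [3:0],
 * so MEMATTR(NC, Wt) is 0b1000_0100 (outer Write-Through, inner
 * Non-Cacheable), and MEMATTR_IS_DEVICE() matches any value whose
 * outer nibble is zero.
 */
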
static u8 s2_memattr_to_attr(u8 memattr)
{
	memattr &= 0b1111;

	switch (memattr) {
	case 0b0000:
	case 0b0001:
	case 0b0010:
	case 0b0011:
		return memattr << 2;
	case 0b0100:
		return MEMATTR(Wb, Wb);
	case 0b0101:
		return MEMATTR(NC, NC);
	case 0b0110:
		return MEMATTR(Wt, NC);
	case 0b0111:
		return MEMATTR(Wb, NC);
	case 0b1000:
		/* Reserved, assume NC */
		return MEMATTR(NC, NC);
	case 0b1001:
		return MEMATTR(NC, Wt);
	case 0b1010:
		return MEMATTR(Wt, Wt);
	case 0b1011:
		return MEMATTR(Wb, Wt);
	case 0b1100:
		/* Reserved, assume NC */
		return MEMATTR(NC, NC);
	case 0b1101:
		return MEMATTR(NC, Wb);
	case 0b1110:
		return MEMATTR(Wt, Wb);
	case 0b1111:
		return MEMATTR(Wb, Wb);
	default:
		unreachable();
	}
}

static u8 combine_s1_s2_attr(u8 s1, u8 s2)
{
	bool transient;
	u8 final = 0;

	/* Upgrade transient s1 to non-transient to simplify things */
	switch (s1) {
	case 0b0001 ... 0b0011:	/* Normal, Write-Through Transient */
		transient = true;
		s1 = MEMATTR_Wt | (s1 & GENMASK(1,0));
		break;
	case 0b0101 ... 0b0111:	/* Normal, Write-Back Transient */
		transient = true;
		s1 = MEMATTR_Wb | (s1 & GENMASK(1,0));
		break;
	default:
		transient = false;
	}

	/* S2CombineS1AttrHints() */
	if ((s1 & GENMASK(3, 2)) == MEMATTR_NC ||
	    (s2 & GENMASK(3, 2)) == MEMATTR_NC)
		final = MEMATTR_NC;
	else if ((s1 & GENMASK(3, 2)) == MEMATTR_Wt ||
		 (s2 & GENMASK(3, 2)) == MEMATTR_Wt)
		final = MEMATTR_Wt;
	else
		final = MEMATTR_Wb;

	if (final != MEMATTR_NC) {
		/* Inherit RaWa hints from S1 */
		if (transient) {
			switch (s1 & GENMASK(3, 2)) {
			case MEMATTR_Wt:
				final = 0;
				break;
			case MEMATTR_Wb:
				final = MEMATTR_NC;
				break;
			}
		}

		final |= s1 & GENMASK(1, 0);
	}

	return final;
}

#define ATTR_NSH	0b00
#define ATTR_RSV	0b01
#define ATTR_OSH	0b10
#define ATTR_ISH	0b11

static u8 compute_final_sh(u8 attr, u8 sh)
{
	/* Any form of device, as well as NC has SH[1:0]=0b10 */
	if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC))
		return ATTR_OSH;

	if (sh == ATTR_RSV)	/* Reserved, mapped to NSH */
		sh = ATTR_NSH;

	return sh;
}

static u8 compute_s1_sh(struct s1_walk_info *wi, struct s1_walk_result *wr,
			u8 attr)
{
	u8 sh;

	/*
	 * non-52bit and LPA have their basic shareability described in the
	 * descriptor. LPA2 gets it from the corresponding field in TCR,
	 * conveniently recorded in the walk info.
	 */
	if (!wi->pa52bit || BIT(wi->pgshift) == SZ_64K)
		sh = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S1_SH, wr->desc);
	else
		sh = wi->sh;

	return compute_final_sh(attr, sh);
}

static u8 combine_sh(u8 s1_sh, u8 s2_sh)
{
	if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH)
		return ATTR_OSH;
	if (s1_sh == ATTR_ISH || s2_sh == ATTR_ISH)
		return ATTR_ISH;

	return ATTR_NSH;
}

static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
			   struct kvm_s2_trans *tr)
{
	u8 s1_parattr, s2_memattr, final_attr, s2_sh;
	u64 par;

	/* If S2 has failed to translate, report the damage */
	if (tr->esr) {
		par = SYS_PAR_EL1_RES1;
		par |= SYS_PAR_EL1_F;
		par |= SYS_PAR_EL1_S;
		par |= FIELD_PREP(SYS_PAR_EL1_FST, tr->esr);
		return par;
	}

	s1_parattr = FIELD_GET(SYS_PAR_EL1_ATTR, s1_par);
	s2_memattr = FIELD_GET(GENMASK(5, 2), tr->desc);

	if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_FWB) {
		if (!kvm_has_feat(vcpu->kvm, ID_AA64PFR2_EL1, MTEPERM, IMP))
			s2_memattr &= ~BIT(3);

		/* Combination of R_VRJSW and R_RHWZM */
		switch (s2_memattr) {
		case 0b0101:
			if (MEMATTR_IS_DEVICE(s1_parattr))
				final_attr = s1_parattr;
			else
				final_attr = MEMATTR(NC, NC);
			break;
		case 0b0110:
		case 0b1110:
			final_attr = MEMATTR(WbRaWa, WbRaWa);
			break;
		case 0b0111:
		case 0b1111:
			/* Preserve S1 attribute */
			final_attr = s1_parattr;
			break;
		case 0b0100:
		case 0b1100:
		case 0b1101:
			/* Reserved, do something non-silly */
			final_attr = s1_parattr;
			break;
		default:
			/*
			 * MemAttr[2]=0, Device from S2.
			 *
			 * FWB does not influence the way that stage 1
			 * memory types and attributes are combined
			 * with stage 2 Device type and attributes.
			 */
			final_attr = min(s2_memattr_to_attr(s2_memattr),
					 s1_parattr);
		}
	} else {
		/* Combination of R_HMNDG, R_TNHFM and R_GQFSF */
		u8 s2_parattr = s2_memattr_to_attr(s2_memattr);

		if (MEMATTR_IS_DEVICE(s1_parattr) ||
		    MEMATTR_IS_DEVICE(s2_parattr)) {
			final_attr = min(s1_parattr, s2_parattr);
		} else {
			/* At this stage, this is memory vs memory */
			final_attr = combine_s1_s2_attr(s1_parattr & 0xf,
							s2_parattr & 0xf);
			final_attr |= combine_s1_s2_attr(s1_parattr >> 4,
							 s2_parattr >> 4) << 4;
		}
	}

	if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_CD) &&
	    !MEMATTR_IS_DEVICE(final_attr))
		final_attr = MEMATTR(NC, NC);

	s2_sh = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S2_SH, tr->desc);

	par = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr);
	par |= tr->output & GENMASK(47, 12);
	par |= FIELD_PREP(SYS_PAR_EL1_SH,
			  combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par),
				     compute_final_sh(final_attr, s2_sh)));

	return par;
}

static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
			  struct s1_walk_result *wr)
{
	u64 par;

	if (wr->failed) {
		par = SYS_PAR_EL1_RES1;
		par |= SYS_PAR_EL1_F;
		par |= FIELD_PREP(SYS_PAR_EL1_FST, wr->fst);
		par |= wr->ptw ? SYS_PAR_EL1_PTW : 0;
		par |= wr->s2 ? SYS_PAR_EL1_S : 0;
	} else if (wr->level == S1_MMU_DISABLED) {
		/* MMU off or HCR_EL2.DC == 1 */
		par = SYS_PAR_EL1_NSE;
		par |= wr->pa & SYS_PAR_EL1_PA;

		if (wi->regime == TR_EL10 && vcpu_has_nv(vcpu) &&
		    (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_DC)) {
			par |= FIELD_PREP(SYS_PAR_EL1_ATTR,
					  MEMATTR(WbRaWa, WbRaWa));
			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_NSH);
		} else {
			par |= FIELD_PREP(SYS_PAR_EL1_ATTR, 0); /* nGnRnE */
			par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_OSH);
		}
	} else {
		u64 mair, sctlr;
		u8 sh;

		par = SYS_PAR_EL1_NSE;

		mair = (wi->regime == TR_EL10 ?
			vcpu_read_sys_reg(vcpu, MAIR_EL1) :
			vcpu_read_sys_reg(vcpu, MAIR_EL2));

		mair >>= FIELD_GET(PTE_ATTRINDX_MASK, wr->desc) * 8;
		mair &= 0xff;

		sctlr = (wi->regime == TR_EL10 ?
			 vcpu_read_sys_reg(vcpu, SCTLR_EL1) :
			 vcpu_read_sys_reg(vcpu, SCTLR_EL2));

		/* Force NC for memory if SCTLR_ELx.C is clear */
		if (!(sctlr & SCTLR_EL1_C) && !MEMATTR_IS_DEVICE(mair))
			mair = MEMATTR(NC, NC);

		par |= FIELD_PREP(SYS_PAR_EL1_ATTR, mair);
		par |= wr->pa & SYS_PAR_EL1_PA;

		sh = compute_s1_sh(wi, wr, mair);
		par |= FIELD_PREP(SYS_PAR_EL1_SH, sh);
	}

	return par;
}

static bool pan3_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
{
	u64 sctlr;

	if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, PAN, PAN3))
		return false;

	if (s1pie_enabled(vcpu, regime))
		return true;

	if (regime == TR_EL10)
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
	else
		sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);

	return sctlr & SCTLR_EL1_EPAN;
}

static void compute_s1_direct_permissions(struct kvm_vcpu *vcpu,
					  struct s1_walk_info *wi,
					  struct s1_walk_result *wr)
{
	bool wxn;

	/* Non-hierarchical part of AArch64.S1DirectBasePermissions() */
	if (wi->regime != TR_EL2) {
		switch (FIELD_GET(PTE_USER | PTE_RDONLY, wr->desc)) {
		case 0b00:
			wr->pr = wr->pw = true;
			wr->ur = wr->uw = false;
			break;
		case 0b01:
			wr->pr = wr->pw = wr->ur = wr->uw = true;
			break;
		case 0b10:
			wr->pr = true;
			wr->pw = wr->ur = wr->uw = false;
			break;
		case 0b11:
			wr->pr = wr->ur = true;
			wr->pw = wr->uw = false;
			break;
		}

		/* We don't use px for anything yet, but hey... */
		wr->px = !((wr->desc & PTE_PXN) || wr->uw);
		wr->ux = !(wr->desc & PTE_UXN);
	} else {
		wr->ur = wr->uw = wr->ux = false;

		if (!(wr->desc & PTE_RDONLY)) {
			wr->pr = wr->pw = true;
		} else {
			wr->pr = true;
			wr->pw = false;
		}

		/* XN maps to UXN */
		wr->px = !(wr->desc & PTE_UXN);
	}

	switch (wi->regime) {
	case TR_EL2:
	case TR_EL20:
		wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_WXN);
		break;
	case TR_EL10:
		wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN);
		break;
	}

	wr->pwxn = wr->uwxn = wxn;
	wr->pov = wi->poe;
	wr->uov = wi->e0poe;
}

static void compute_s1_hierarchical_permissions(struct kvm_vcpu *vcpu,
						struct s1_walk_info *wi,
						struct s1_walk_result *wr)
{
	/* Hierarchical part of AArch64.S1DirectBasePermissions() */
	if (wi->regime != TR_EL2) {
		switch (wr->APTable) {
		case 0b00:
			break;
		case 0b01:
			wr->ur = wr->uw = false;
			break;
		case 0b10:
			wr->pw = wr->uw = false;
			break;
		case 0b11:
			wr->pw = wr->ur = wr->uw = false;
			break;
		}

		wr->px &= !wr->PXNTable;
		wr->ux &= !wr->UXNTable;
	} else {
		if (wr->APTable & BIT(1))
			wr->pw = false;

		/* XN maps to UXN */
		wr->px &= !wr->UXNTable;
	}
}

#define perm_idx(v, r, i)	((vcpu_read_sys_reg((v), (r)) >> ((i) * 4)) & 0xf)

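/*
 * perm_idx() extracts the 4-bit permission field for a given index
 * from a PIR/POR-style register: index 0 sits in bits [3:0], index 1
 * in bits [7:4], and so on (index i occupies bits [4*i + 3:4*i]).
 */
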
#define set_priv_perms(wr, r, w, x)	\
	do {				\
		(wr)->pr = (r);		\
		(wr)->pw = (w);		\
		(wr)->px = (x);		\
	} while (0)

#define set_unpriv_perms(wr, r, w, x)	\
	do {				\
		(wr)->ur = (r);		\
		(wr)->uw = (w);		\
		(wr)->ux = (x);		\
	} while (0)

#define set_priv_wxn(wr, v)		\
	do {				\
		(wr)->pwxn = (v);	\
	} while (0)

#define set_unpriv_wxn(wr, v)		\
	do {				\
		(wr)->uwxn = (v);	\
	} while (0)

/* Similar to AArch64.S1IndirectBasePermissions(), without GCS */
#define set_perms(w, wr, ip)						\
	do {								\
		/* R_LLZDZ */						\
		switch ((ip)) {						\
		case 0b0000:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		case 0b0001:						\
			set_ ## w ## _perms((wr), true , false, false);	\
			break;						\
		case 0b0010:						\
			set_ ## w ## _perms((wr), false, false, true );	\
			break;						\
		case 0b0011:						\
			set_ ## w ## _perms((wr), true , false, true );	\
			break;						\
		case 0b0100:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		case 0b0101:						\
			set_ ## w ## _perms((wr), true , true , false);	\
			break;						\
		case 0b0110:						\
			set_ ## w ## _perms((wr), true , true , true );	\
			break;						\
		case 0b0111:						\
			set_ ## w ## _perms((wr), true , true , true );	\
			break;						\
		case 0b1000:						\
			set_ ## w ## _perms((wr), true , false, false);	\
			break;						\
		case 0b1001:						\
			set_ ## w ## _perms((wr), true , false, false);	\
			break;						\
		case 0b1010:						\
			set_ ## w ## _perms((wr), true , false, true );	\
			break;						\
		case 0b1011:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		case 0b1100:						\
			set_ ## w ## _perms((wr), true , true , false);	\
			break;						\
		case 0b1101:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		case 0b1110:						\
			set_ ## w ## _perms((wr), true , true , true );	\
			break;						\
		case 0b1111:						\
			set_ ## w ## _perms((wr), false, false, false);	\
			break;						\
		}							\
									\
		/* R_HJYGR */						\
		set_ ## w ## _wxn((wr), ((ip) == 0b0110));		\
									\
	} while (0)

static void compute_s1_indirect_permissions(struct kvm_vcpu *vcpu,
					    struct s1_walk_info *wi,
					    struct s1_walk_result *wr)
{
	u8 up, pp, idx;

	idx = pte_pi_index(wr->desc);

	switch (wi->regime) {
	case TR_EL10:
		pp = perm_idx(vcpu, PIR_EL1, idx);
		up = perm_idx(vcpu, PIRE0_EL1, idx);
		break;
	case TR_EL20:
		pp = perm_idx(vcpu, PIR_EL2, idx);
		up = perm_idx(vcpu, PIRE0_EL2, idx);
		break;
	case TR_EL2:
		pp = perm_idx(vcpu, PIR_EL2, idx);
		up = 0;
		break;
	}

	set_perms(priv, wr, pp);

	if (wi->regime != TR_EL2)
		set_perms(unpriv, wr, up);
	else
		set_unpriv_perms(wr, false, false, false);

	wr->pov = wi->poe && !(pp & BIT(3));
	wr->uov = wi->e0poe && !(up & BIT(3));

	/* R_VFPJF */
	if (wr->px && wr->uw) {
		set_priv_perms(wr, false, false, false);
		set_unpriv_perms(wr, false, false, false);
	}
}

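/*
 * Overlay (POE) permissions can only take rights away: the 4-bit
 * field selected by the descriptor's overlay index is read from the
 * relevant POR_ELx register and ANDed into the R/W/X bits that the
 * base permissions already granted.
 */
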
static void compute_s1_overlay_permissions(struct kvm_vcpu *vcpu,
					   struct s1_walk_info *wi,
					   struct s1_walk_result *wr)
{
	u8 idx, pov_perms, uov_perms;

	idx = FIELD_GET(PTE_PO_IDX_MASK, wr->desc);

	if (wr->pov) {
		switch (wi->regime) {
		case TR_EL10:
			pov_perms = perm_idx(vcpu, POR_EL1, idx);
			break;
		case TR_EL20:
			pov_perms = perm_idx(vcpu, POR_EL2, idx);
			break;
		case TR_EL2:
			pov_perms = perm_idx(vcpu, POR_EL2, idx);
			break;
		}

		if (pov_perms & ~POE_RWX)
			pov_perms = POE_NONE;

		/* R_QXXPC, S1PrivOverlay enabled */
		if (wr->pwxn && (pov_perms & POE_X))
			pov_perms &= ~POE_W;

		wr->pr &= pov_perms & POE_R;
		wr->pw &= pov_perms & POE_W;
		wr->px &= pov_perms & POE_X;
	}

	if (wr->uov) {
		switch (wi->regime) {
		case TR_EL10:
			uov_perms = perm_idx(vcpu, POR_EL0, idx);
			break;
		case TR_EL20:
			uov_perms = perm_idx(vcpu, POR_EL0, idx);
			break;
		case TR_EL2:
			uov_perms = 0;
			break;
		}

		if (uov_perms & ~POE_RWX)
			uov_perms = POE_NONE;

		/* R_NPBXC, S1UnprivOverlay enabled */
		if (wr->uwxn && (uov_perms & POE_X))
			uov_perms &= ~POE_W;

		wr->ur &= uov_perms & POE_R;
		wr->uw &= uov_perms & POE_W;
		wr->ux &= uov_perms & POE_X;
	}
}

static void compute_s1_permissions(struct kvm_vcpu *vcpu,
				   struct s1_walk_info *wi,
				   struct s1_walk_result *wr)
{
	bool pan;

	if (!s1pie_enabled(vcpu, wi->regime))
		compute_s1_direct_permissions(vcpu, wi, wr);
	else
		compute_s1_indirect_permissions(vcpu, wi, wr);

	if (!wi->hpd)
		compute_s1_hierarchical_permissions(vcpu, wi, wr);

	compute_s1_overlay_permissions(vcpu, wi, wr);

	/* R_QXXPC, S1PrivOverlay disabled */
	if (!wr->pov)
		wr->px &= !(wr->pwxn && wr->pw);

	/* R_NPBXC, S1UnprivOverlay disabled */
	if (!wr->uov)
		wr->ux &= !(wr->uwxn && wr->uw);

	pan = wi->pan && (wr->ur || wr->uw ||
			  (pan3_enabled(vcpu, wi->regime) && wr->ux));
	wr->pw &= !pan;
	wr->pr &= !pan;
}

static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	struct s1_walk_result wr = {};
	struct s1_walk_info wi = {};
	bool perm_fail = false;
	int ret, idx;

	wi.regime = compute_translation_regime(vcpu, op);
	wi.as_el0 = (op == OP_AT_S1E0R || op == OP_AT_S1E0W);
	wi.pan = (op == OP_AT_S1E1RP || op == OP_AT_S1E1WP) &&
		 (*vcpu_cpsr(vcpu) & PSR_PAN_BIT);

	ret = setup_s1_walk(vcpu, &wi, &wr, vaddr);
	if (ret)
		goto compute_par;

	if (wr.level == S1_MMU_DISABLED)
		goto compute_par;

	idx = srcu_read_lock(&vcpu->kvm->srcu);

	ret = walk_s1(vcpu, &wi, &wr, vaddr);

	srcu_read_unlock(&vcpu->kvm->srcu, idx);

	if (ret)
		goto compute_par;

	compute_s1_permissions(vcpu, &wi, &wr);

	switch (op) {
	case OP_AT_S1E1RP:
	case OP_AT_S1E1R:
	case OP_AT_S1E2R:
		perm_fail = !wr.pr;
		break;
	case OP_AT_S1E1WP:
	case OP_AT_S1E1W:
	case OP_AT_S1E2W:
		perm_fail = !wr.pw;
		break;
	case OP_AT_S1E0R:
		perm_fail = !wr.ur;
		break;
	case OP_AT_S1E0W:
		perm_fail = !wr.uw;
		break;
	case OP_AT_S1E1A:
	case OP_AT_S1E2A:
		break;
	default:
		BUG();
	}

	if (perm_fail)
		fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false);

compute_par:
	return compute_par_s1(vcpu, &wi, &wr);
}

/*
 * Return the PAR_EL1 value as the result of a valid translation.
 *
 * If the translation is unsuccessful, the value may only contain
 * PAR_EL1.F, and cannot be taken at face value. It isn't an
 * indication of the translation having failed, only that the fast
 * path did not succeed, *unless* it indicates a S1 permission or
 * access fault.
 */
static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	struct mmu_config config;
	struct kvm_s2_mmu *mmu;
	bool fail, mmu_cs;
	u64 par;

	par = SYS_PAR_EL1_F;

	/*
	 * We've trapped, so everything is live on the CPU. As we will
	 * be switching contexts behind everybody's back, disable
	 * interrupts while holding the mmu lock.
	 */
	guard(write_lock_irqsave)(&vcpu->kvm->mmu_lock);

	/*
	 * If HCR_EL2.{E2H,TGE} == {1,1}, the MMU context is already
	 * the right one (as we trapped from vEL2). If not, save the
	 * full MMU context.
	 *
	 * We are also guaranteed to be in the correct context if
	 * we're not in a nested VM.
	 */
	mmu_cs = (vcpu_has_nv(vcpu) &&
		  !(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)));
	if (!mmu_cs)
		goto skip_mmu_switch;

	/*
	 * Obtaining the S2 MMU for an L2 is horribly racy, and we may not
	 * find it (recycled by another vcpu, for example). When this
	 * happens, admit defeat immediately and use the SW (slow) path.
	 */
	mmu = lookup_s2_mmu(vcpu);
	if (!mmu)
		return par;

	__mmu_config_save(&config);

	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR0_EL1), SYS_TTBR0);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR1_EL1), SYS_TTBR1);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR_EL1), SYS_TCR);
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, MAIR_EL1), SYS_MAIR);
	if (kvm_has_tcr2(vcpu->kvm)) {
		write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR2_EL1), SYS_TCR2);
		if (kvm_has_s1pie(vcpu->kvm)) {
			write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIR_EL1), SYS_PIR);
			write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIRE0_EL1), SYS_PIRE0);
		}
		if (kvm_has_s1poe(vcpu->kvm)) {
			write_sysreg_el1(vcpu_read_sys_reg(vcpu, POR_EL1), SYS_POR);
			write_sysreg_s(vcpu_read_sys_reg(vcpu, POR_EL0), SYS_POR_EL0);
		}
	}
	write_sysreg_el1(vcpu_read_sys_reg(vcpu, SCTLR_EL1), SYS_SCTLR);
	__load_stage2(mmu, mmu->arch);

skip_mmu_switch:
	/* Temporarily switch back to guest context */
	write_sysreg_hcr(vcpu->arch.hcr_el2);
	isb();

	switch (op) {
	case OP_AT_S1E1RP:
	case OP_AT_S1E1WP:
		fail = at_s1e1p_fast(vcpu, op, vaddr);
		break;
	case OP_AT_S1E1R:
		fail = __kvm_at(OP_AT_S1E1R, vaddr);
		break;
	case OP_AT_S1E1W:
		fail = __kvm_at(OP_AT_S1E1W, vaddr);
		break;
	case OP_AT_S1E0R:
		fail = __kvm_at(OP_AT_S1E0R, vaddr);
		break;
	case OP_AT_S1E0W:
		fail = __kvm_at(OP_AT_S1E0W, vaddr);
		break;
	case OP_AT_S1E1A:
		fail = __kvm_at(OP_AT_S1E1A, vaddr);
		break;
	default:
		WARN_ON_ONCE(1);
		fail = true;
		break;
	}

	if (!fail)
		par = read_sysreg_par();

	write_sysreg_hcr(HCR_HOST_VHE_FLAGS);

	if (mmu_cs)
		__mmu_config_restore(&config);

	return par;
}

static bool par_check_s1_perm_fault(u64 par)
{
	u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par);

	return ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM &&
		!(par & SYS_PAR_EL1_S));
}

static bool par_check_s1_access_fault(u64 par)
{
	u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par);

	return ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_ACCESS &&
		!(par & SYS_PAR_EL1_S));
}

void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr);

	/*
	 * If PAR_EL1 reports that AT failed on a S1 permission or access
	 * fault, we know for sure that the PTW was able to walk the S1
	 * tables and there's nothing else to do.
	 *
	 * If AT failed for any other reason, then we must walk the guest S1
	 * to emulate the instruction.
	 */
	if ((par & SYS_PAR_EL1_F) &&
	    !par_check_s1_perm_fault(par) &&
	    !par_check_s1_access_fault(par))
		par = handle_at_slow(vcpu, op, vaddr);

	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}

void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	u64 par;

	/*
	 * We've trapped, so everything is live on the CPU. As we will be
	 * switching context behind everybody's back, disable interrupts...
	 */
	scoped_guard(write_lock_irqsave, &vcpu->kvm->mmu_lock) {
		u64 val, hcr;
		bool fail;

		val = hcr = read_sysreg(hcr_el2);
		val &= ~HCR_TGE;
		val |= HCR_VM;

		if (!vcpu_el2_e2h_is_set(vcpu))
			val |= HCR_NV | HCR_NV1;

		write_sysreg_hcr(val);
		isb();

		par = SYS_PAR_EL1_F;

		switch (op) {
		case OP_AT_S1E2R:
			fail = __kvm_at(OP_AT_S1E1R, vaddr);
			break;
		case OP_AT_S1E2W:
			fail = __kvm_at(OP_AT_S1E1W, vaddr);
			break;
		case OP_AT_S1E2A:
			fail = __kvm_at(OP_AT_S1E1A, vaddr);
			break;
		default:
			WARN_ON_ONCE(1);
			fail = true;
		}

		isb();

		if (!fail)
			par = read_sysreg_par();

		write_sysreg_hcr(hcr);
		isb();
	}

	/* We failed the translation, let's replay it in slow motion */
	if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
		par = handle_at_slow(vcpu, op, vaddr);

	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}

void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
	struct kvm_s2_trans out = {};
	u64 ipa, par;
	bool write;
	int ret;

	/* Do the stage-1 translation */
	switch (op) {
	case OP_AT_S12E1R:
		op = OP_AT_S1E1R;
		write = false;
		break;
	case OP_AT_S12E1W:
		op = OP_AT_S1E1W;
		write = true;
		break;
	case OP_AT_S12E0R:
		op = OP_AT_S1E0R;
		write = false;
		break;
	case OP_AT_S12E0W:
		op = OP_AT_S1E0W;
		write = true;
		break;
	default:
		WARN_ON_ONCE(1);
		return;
	}

	__kvm_at_s1e01(vcpu, op, vaddr);
	par = vcpu_read_sys_reg(vcpu, PAR_EL1);
	if (par & SYS_PAR_EL1_F)
		return;

	/*
	 * If we only have a single stage of translation (EL2&0), exit
	 * early. Same thing if {VM,DC}=={0,0}.
	 */
	if (compute_translation_regime(vcpu, op) == TR_EL20 ||
	    !(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC)))
		return;

	/* Do the stage-2 translation */
	ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0));
	out.esr = 0;
	ret = kvm_walk_nested_s2(vcpu, ipa, &out);
	if (ret < 0)
		return;

	/* Check the access permission */
	if (!out.esr &&
	    ((!write && !out.readable) || (write && !out.writable)))
		out.esr = ESR_ELx_FSC_PERM_L(out.level & 0x3);

	par = compute_par_s12(vcpu, par, &out);
	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
}

/*
 * Translate a VA for a given EL in a given translation regime, with
 * or without PAN. This requires wi->{regime, as_el0, pan} to be
 * set. The rest of the wi and wr should be 0-initialised.
 */
int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
		       struct s1_walk_result *wr, u64 va)
{
	int ret;

	ret = setup_s1_walk(vcpu, wi, wr, va);
	if (ret)
		return ret;

	if (wr->level == S1_MMU_DISABLED) {
		wr->ur = wr->uw = wr->ux = true;
		wr->pr = wr->pw = wr->px = true;
	} else {
		ret = walk_s1(vcpu, wi, wr, va);
		if (ret)
			return ret;

		compute_s1_permissions(vcpu, wi, wr);
	}

	return 0;
}

struct desc_match {
	u64 ipa;
	int level;
};

static int match_s1_desc(struct s1_walk_context *ctxt, void *priv)
{
	struct desc_match *dm = priv;
	u64 ipa = dm->ipa;

	/* Use S1 granule alignment */
	ipa &= GENMASK(51, ctxt->wi->pgshift);

	/* Not the IPA we're looking for? Continue. */
	if (ipa != ctxt->table_ipa)
		return 0;

	/* Note the level and interrupt the walk */
	dm->level = ctxt->level;
	return -EINTR;
}

int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, int *level)
{
	struct desc_match dm = {
		.ipa = ipa,
	};
	struct s1_walk_info wi = {
		.filter = &(struct s1_walk_filter){
			.fn	= match_s1_desc,
			.priv	= &dm,
		},
		.as_el0	= false,
		.pan	= false,
	};
	struct s1_walk_result wr = {};
	int ret;

	if (is_hyp_ctxt(vcpu))
		wi.regime = vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2;
	else
		wi.regime = TR_EL10;

	ret = setup_s1_walk(vcpu, &wi, &wr, va);
	if (ret)
		return ret;

	/* We really expect the S1 MMU to be on here... */
	if (WARN_ON_ONCE(wr.level == S1_MMU_DISABLED)) {
		*level = 0;
		return 0;
	}

	/* Walk the guest's PT, looking for a match along the way */
	ret = walk_s1(vcpu, &wi, &wr, va);
	switch (ret) {
	case -EINTR:
		/* We interrupted the walk on a match, return the level */
		*level = dm.level;
		return 0;
	case 0:
		/* The walk completed, we failed to find the entry */
		return -ENOENT;
	default:
		/* Any other error... */
		return ret;
	}
}