// SPDX-License-Identifier: GPL-2.0
/*
 * guest access functions
 *
 * Copyright IBM Corp. 2014
 *
 */

#include <linux/vmalloc.h>
#include <linux/mm_types.h>
#include <linux/err.h>
#include <linux/pgtable.h>
#include <linux/bitfield.h>
#include <asm/access-regs.h>
#include <asm/fault.h>
#include <asm/gmap.h>
#include <asm/dat-bits.h>
#include "kvm-s390.h"
#include "gmap.h"
#include "gaccess.h"

/*
 * vaddress union in order to easily decode a virtual address into its
 * region first index, region second index etc. parts.
 */
union vaddress {
	unsigned long addr;
	struct {
		unsigned long rfx : 11;
		unsigned long rsx : 11;
		unsigned long rtx : 11;
		unsigned long sx  : 11;
		unsigned long px  : 8;
		unsigned long bx  : 12;
	};
	struct {
		unsigned long rfx01 : 2;
		unsigned long       : 9;
		unsigned long rsx01 : 2;
		unsigned long       : 9;
		unsigned long rtx01 : 2;
		unsigned long       : 9;
		unsigned long sx01  : 2;
		unsigned long       : 29;
	};
};

/*
 * raddress union which will contain the result (real or absolute address)
 * after a page table walk. The rfaa, sfaa and pfra members are used to
 * simply assign them the value of a region, segment or page table entry.
 */
union raddress {
	unsigned long addr;
	unsigned long rfaa : 33; /* Region-Frame Absolute Address */
	unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
	unsigned long pfra : 52; /* Page-Frame Real Address */
};

union alet {
	u32 val;
	struct {
		u32 reserved : 7;
		u32 p        : 1;
		u32 alesn    : 8;
		u32 alen     : 16;
	};
};

union ald {
	u32 val;
	struct {
		u32     : 1;
		u32 alo : 24;
		u32 all : 7;
	};
};

struct ale {
	unsigned long i      : 1;  /* ALEN-Invalid Bit */
	unsigned long        : 5;
	unsigned long fo     : 1;  /* Fetch-Only Bit */
	unsigned long p      : 1;  /* Private Bit */
	unsigned long alesn  : 8;  /* Access-List-Entry Sequence Number */
	unsigned long aleax  : 16; /* Access-List-Entry Authorization Index */
	unsigned long        : 32;
	unsigned long        : 1;
	unsigned long asteo  : 25; /* ASN-Second-Table-Entry Origin */
	unsigned long        : 6;
	unsigned long astesn : 32; /* ASTE Sequence Number */
};

struct aste {
	unsigned long i      : 1;  /* ASX-Invalid Bit */
	unsigned long ato    : 29; /* Authority-Table Origin */
	unsigned long        : 1;
	unsigned long b      : 1;  /* Base-Space Bit */
	unsigned long ax     : 16; /* Authorization Index */
	unsigned long atl    : 12; /* Authority-Table Length */
	unsigned long        : 2;
	unsigned long ca     : 1;  /* Controlled-ASN Bit */
	unsigned long ra     : 1;  /* Reusable-ASN Bit */
	unsigned long asce   : 64; /* Address-Space-Control Element */
	unsigned long ald    : 32;
	unsigned long astesn : 32;
	/* .. more fields there */
};

int ipte_lock_held(struct kvm *kvm)
{
	if (sclp.has_siif) {
		int rc;

		read_lock(&kvm->arch.sca_lock);
		rc = kvm_s390_get_ipte_control(kvm)->kh != 0;
		read_unlock(&kvm->arch.sca_lock);
		return rc;
	}
	return kvm->arch.ipte_lock_count != 0;
}

static void ipte_lock_simple(struct kvm *kvm)
{
	union ipte_control old, new, *ic;

	mutex_lock(&kvm->arch.ipte_mutex);
	kvm->arch.ipte_lock_count++;
	if (kvm->arch.ipte_lock_count > 1)
		goto out;
retry:
	read_lock(&kvm->arch.sca_lock);
	ic = kvm_s390_get_ipte_control(kvm);
	old = READ_ONCE(*ic);
	do {
		if (old.k) {
			read_unlock(&kvm->arch.sca_lock);
			cond_resched();
			goto retry;
		}
		new = old;
		new.k = 1;
	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
	read_unlock(&kvm->arch.sca_lock);
out:
	mutex_unlock(&kvm->arch.ipte_mutex);
}

static void ipte_unlock_simple(struct kvm *kvm)
{
	union ipte_control old, new, *ic;

	mutex_lock(&kvm->arch.ipte_mutex);
	kvm->arch.ipte_lock_count--;
	if (kvm->arch.ipte_lock_count)
		goto out;
	read_lock(&kvm->arch.sca_lock);
	ic = kvm_s390_get_ipte_control(kvm);
	old = READ_ONCE(*ic);
	do {
		new = old;
		new.k = 0;
	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
	read_unlock(&kvm->arch.sca_lock);
	wake_up(&kvm->arch.ipte_wq);
out:
	mutex_unlock(&kvm->arch.ipte_mutex);
}

static void ipte_lock_siif(struct kvm *kvm)
{
	union ipte_control old, new, *ic;

retry:
	read_lock(&kvm->arch.sca_lock);
	ic = kvm_s390_get_ipte_control(kvm);
	old = READ_ONCE(*ic);
	do {
		if (old.kg) {
			read_unlock(&kvm->arch.sca_lock);
			cond_resched();
			goto retry;
		}
		new = old;
		new.k = 1;
		new.kh++;
	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
	read_unlock(&kvm->arch.sca_lock);
}

static void ipte_unlock_siif(struct kvm *kvm)
{
	union ipte_control old, new, *ic;

	read_lock(&kvm->arch.sca_lock);
	ic = kvm_s390_get_ipte_control(kvm);
	old = READ_ONCE(*ic);
	do {
		new = old;
		new.kh--;
		if (!new.kh)
			new.k = 0;
	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
	read_unlock(&kvm->arch.sca_lock);
	if (!new.kh)
		wake_up(&kvm->arch.ipte_wq);
}

void ipte_lock(struct kvm *kvm)
{
	if (sclp.has_siif)
		ipte_lock_siif(kvm);
	else
		ipte_lock_simple(kvm);
}

void ipte_unlock(struct kvm *kvm)
{
	if (sclp.has_siif)
		ipte_unlock_siif(kvm);
	else
		ipte_unlock_simple(kvm);
}

static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar,
			  enum gacc_mode mode)
{
	union alet alet;
	struct ale ale;
	struct aste aste;
	unsigned long ald_addr, authority_table_addr;
	union ald ald;
	int eax, rc;
	u8 authority_table;

	if (ar >= NUM_ACRS)
		return -EINVAL;

	if (vcpu->arch.acrs_loaded)
		save_access_regs(vcpu->run->s.regs.acrs);
	alet.val = vcpu->run->s.regs.acrs[ar];

	if (ar == 0 || alet.val == 0) {
		asce->val = vcpu->arch.sie_block->gcr[1];
		return 0;
	} else if (alet.val == 1) {
		asce->val = vcpu->arch.sie_block->gcr[7];
		return 0;
	}

	if (alet.reserved)
		return PGM_ALET_SPECIFICATION;

	if (alet.p)
		ald_addr = vcpu->arch.sie_block->gcr[5];
	else
		ald_addr = vcpu->arch.sie_block->gcr[2];
	ald_addr &= 0x7fffffc0;

	rc = read_guest_real(vcpu, ald_addr + 16, &ald.val, sizeof(union ald));
	if (rc)
		return rc;

	if (alet.alen / 8 > ald.all)
		return PGM_ALEN_TRANSLATION;

	if (0x7fffffff - ald.alo * 128 < alet.alen * 16)
		return PGM_ADDRESSING;

	rc = read_guest_real(vcpu, ald.alo * 128 + alet.alen * 16, &ale,
			     sizeof(struct ale));
	if (rc)
		return rc;

	if (ale.i == 1)
		return PGM_ALEN_TRANSLATION;
	if (ale.alesn != alet.alesn)
		return PGM_ALE_SEQUENCE;

	rc = read_guest_real(vcpu, ale.asteo * 64, &aste, sizeof(struct aste));
	if (rc)
		return rc;

	if (aste.i)
		return PGM_ASTE_VALIDITY;
	if (aste.astesn != ale.astesn)
		return PGM_ASTE_SEQUENCE;

	if (ale.p == 1) {
		eax = (vcpu->arch.sie_block->gcr[8] >> 16) & 0xffff;
		if (ale.aleax != eax) {
			if (eax / 16 > aste.atl)
				return PGM_EXTENDED_AUTHORITY;

			authority_table_addr = aste.ato * 4 + eax / 4;

			rc = read_guest_real(vcpu, authority_table_addr,
					     &authority_table,
					     sizeof(u8));
			if (rc)
				return rc;

			if ((authority_table & (0x40 >> ((eax & 3) * 2))) == 0)
				return PGM_EXTENDED_AUTHORITY;
		}
	}

	if (ale.fo == 1 && mode == GACC_STORE)
		return PGM_PROTECTION;

	asce->val = aste.asce;
	return 0;
}

enum prot_type {
	PROT_TYPE_LA   = 0,
	PROT_TYPE_KEYC = 1,
	PROT_TYPE_ALC  = 2,
	PROT_TYPE_DAT  = 3,
	PROT_TYPE_IEP  = 4,
	/* Dummy value for passing an initialized value when code != PGM_PROTECTION */
	PROT_NONE,
};

static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
			    enum gacc_mode mode, enum prot_type prot, bool terminate)
{
	struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
	union teid *teid;

	memset(pgm, 0, sizeof(*pgm));
	pgm->code = code;
	teid = (union teid *)&pgm->trans_exc_code;

	switch (code) {
	case PGM_PROTECTION:
		switch (prot) {
		case PROT_NONE:
			/* We should never get here, acts like termination */
			WARN_ON_ONCE(1);
			break;
		case PROT_TYPE_IEP:
			teid->b61 = 1;
			fallthrough;
		case PROT_TYPE_LA:
			teid->b56 = 1;
			break;
		case PROT_TYPE_KEYC:
			teid->b60 = 1;
			break;
		case PROT_TYPE_ALC:
			teid->b60 = 1;
			fallthrough;
		case PROT_TYPE_DAT:
			teid->b61 = 1;
			break;
		}
		if (terminate) {
			teid->b56 = 0;
			teid->b60 = 0;
			teid->b61 = 0;
		}
		fallthrough;
	case PGM_ASCE_TYPE:
	case PGM_PAGE_TRANSLATION:
	case PGM_REGION_FIRST_TRANS:
	case PGM_REGION_SECOND_TRANS:
	case PGM_REGION_THIRD_TRANS:
	case PGM_SEGMENT_TRANSLATION:
		/*
		 * op_access_id only applies to MOVE_PAGE -> set bit 61
		 * exc_access_id has to be set to 0 for some instructions. Both
		 * cases have to be handled by the caller.
		 */
		teid->addr = gva >> PAGE_SHIFT;
		teid->fsi = mode == GACC_STORE ? TEID_FSI_STORE : TEID_FSI_FETCH;
		teid->as = psw_bits(vcpu->arch.sie_block->gpsw).as;
		fallthrough;
	case PGM_ALEN_TRANSLATION:
	case PGM_ALE_SEQUENCE:
	case PGM_ASTE_VALIDITY:
	case PGM_ASTE_SEQUENCE:
	case PGM_EXTENDED_AUTHORITY:
		/*
		 * We can always store exc_access_id, as it is
		 * undefined for non-ar cases. It is undefined for
		 * most DAT protection exceptions.
		 */
		pgm->exc_access_id = ar;
		break;
	}
	return code;
}

static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
		     enum gacc_mode mode, enum prot_type prot)
{
	return trans_exc_ending(vcpu, code, gva, ar, mode, prot, false);
}

static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
			 unsigned long ga, u8 ar, enum gacc_mode mode)
{
	int rc;
	struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw);

	if (!psw.dat) {
		asce->val = 0;
		asce->r = 1;
		return 0;
	}

	if ((mode == GACC_IFETCH) && (psw.as != PSW_BITS_AS_HOME))
		psw.as = PSW_BITS_AS_PRIMARY;

	switch (psw.as) {
	case PSW_BITS_AS_PRIMARY:
		asce->val = vcpu->arch.sie_block->gcr[1];
		return 0;
	case PSW_BITS_AS_SECONDARY:
		asce->val = vcpu->arch.sie_block->gcr[7];
		return 0;
	case PSW_BITS_AS_HOME:
		asce->val = vcpu->arch.sie_block->gcr[13];
		return 0;
	case PSW_BITS_AS_ACCREG:
		rc = ar_translation(vcpu, asce, ar, mode);
		if (rc > 0)
			return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_ALC);
		return rc;
	}
	return 0;
}

static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
{
	return kvm_read_guest(kvm, gpa, val, sizeof(*val));
}

/**
 * guest_translate - translate a guest virtual into a guest absolute address
 * @vcpu: virtual cpu
 * @gva: guest virtual address
 * @gpa: points to where guest physical (absolute) address should be stored
 * @asce: effective asce
 * @mode: indicates the access mode to be used
 * @prot: returns the type for protection exceptions
 *
 * Translate a guest virtual address into a guest absolute address by means
 * of dynamic address translation as specified by the architecture.
 * If the resulting absolute address is not available in the configuration
 * an addressing exception is indicated and @gpa will not be changed.
 *
 * Returns: - zero on success; @gpa contains the resulting absolute address
 *          - a negative value if guest access failed due to e.g. broken
 *            guest mapping
 *          - a positive value if an access exception happened. In this case
 *            the returned value is the program interruption code as defined
 *            by the architecture
 */
static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
				     unsigned long *gpa, const union asce asce,
				     enum gacc_mode mode, enum prot_type *prot)
{
	union vaddress vaddr = {.addr = gva};
	union raddress raddr = {.addr = gva};
	union page_table_entry pte;
	int dat_protection = 0;
	int iep_protection = 0;
	union ctlreg0 ctlreg0;
	unsigned long ptr;
	int edat1, edat2, iep;

	ctlreg0.val = vcpu->arch.sie_block->gcr[0];
	edat1 = ctlreg0.edat && test_kvm_facility(vcpu->kvm, 8);
	edat2 = edat1 && test_kvm_facility(vcpu->kvm, 78);
	iep = ctlreg0.iep && test_kvm_facility(vcpu->kvm, 130);
	if (asce.r)
		goto real_address;
	ptr = asce.rsto * PAGE_SIZE;
	switch (asce.dt) {
	case ASCE_TYPE_REGION1:
		if (vaddr.rfx01 > asce.tl)
			return PGM_REGION_FIRST_TRANS;
		ptr += vaddr.rfx * 8;
		break;
	case ASCE_TYPE_REGION2:
		if (vaddr.rfx)
			return PGM_ASCE_TYPE;
		if (vaddr.rsx01 > asce.tl)
			return PGM_REGION_SECOND_TRANS;
		ptr += vaddr.rsx * 8;
		break;
	case ASCE_TYPE_REGION3:
		if (vaddr.rfx || vaddr.rsx)
			return PGM_ASCE_TYPE;
		if (vaddr.rtx01 > asce.tl)
			return PGM_REGION_THIRD_TRANS;
		ptr += vaddr.rtx * 8;
		break;
	case ASCE_TYPE_SEGMENT:
		if (vaddr.rfx || vaddr.rsx || vaddr.rtx)
			return PGM_ASCE_TYPE;
		if (vaddr.sx01 > asce.tl)
			return PGM_SEGMENT_TRANSLATION;
		ptr += vaddr.sx * 8;
		break;
	}
	switch (asce.dt) {
	case ASCE_TYPE_REGION1: {
		union region1_table_entry rfte;

		if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
			return PGM_ADDRESSING;
		if (deref_table(vcpu->kvm, ptr, &rfte.val))
			return -EFAULT;
		if (rfte.i)
			return PGM_REGION_FIRST_TRANS;
		if (rfte.tt != TABLE_TYPE_REGION1)
			return PGM_TRANSLATION_SPEC;
		if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl)
			return PGM_REGION_SECOND_TRANS;
		if (edat1)
			dat_protection |= rfte.p;
		ptr = rfte.rto * PAGE_SIZE + vaddr.rsx * 8;
	}
		fallthrough;
	case ASCE_TYPE_REGION2: {
		union region2_table_entry rste;

		if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
			return PGM_ADDRESSING;
		if (deref_table(vcpu->kvm, ptr, &rste.val))
			return -EFAULT;
		if (rste.i)
			return PGM_REGION_SECOND_TRANS;
		if (rste.tt != TABLE_TYPE_REGION2)
			return PGM_TRANSLATION_SPEC;
		if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl)
			return PGM_REGION_THIRD_TRANS;
		if (edat1)
			dat_protection |= rste.p;
		ptr = rste.rto * PAGE_SIZE + vaddr.rtx * 8;
	}
		fallthrough;
	case ASCE_TYPE_REGION3: {
		union region3_table_entry rtte;

		if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
			return PGM_ADDRESSING;
		if (deref_table(vcpu->kvm, ptr, &rtte.val))
			return -EFAULT;
		if (rtte.i)
			return PGM_REGION_THIRD_TRANS;
		if (rtte.tt != TABLE_TYPE_REGION3)
			return PGM_TRANSLATION_SPEC;
		if (rtte.cr && asce.p && edat2)
			return PGM_TRANSLATION_SPEC;
		if (rtte.fc && edat2) {
			dat_protection |= rtte.fc1.p;
			iep_protection = rtte.fc1.iep;
			raddr.rfaa = rtte.fc1.rfaa;
			goto absolute_address;
		}
		if (vaddr.sx01 < rtte.fc0.tf)
			return PGM_SEGMENT_TRANSLATION;
		if (vaddr.sx01 > rtte.fc0.tl)
			return PGM_SEGMENT_TRANSLATION;
		if (edat1)
			dat_protection |= rtte.fc0.p;
		ptr = rtte.fc0.sto * PAGE_SIZE + vaddr.sx * 8;
	}
		fallthrough;
	case ASCE_TYPE_SEGMENT: {
		union segment_table_entry ste;
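
		/*
		 * With EDAT-1, a segment-table entry may map a 1 MB frame
		 * directly (ste.fc set); the page-table level below is then
		 * skipped.
		 */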
		if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
			return PGM_ADDRESSING;
		if (deref_table(vcpu->kvm, ptr, &ste.val))
			return -EFAULT;
		if (ste.i)
			return PGM_SEGMENT_TRANSLATION;
		if (ste.tt != TABLE_TYPE_SEGMENT)
			return PGM_TRANSLATION_SPEC;
		if (ste.cs && asce.p)
			return PGM_TRANSLATION_SPEC;
		if (ste.fc && edat1) {
			dat_protection |= ste.fc1.p;
			iep_protection = ste.fc1.iep;
			raddr.sfaa = ste.fc1.sfaa;
			goto absolute_address;
		}
		dat_protection |= ste.fc0.p;
		ptr = ste.fc0.pto * (PAGE_SIZE / 2) + vaddr.px * 8;
	}
	}
	if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
		return PGM_ADDRESSING;
	if (deref_table(vcpu->kvm, ptr, &pte.val))
		return -EFAULT;
	if (pte.i)
		return PGM_PAGE_TRANSLATION;
	if (pte.z)
		return PGM_TRANSLATION_SPEC;
	dat_protection |= pte.p;
	iep_protection = pte.iep;
	raddr.pfra = pte.pfra;
real_address:
	raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr);
absolute_address:
	if (mode == GACC_STORE && dat_protection) {
		*prot = PROT_TYPE_DAT;
		return PGM_PROTECTION;
	}
	if (mode == GACC_IFETCH && iep_protection && iep) {
		*prot = PROT_TYPE_IEP;
		return PGM_PROTECTION;
	}
	if (!kvm_is_gpa_in_memslot(vcpu->kvm, raddr.addr))
		return PGM_ADDRESSING;
	*gpa = raddr.addr;
	return 0;
}

static inline int is_low_address(unsigned long ga)
{
	/* Check for address ranges 0..511 and 4096..4607 */
	return (ga & ~0x11fful) == 0;
}

static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
					  const union asce asce)
{
	union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
	psw_t *psw = &vcpu->arch.sie_block->gpsw;

	if (!ctlreg0.lap)
		return 0;
	if (psw_bits(*psw).dat && asce.p)
		return 0;
	return 1;
}

static int vm_check_access_key(struct kvm *kvm, u8 access_key,
			       enum gacc_mode mode, gpa_t gpa)
{
	u8 storage_key, access_control;
	bool fetch_protected;
	unsigned long hva;
	int r;

	if (access_key == 0)
		return 0;

	hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
	if (kvm_is_error_hva(hva))
		return PGM_ADDRESSING;

	mmap_read_lock(current->mm);
	r = get_guest_storage_key(current->mm, hva, &storage_key);
	mmap_read_unlock(current->mm);
	if (r)
		return r;
	access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key);
	if (access_control == access_key)
		return 0;
	fetch_protected = storage_key & _PAGE_FP_BIT;
	if ((mode == GACC_FETCH || mode == GACC_IFETCH) && !fetch_protected)
		return 0;
	return PGM_PROTECTION;
}

static bool fetch_prot_override_applicable(struct kvm_vcpu *vcpu, enum gacc_mode mode,
					   union asce asce)
{
	psw_t *psw = &vcpu->arch.sie_block->gpsw;
	unsigned long override;

	if (mode == GACC_FETCH || mode == GACC_IFETCH) {
		/* check if fetch protection override enabled */
		override = vcpu->arch.sie_block->gcr[0];
		override &= CR0_FETCH_PROTECTION_OVERRIDE;
		/* not applicable if subject to DAT && private space */
		override = override && !(psw_bits(*psw).dat && asce.p);
		return override;
	}
	return false;
}

static bool fetch_prot_override_applies(unsigned long ga, unsigned int len)
{
	return ga < 2048 && ga + len <= 2048;
}

static bool storage_prot_override_applicable(struct kvm_vcpu *vcpu)
{
	/* check if storage protection override enabled */
	return vcpu->arch.sie_block->gcr[0] & CR0_STORAGE_PROTECTION_OVERRIDE;
}

static bool storage_prot_override_applies(u8 access_control)
{
	/* matches special storage protection override key (9) -> allow */
	return access_control == PAGE_SPO_ACC;
}

static int vcpu_check_access_key(struct kvm_vcpu *vcpu, u8 access_key,
				 enum gacc_mode mode, union asce asce, gpa_t gpa,
				 unsigned long ga, unsigned int len)
{
	u8 storage_key, access_control;
	unsigned long hva;
	int r;

	/* access key 0 matches any storage key -> allow */
	if (access_key == 0)
		return 0;
	/*
	 * caller needs to ensure that gfn is accessible, so we can
	 * assume that this cannot fail
	 */
	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gpa));
	mmap_read_lock(current->mm);
	r = get_guest_storage_key(current->mm, hva, &storage_key);
	mmap_read_unlock(current->mm);
	if (r)
		return r;
	access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key);
	/* access key matches storage key -> allow */
	if (access_control == access_key)
		return 0;
	if (mode == GACC_FETCH || mode == GACC_IFETCH) {
		/* it is a fetch and fetch protection is off -> allow */
		if (!(storage_key & _PAGE_FP_BIT))
			return 0;
		if (fetch_prot_override_applicable(vcpu, mode, asce) &&
		    fetch_prot_override_applies(ga, len))
			return 0;
	}
	if (storage_prot_override_applicable(vcpu) &&
	    storage_prot_override_applies(access_control))
		return 0;
	return PGM_PROTECTION;
}

/**
 * guest_range_to_gpas() - Calculate guest physical addresses of page fragments
 * covering a logical range
 * @vcpu: virtual cpu
 * @ga: guest address, start of range
 * @ar: access register
 * @gpas: output argument, may be NULL
 * @len: length of range in bytes
 * @asce: address-space-control element to use for translation
 * @mode: access mode
 * @access_key: access key to match the range's storage keys against
 *
 * Translate a logical range to a series of guest absolute addresses,
 * such that the concatenation of page fragments starting at each gpa makes up
 * the whole range.
 * The translation is performed as if done by the cpu for the given @asce, @ar,
 * @mode and state of the @vcpu.
 * If the translation causes an exception, its program interruption code is
 * returned and the &struct kvm_s390_pgm_info pgm member of @vcpu is modified
 * such that a subsequent call to kvm_s390_inject_prog_vcpu() will inject
 * a correct exception into the guest.
 * The resulting gpas are stored into @gpas, unless it is NULL.
 *
 * Note: All fragments except the first one start at the beginning of a page.
 * When deriving the boundaries of a fragment from a gpa, all but the last
 * fragment end at the end of the page.
 *
 * Return:
 * * 0		- success
 * * <0		- translation could not be performed, for example if guest
 *		  memory could not be accessed
 * * >0		- an access exception occurred. In this case the returned value
 *		  is the program interruption code and the contents of pgm may
 *		  be used to inject an exception into the guest.
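 *
 * To merely probe whether a range is accessible, @gpas can be NULL; for
 * example, check_gva_range() passes NULL and only evaluates the return code.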
 */
static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
			       unsigned long *gpas, unsigned long len,
			       const union asce asce, enum gacc_mode mode,
			       u8 access_key)
{
	psw_t *psw = &vcpu->arch.sie_block->gpsw;
	unsigned int offset = offset_in_page(ga);
	unsigned int fragment_len;
	int lap_enabled, rc = 0;
	enum prot_type prot;
	unsigned long gpa;

	lap_enabled = low_address_protection_enabled(vcpu, asce);
	while (min(PAGE_SIZE - offset, len) > 0) {
		fragment_len = min(PAGE_SIZE - offset, len);
		ga = kvm_s390_logical_to_effective(vcpu, ga);
		if (mode == GACC_STORE && lap_enabled && is_low_address(ga))
			return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode,
					 PROT_TYPE_LA);
		if (psw_bits(*psw).dat) {
			rc = guest_translate(vcpu, ga, &gpa, asce, mode, &prot);
			if (rc < 0)
				return rc;
		} else {
			gpa = kvm_s390_real_to_abs(vcpu, ga);
			if (!kvm_is_gpa_in_memslot(vcpu->kvm, gpa)) {
				rc = PGM_ADDRESSING;
				prot = PROT_NONE;
			}
		}
		if (rc)
			return trans_exc(vcpu, rc, ga, ar, mode, prot);
		rc = vcpu_check_access_key(vcpu, access_key, mode, asce, gpa, ga,
					   fragment_len);
		if (rc)
			return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_KEYC);
		if (gpas)
			*gpas++ = gpa;
		offset = 0;
		ga += fragment_len;
		len -= fragment_len;
	}
	return 0;
}

static int access_guest_page(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
			     void *data, unsigned int len)
{
	const unsigned int offset = offset_in_page(gpa);
	const gfn_t gfn = gpa_to_gfn(gpa);
	int rc;

	if (!gfn_to_memslot(kvm, gfn))
		return PGM_ADDRESSING;
	if (mode == GACC_STORE)
		rc = kvm_write_guest_page(kvm, gfn, data, offset, len);
	else
		rc = kvm_read_guest_page(kvm, gfn, data, offset, len);
	return rc;
}

static int
access_guest_page_with_key(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
			   void *data, unsigned int len, u8 access_key)
{
	struct kvm_memory_slot *slot;
	bool writable;
	gfn_t gfn;
	hva_t hva;
	int rc;

	gfn = gpa >> PAGE_SHIFT;
	slot = gfn_to_memslot(kvm, gfn);
	hva = gfn_to_hva_memslot_prot(slot, gfn, &writable);

	if (kvm_is_error_hva(hva))
		return PGM_ADDRESSING;
	/*
	 * Check if it's a read-only memslot, even though that cannot occur
	 * since those are unsupported.
	 * Don't try to actually handle that case.
	 */
	if (!writable && mode == GACC_STORE)
		return -EOPNOTSUPP;
	hva += offset_in_page(gpa);
	if (mode == GACC_STORE)
		rc = copy_to_user_key((void __user *)hva, data, len, access_key);
	else
		rc = copy_from_user_key(data, (void __user *)hva, len, access_key);
	if (rc)
		return PGM_PROTECTION;
	if (mode == GACC_STORE)
		mark_page_dirty_in_slot(kvm, slot, gfn);
	return 0;
}

int access_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, void *data,
			      unsigned long len, enum gacc_mode mode, u8 access_key)
{
	int offset = offset_in_page(gpa);
	int fragment_len;
	int rc;

	while (min(PAGE_SIZE - offset, len) > 0) {
		fragment_len = min(PAGE_SIZE - offset, len);
		rc = access_guest_page_with_key(kvm, mode, gpa, data, fragment_len, access_key);
		if (rc)
			return rc;
		offset = 0;
		len -= fragment_len;
		data += fragment_len;
		gpa += fragment_len;
	}
	return 0;
}

int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
			  void *data, unsigned long len, enum gacc_mode mode,
			  u8 access_key)
{
	psw_t *psw = &vcpu->arch.sie_block->gpsw;
	unsigned long nr_pages, idx;
	unsigned long gpa_array[2];
	unsigned int fragment_len;
	unsigned long *gpas;
	enum prot_type prot;
	int need_ipte_lock;
	union asce asce;
	bool try_storage_prot_override;
	bool try_fetch_prot_override;
	int rc;

	if (!len)
		return 0;
	ga = kvm_s390_logical_to_effective(vcpu, ga);
	rc = get_vcpu_asce(vcpu, &asce, ga, ar, mode);
	if (rc)
		return rc;
	nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
	gpas = gpa_array;
	if (nr_pages > ARRAY_SIZE(gpa_array))
		gpas = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
	if (!gpas)
		return -ENOMEM;
	try_fetch_prot_override = fetch_prot_override_applicable(vcpu, mode, asce);
	try_storage_prot_override = storage_prot_override_applicable(vcpu);
	need_ipte_lock = psw_bits(*psw).dat && !asce.r;
	if (need_ipte_lock)
		ipte_lock(vcpu->kvm);
	/*
	 * Since we do the access further down ultimately via a move instruction
	 * that does key checking and returns an error in case of a protection
	 * violation, we don't need to do the check during address translation.
	 * Skip it by passing access key 0, which matches any storage key,
	 * obviating the need for any further checks. As a result the check is
	 * handled entirely in hardware on access, we only need to take care to
	 * forego key protection checking if fetch protection override applies or
	 * retry with the special key 9 in case of storage protection override.
	 */
	rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode, 0);
	if (rc)
		goto out_unlock;
	for (idx = 0; idx < nr_pages; idx++) {
		fragment_len = min(PAGE_SIZE - offset_in_page(gpas[idx]), len);
		if (try_fetch_prot_override && fetch_prot_override_applies(ga, fragment_len)) {
			rc = access_guest_page(vcpu->kvm, mode, gpas[idx],
					       data, fragment_len);
		} else {
			rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx],
							data, fragment_len, access_key);
		}
		if (rc == PGM_PROTECTION && try_storage_prot_override)
			rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx],
							data, fragment_len, PAGE_SPO_ACC);
		if (rc)
			break;
		len -= fragment_len;
		data += fragment_len;
		ga = kvm_s390_logical_to_effective(vcpu, ga + fragment_len);
	}
	if (rc > 0) {
		bool terminate = (mode == GACC_STORE) && (idx > 0);

		if (rc == PGM_PROTECTION)
			prot = PROT_TYPE_KEYC;
		else
			prot = PROT_NONE;
		rc = trans_exc_ending(vcpu, rc, ga, ar, mode, prot, terminate);
	}
out_unlock:
	if (need_ipte_lock)
		ipte_unlock(vcpu->kvm);
	if (nr_pages > ARRAY_SIZE(gpa_array))
		vfree(gpas);
	return rc;
}

int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
		      void *data, unsigned long len, enum gacc_mode mode)
{
	unsigned int fragment_len;
	unsigned long gpa;
	int rc = 0;

	while (len && !rc) {
		gpa = kvm_s390_real_to_abs(vcpu, gra);
		fragment_len = min(PAGE_SIZE - offset_in_page(gpa), len);
		rc = access_guest_page(vcpu->kvm, mode, gpa, data, fragment_len);
		len -= fragment_len;
		gra += fragment_len;
		data += fragment_len;
	}
	if (rc > 0)
		vcpu->arch.pgm.code = rc;
	return rc;
}

/**
 * cmpxchg_guest_abs_with_key() - Perform cmpxchg on guest absolute address.
 * @kvm: Virtual machine instance.
 * @gpa: Absolute guest address of the location to be changed.
 * @len: Operand length of the cmpxchg, required: 1 <= len <= 16. Providing a
 *	 non power of two will result in failure.
 * @old_addr: Pointer to old value. If the location at @gpa contains this value,
 *	      the exchange will succeed. After calling cmpxchg_guest_abs_with_key()
 *	      *@old_addr contains the value at @gpa before the attempt to
 *	      exchange the value.
 * @new: The value to place at @gpa.
 * @access_key: The access key to use for the guest access.
 * @success: output value indicating if an exchange occurred.
 *
 * Atomically exchange the value at @gpa with @new, if it contains *@old_addr.
 * Honors storage keys.
 *
 * Return: * 0: successful exchange
 *	   * >0: a program interruption code indicating the reason cmpxchg could
 *		 not be attempted
 *	   * -EINVAL: address misaligned or len not power of two
 *	   * -EAGAIN: transient failure (len 1 or 2)
 *	   * -EOPNOTSUPP: read-only memslot (should never occur)
 */
int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len,
			       __uint128_t *old_addr, __uint128_t new,
			       u8 access_key, bool *success)
{
	gfn_t gfn = gpa_to_gfn(gpa);
	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
	bool writable;
	hva_t hva;
	int ret;

	if (!IS_ALIGNED(gpa, len))
		return -EINVAL;

	hva = gfn_to_hva_memslot_prot(slot, gfn, &writable);
	if (kvm_is_error_hva(hva))
		return PGM_ADDRESSING;
	/*
	 * Check if it's a read-only memslot, even though that cannot occur
	 * since those are unsupported.
	 * Don't try to actually handle that case.
	 */
	if (!writable)
		return -EOPNOTSUPP;

	hva += offset_in_page(gpa);
	/*
	 * The cmpxchg_user_key macro depends on the type of "old", so we need
	 * a case for each valid length and get some code duplication as long
	 * as we don't introduce a new macro.
	 */
	switch (len) {
	case 1: {
		u8 old;

		ret = cmpxchg_user_key((u8 __user *)hva, &old, *old_addr, new, access_key);
		*success = !ret && old == *old_addr;
		*old_addr = old;
		break;
	}
	case 2: {
		u16 old;

		ret = cmpxchg_user_key((u16 __user *)hva, &old, *old_addr, new, access_key);
		*success = !ret && old == *old_addr;
		*old_addr = old;
		break;
	}
	case 4: {
		u32 old;

		ret = cmpxchg_user_key((u32 __user *)hva, &old, *old_addr, new, access_key);
		*success = !ret && old == *old_addr;
		*old_addr = old;
		break;
	}
	case 8: {
		u64 old;

		ret = cmpxchg_user_key((u64 __user *)hva, &old, *old_addr, new, access_key);
		*success = !ret && old == *old_addr;
		*old_addr = old;
		break;
	}
	case 16: {
		__uint128_t old;

		ret = cmpxchg_user_key((__uint128_t __user *)hva, &old, *old_addr, new, access_key);
		*success = !ret && old == *old_addr;
		*old_addr = old;
		break;
	}
	default:
		return -EINVAL;
	}
	if (*success)
		mark_page_dirty_in_slot(kvm, slot, gfn);
	/*
	 * Assume that the fault is caused by protection, either key protection
	 * or user page write protection.
	 */
	if (ret == -EFAULT)
		ret = PGM_PROTECTION;
	return ret;
}

/**
 * guest_translate_address_with_key - translate guest logical into guest absolute address
 * @vcpu: virtual cpu
 * @gva: Guest virtual address
 * @ar: Access register
 * @gpa: Guest physical address
 * @mode: Translation access mode
 * @access_key: access key to match the storage key with
 *
 * Parameter semantics are the same as the ones from guest_translate.
 * The memory contents at the guest address are not changed.
 *
 * Note: The IPTE lock is not taken during this function, so the caller
 * has to take care of this.
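 * Typically this means taking ipte_lock(vcpu->kvm) around the call and
 * releasing it with ipte_unlock(vcpu->kvm), as check_gva_range() does
 * around its own translation.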
 */
int guest_translate_address_with_key(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
				     unsigned long *gpa, enum gacc_mode mode,
				     u8 access_key)
{
	union asce asce;
	int rc;

	gva = kvm_s390_logical_to_effective(vcpu, gva);
	rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
	if (rc)
		return rc;
	return guest_range_to_gpas(vcpu, gva, ar, gpa, 1, asce, mode,
				   access_key);
}

/**
 * check_gva_range - test a range of guest virtual addresses for accessibility
 * @vcpu: virtual cpu
 * @gva: Guest virtual address
 * @ar: Access register
 * @length: Length of test range
 * @mode: Translation access mode
 * @access_key: access key to match the storage keys with
 */
int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
		    unsigned long length, enum gacc_mode mode, u8 access_key)
{
	union asce asce;
	int rc = 0;

	rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
	if (rc)
		return rc;
	ipte_lock(vcpu->kvm);
	rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode,
				 access_key);
	ipte_unlock(vcpu->kvm);

	return rc;
}

/**
 * check_gpa_range - test a range of guest physical addresses for accessibility
 * @kvm: virtual machine instance
 * @gpa: guest physical address
 * @length: length of test range
 * @mode: access mode to test, relevant for storage keys
 * @access_key: access key to match the storage keys with
 */
int check_gpa_range(struct kvm *kvm, unsigned long gpa, unsigned long length,
		    enum gacc_mode mode, u8 access_key)
{
	unsigned int fragment_len;
	int rc = 0;

	while (length && !rc) {
		fragment_len = min(PAGE_SIZE - offset_in_page(gpa), length);
		rc = vm_check_access_key(kvm, access_key, mode, gpa);
		length -= fragment_len;
		gpa += fragment_len;
	}
	return rc;
}

/**
 * kvm_s390_check_low_addr_prot_real - check for low-address protection
 * @vcpu: virtual cpu
 * @gra: Guest real address
 *
 * Checks whether an address is subject to low-address protection and sets
 * up vcpu->arch.pgm accordingly if necessary.
 *
 * Return: 0 if no protection exception, or PGM_PROTECTION if protected.
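 *
 * For example, with low-address protection enabled a store to guest real
 * address 0x1000 falls into the protected range 4096..4607 and is rejected
 * with PGM_PROTECTION, while a store to 0x2000 is not.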
 */
int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra)
{
	union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};

	if (!ctlreg0.lap || !is_low_address(gra))
		return 0;
	return trans_exc(vcpu, PGM_PROTECTION, gra, 0, GACC_STORE, PROT_TYPE_LA);
}

/**
 * kvm_s390_shadow_tables - walk the guest page table and create shadow tables
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @pgt: pointer to the beginning of the page table for the given address if
 *	 successful (return value 0), or to the first invalid DAT entry in
 *	 case of exceptions (return value > 0)
 * @dat_protection: referenced memory is write protected
 * @fake: pgt references contiguous guest memory block, not a pgtable
 */
static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
				  unsigned long *pgt, int *dat_protection,
				  int *fake)
{
	struct kvm *kvm;
	struct gmap *parent;
	union asce asce;
	union vaddress vaddr;
	unsigned long ptr;
	int rc;

	*fake = 0;
	*dat_protection = 0;
	kvm = sg->private;
	parent = sg->parent;
	vaddr.addr = saddr;
	asce.val = sg->orig_asce;
	ptr = asce.rsto * PAGE_SIZE;
	if (asce.r) {
		*fake = 1;
		ptr = 0;
		asce.dt = ASCE_TYPE_REGION1;
	}
	switch (asce.dt) {
	case ASCE_TYPE_REGION1:
		if (vaddr.rfx01 > asce.tl && !*fake)
			return PGM_REGION_FIRST_TRANS;
		break;
	case ASCE_TYPE_REGION2:
		if (vaddr.rfx)
			return PGM_ASCE_TYPE;
		if (vaddr.rsx01 > asce.tl)
			return PGM_REGION_SECOND_TRANS;
		break;
	case ASCE_TYPE_REGION3:
		if (vaddr.rfx || vaddr.rsx)
			return PGM_ASCE_TYPE;
		if (vaddr.rtx01 > asce.tl)
			return PGM_REGION_THIRD_TRANS;
		break;
	case ASCE_TYPE_SEGMENT:
		if (vaddr.rfx || vaddr.rsx || vaddr.rtx)
			return PGM_ASCE_TYPE;
		if (vaddr.sx01 > asce.tl)
			return PGM_SEGMENT_TRANSLATION;
		break;
	}

	switch (asce.dt) {
	case ASCE_TYPE_REGION1: {
		union region1_table_entry rfte;

		if (*fake) {
			ptr += vaddr.rfx * _REGION1_SIZE;
			rfte.val = ptr;
			goto shadow_r2t;
		}
		*pgt = ptr + vaddr.rfx * 8;
		rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val);
		if (rc)
			return rc;
		if (rfte.i)
			return PGM_REGION_FIRST_TRANS;
		if (rfte.tt != TABLE_TYPE_REGION1)
			return PGM_TRANSLATION_SPEC;
		if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl)
			return PGM_REGION_SECOND_TRANS;
		if (sg->edat_level >= 1)
			*dat_protection |= rfte.p;
		ptr = rfte.rto * PAGE_SIZE;
shadow_r2t:
		rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake);
		if (rc)
			return rc;
		kvm->stat.gmap_shadow_r1_entry++;
	}
		fallthrough;
	case ASCE_TYPE_REGION2: {
		union region2_table_entry rste;

		if (*fake) {
			ptr += vaddr.rsx * _REGION2_SIZE;
			rste.val = ptr;
			goto shadow_r3t;
		}
		*pgt = ptr + vaddr.rsx * 8;
		rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val);
		if (rc)
			return rc;
		if (rste.i)
			return PGM_REGION_SECOND_TRANS;
		if (rste.tt != TABLE_TYPE_REGION2)
			return PGM_TRANSLATION_SPEC;
		if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl)
			return PGM_REGION_THIRD_TRANS;
		if (sg->edat_level >= 1)
			*dat_protection |= rste.p;
		ptr = rste.rto * PAGE_SIZE;
shadow_r3t:
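		/*
		 * Propagate the protection collected so far into the entry
		 * that is used to create this level of the shadow table.
		 */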
		rste.p |= *dat_protection;
		rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake);
		if (rc)
			return rc;
		kvm->stat.gmap_shadow_r2_entry++;
	}
		fallthrough;
	case ASCE_TYPE_REGION3: {
		union region3_table_entry rtte;

		if (*fake) {
			ptr += vaddr.rtx * _REGION3_SIZE;
			rtte.val = ptr;
			goto shadow_sgt;
		}
		*pgt = ptr + vaddr.rtx * 8;
		rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val);
		if (rc)
			return rc;
		if (rtte.i)
			return PGM_REGION_THIRD_TRANS;
		if (rtte.tt != TABLE_TYPE_REGION3)
			return PGM_TRANSLATION_SPEC;
		if (rtte.cr && asce.p && sg->edat_level >= 2)
			return PGM_TRANSLATION_SPEC;
		if (rtte.fc && sg->edat_level >= 2) {
			*dat_protection |= rtte.fc0.p;
			*fake = 1;
			ptr = rtte.fc1.rfaa * _REGION3_SIZE;
			rtte.val = ptr;
			goto shadow_sgt;
		}
		if (vaddr.sx01 < rtte.fc0.tf || vaddr.sx01 > rtte.fc0.tl)
			return PGM_SEGMENT_TRANSLATION;
		if (sg->edat_level >= 1)
			*dat_protection |= rtte.fc0.p;
		ptr = rtte.fc0.sto * PAGE_SIZE;
shadow_sgt:
		rtte.fc0.p |= *dat_protection;
		rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake);
		if (rc)
			return rc;
		kvm->stat.gmap_shadow_r3_entry++;
	}
		fallthrough;
	case ASCE_TYPE_SEGMENT: {
		union segment_table_entry ste;

		if (*fake) {
			ptr += vaddr.sx * _SEGMENT_SIZE;
			ste.val = ptr;
			goto shadow_pgt;
		}
		*pgt = ptr + vaddr.sx * 8;
		rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val);
		if (rc)
			return rc;
		if (ste.i)
			return PGM_SEGMENT_TRANSLATION;
		if (ste.tt != TABLE_TYPE_SEGMENT)
			return PGM_TRANSLATION_SPEC;
		if (ste.cs && asce.p)
			return PGM_TRANSLATION_SPEC;
		*dat_protection |= ste.fc0.p;
		if (ste.fc && sg->edat_level >= 1) {
			*fake = 1;
			ptr = ste.fc1.sfaa * _SEGMENT_SIZE;
			ste.val = ptr;
			goto shadow_pgt;
		}
		ptr = ste.fc0.pto * (PAGE_SIZE / 2);
shadow_pgt:
		ste.fc0.p |= *dat_protection;
		rc = gmap_shadow_pgt(sg, saddr, ste.val, *fake);
		if (rc)
			return rc;
		kvm->stat.gmap_shadow_sg_entry++;
	}
	}
	/* Return the parent address of the page table */
	*pgt = ptr;
	return 0;
}

/**
 * shadow_pgt_lookup() - find a shadow page table
 * @sg: pointer to the shadow guest address space structure
 * @saddr: the address in the shadow guest address space
 * @pgt: parent gmap address of the page table to get shadowed
 * @dat_protection: if the pgtable is marked as protected by dat
 * @fake: pgt references contiguous guest memory block, not a pgtable
 *
 * Returns 0 if the shadow page table was found and -EAGAIN if the page
 * table was not found.
 *
 * Called with sg->mm->mmap_lock in read.
 */
static int shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, unsigned long *pgt,
			     int *dat_protection, int *fake)
{
	unsigned long pt_index;
	unsigned long *table;
	struct page *page;
	int rc;

	spin_lock(&sg->guest_table_lock);
	table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
	if (table && !(*table & _SEGMENT_ENTRY_INVALID)) {
		/* Shadow page tables are full pages (pte+pgste) */
		page = pfn_to_page(*table >> PAGE_SHIFT);
		pt_index = gmap_pgste_get_pgt_addr(page_to_virt(page));
		*pgt = pt_index & ~GMAP_SHADOW_FAKE_TABLE;
		*dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT);
		*fake = !!(pt_index & GMAP_SHADOW_FAKE_TABLE);
		rc = 0;
	} else {
		rc = -EAGAIN;
	}
	spin_unlock(&sg->guest_table_lock);
	return rc;
}

/**
 * kvm_s390_shadow_fault - handle fault on a shadow page table
 * @vcpu: virtual cpu
 * @sg: pointer to the shadow guest address space structure
 * @saddr: faulting address in the shadow gmap
 * @datptr: will contain the address of the faulting DAT table entry, or of
 *	    the valid leaf, plus some flags
 *
 * Returns: - 0 if the shadow fault was successfully resolved
 *	    - > 0 (pgm exception code) on exceptions while faulting
 *	    - -EAGAIN if the caller can retry immediately
 *	    - -EFAULT when accessing invalid guest addresses
 *	    - -ENOMEM if out of memory
 */
int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
			  unsigned long saddr, unsigned long *datptr)
{
	union vaddress vaddr;
	union page_table_entry pte;
	unsigned long pgt = 0;
	int dat_protection, fake;
	int rc;

	if (KVM_BUG_ON(!gmap_is_shadow(sg), vcpu->kvm))
		return -EFAULT;

	mmap_read_lock(sg->mm);
	/*
	 * We don't want any guest-2 tables to change - so the parent
	 * tables/pointers we read stay valid - unshadowing is however
	 * always possible - only guest_table_lock protects us.
	 */
	ipte_lock(vcpu->kvm);

	rc = shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
	if (rc)
		rc = kvm_s390_shadow_tables(sg, saddr, &pgt, &dat_protection,
					    &fake);

	vaddr.addr = saddr;
	if (fake) {
		pte.val = pgt + vaddr.px * PAGE_SIZE;
		goto shadow_page;
	}

	switch (rc) {
	case PGM_SEGMENT_TRANSLATION:
	case PGM_REGION_THIRD_TRANS:
	case PGM_REGION_SECOND_TRANS:
	case PGM_REGION_FIRST_TRANS:
		pgt |= PEI_NOT_PTE;
		break;
	case 0:
		pgt += vaddr.px * 8;
		rc = gmap_read_table(sg->parent, pgt, &pte.val);
	}
	if (datptr)
		*datptr = pgt | dat_protection * PEI_DAT_PROT;
	if (!rc && pte.i)
		rc = PGM_PAGE_TRANSLATION;
	if (!rc && pte.z)
		rc = PGM_TRANSLATION_SPEC;
shadow_page:
	pte.p |= dat_protection;
	if (!rc)
		rc = gmap_shadow_page(sg, saddr, __pte(pte.val));
	vcpu->kvm->stat.gmap_shadow_pg_entry++;
	ipte_unlock(vcpu->kvm);
	mmap_read_unlock(sg->mm);
	return rc;
}