// SPDX-License-Identifier: GPL-2.0
/*
 * guest access functions
 *
 * Copyright IBM Corp. 2014
 *
 */

#include <linux/vmalloc.h>
#include <linux/mm_types.h>
#include <linux/err.h>
#include <linux/pgtable.h>
#include <linux/bitfield.h>
#include <linux/kvm_host.h>
#include <linux/kvm_types.h>
#include <asm/diag.h>
#include <asm/access-regs.h>
#include <asm/fault.h>
#include <asm/dat-bits.h>
#include "kvm-s390.h"
#include "dat.h"
#include "gmap.h"
#include "gaccess.h"
#include "faultin.h"

#define GMAP_SHADOW_FAKE_TABLE 1ULL

union dat_table_entry {
	unsigned long val;
	union region1_table_entry pgd;
	union region2_table_entry p4d;
	union region3_table_entry pud;
	union segment_table_entry pmd;
	union page_table_entry pte;
};

#define WALK_N_ENTRIES 7
#define LEVEL_MEM -2
struct pgtwalk {
	struct guest_fault raw_entries[WALK_N_ENTRIES];
	gpa_t last_addr;
	int level;
	bool p;
};

static inline struct guest_fault *get_entries(struct pgtwalk *w)
{
	return w->raw_entries - LEVEL_MEM;
}
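
/*
 * Illustrative note (not part of the original interface docs): get_entries()
 * biases the array base so that a walk level can be used directly as an
 * index.  With LEVEL_MEM == -2 and WALK_N_ENTRIES == 7, entries[level] is
 * valid for level in [-2, 4]:
 *
 *	struct guest_fault *entries = get_entries(w);
 *
 *	entries[LEVEL_MEM] -> w->raw_entries[0]  (the target page itself)
 *	entries[3]         -> w->raw_entries[5]  (region-first table level)
 */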

/*
 * raddress union which will contain the result (real or absolute address)
 * after a page table walk. The rfaa, sfaa and pfra members are used to
 * simply assign them the value of a region, segment or page table entry.
 */
union raddress {
	unsigned long addr;
	unsigned long rfaa : 33; /* Region-Frame Absolute Address */
	unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
	unsigned long pfra : 52; /* Page-Frame Real Address */
};

union alet {
	u32 val;
	struct {
		u32 reserved : 7;
		u32 p : 1;
		u32 alesn : 8;
		u32 alen : 16;
	};
};

union ald {
	u32 val;
	struct {
		u32 : 1;
		u32 alo : 24;
		u32 all : 7;
	};
};

struct ale {
	unsigned long i : 1;      /* ALEN-Invalid Bit */
	unsigned long : 5;
	unsigned long fo : 1;     /* Fetch-Only Bit */
	unsigned long p : 1;      /* Private Bit */
	unsigned long alesn : 8;  /* Access-List-Entry Sequence Number */
	unsigned long aleax : 16; /* Access-List-Entry Authorization Index */
	unsigned long : 32;
	unsigned long : 1;
	unsigned long asteo : 25; /* ASN-Second-Table-Entry Origin */
	unsigned long : 6;
	unsigned long astesn : 32; /* ASTE Sequence Number */
};

struct aste {
	unsigned long i : 1;    /* ASX-Invalid Bit */
	unsigned long ato : 29; /* Authority-Table Origin */
	unsigned long : 1;
	unsigned long b : 1;    /* Base-Space Bit */
	unsigned long ax : 16;  /* Authorization Index */
	unsigned long atl : 12; /* Authority-Table Length */
	unsigned long : 2;
	unsigned long ca : 1;   /* Controlled-ASN Bit */
	unsigned long ra : 1;   /* Reusable-ASN Bit */
	unsigned long asce : 64; /* Address-Space-Control Element */
	unsigned long ald : 32;
	unsigned long astesn : 32;
	/* .. more fields there */
};

union oac {
	unsigned int val;
	struct {
		struct {
			unsigned short key : 4;
			unsigned short : 4;
			unsigned short as : 2;
			unsigned short : 4;
			unsigned short k : 1;
			unsigned short a : 1;
		} oac1;
		struct {
			unsigned short key : 4;
			unsigned short : 4;
			unsigned short as : 2;
			unsigned short : 4;
			unsigned short k : 1;
			unsigned short a : 1;
		} oac2;
	};
};

int ipte_lock_held(struct kvm *kvm)
{
	if (sclp.has_siif)
		return kvm->arch.sca->ipte_control.kh != 0;

	return kvm->arch.ipte_lock_count != 0;
}

static void ipte_lock_simple(struct kvm *kvm)
{
	union ipte_control old, new, *ic;

	mutex_lock(&kvm->arch.ipte_mutex);
	kvm->arch.ipte_lock_count++;
	if (kvm->arch.ipte_lock_count > 1)
		goto out;
retry:
	ic = &kvm->arch.sca->ipte_control;
	old = READ_ONCE(*ic);
	do {
		if (old.k) {
			cond_resched();
			goto retry;
		}
		new = old;
		new.k = 1;
	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
out:
	mutex_unlock(&kvm->arch.ipte_mutex);
}

static void ipte_unlock_simple(struct kvm *kvm)
{
	union ipte_control old, new, *ic;

	mutex_lock(&kvm->arch.ipte_mutex);
	kvm->arch.ipte_lock_count--;
	if (kvm->arch.ipte_lock_count)
		goto out;
	ic = &kvm->arch.sca->ipte_control;
	old = READ_ONCE(*ic);
	do {
		new = old;
		new.k = 0;
	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
	wake_up(&kvm->arch.ipte_wq);
out:
	mutex_unlock(&kvm->arch.ipte_mutex);
}

static void ipte_lock_siif(struct kvm *kvm)
{
	union ipte_control old, new, *ic;

retry:
	ic = &kvm->arch.sca->ipte_control;
	old = READ_ONCE(*ic);
	do {
		if (old.kg) {
			cond_resched();
			goto retry;
		}
		new = old;
		new.k = 1;
		new.kh++;
	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
}

static void ipte_unlock_siif(struct kvm *kvm)
{
	union ipte_control old, new, *ic;

	ic = &kvm->arch.sca->ipte_control;
	old = READ_ONCE(*ic);
	do {
		new = old;
		new.kh--;
		if (!new.kh)
			new.k = 0;
	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
	if (!new.kh)
		wake_up(&kvm->arch.ipte_wq);
}

void ipte_lock(struct kvm *kvm)
{
	if (sclp.has_siif)
		ipte_lock_siif(kvm);
	else
		ipte_lock_simple(kvm);
}

void ipte_unlock(struct kvm *kvm)
{
	if (sclp.has_siif)
		ipte_unlock_siif(kvm);
	else
		ipte_unlock_simple(kvm);
}
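
/*
 * Illustrative usage sketch, mirroring check_gva_range() further down:
 * callers that walk guest DAT tables bracket the walk with the IPTE lock so
 * that a concurrent guest IPTE cannot invalidate entries mid-walk:
 *
 *	ipte_lock(vcpu->kvm);
 *	rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode, key);
 *	ipte_unlock(vcpu->kvm);
 */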

static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar,
			  enum gacc_mode mode)
{
	union alet alet;
	struct ale ale;
	struct aste aste;
	unsigned long ald_addr, authority_table_addr;
	union ald ald;
	int eax, rc;
	u8 authority_table;

	if (ar >= NUM_ACRS)
		return -EINVAL;

	if (vcpu->arch.acrs_loaded)
		save_access_regs(vcpu->run->s.regs.acrs);
	alet.val = vcpu->run->s.regs.acrs[ar];

	if (ar == 0 || alet.val == 0) {
		asce->val = vcpu->arch.sie_block->gcr[1];
		return 0;
	} else if (alet.val == 1) {
		asce->val = vcpu->arch.sie_block->gcr[7];
		return 0;
	}

	if (alet.reserved)
		return PGM_ALET_SPECIFICATION;

	if (alet.p)
		ald_addr = vcpu->arch.sie_block->gcr[5];
	else
		ald_addr = vcpu->arch.sie_block->gcr[2];
	ald_addr &= 0x7fffffc0;

	rc = read_guest_real(vcpu, ald_addr + 16, &ald.val, sizeof(union ald));
	if (rc)
		return rc;

	if (alet.alen / 8 > ald.all)
		return PGM_ALEN_TRANSLATION;

	if (0x7fffffff - ald.alo * 128 < alet.alen * 16)
		return PGM_ADDRESSING;

	rc = read_guest_real(vcpu, ald.alo * 128 + alet.alen * 16, &ale,
			     sizeof(struct ale));
	if (rc)
		return rc;

	if (ale.i == 1)
		return PGM_ALEN_TRANSLATION;
	if (ale.alesn != alet.alesn)
		return PGM_ALE_SEQUENCE;

	rc = read_guest_real(vcpu, ale.asteo * 64, &aste, sizeof(struct aste));
	if (rc)
		return rc;

	if (aste.i)
		return PGM_ASTE_VALIDITY;
	if (aste.astesn != ale.astesn)
		return PGM_ASTE_SEQUENCE;

	if (ale.p == 1) {
		eax = (vcpu->arch.sie_block->gcr[8] >> 16) & 0xffff;
		if (ale.aleax != eax) {
			if (eax / 16 > aste.atl)
				return PGM_EXTENDED_AUTHORITY;

			authority_table_addr = aste.ato * 4 + eax / 4;

			rc = read_guest_real(vcpu, authority_table_addr,
					     &authority_table,
					     sizeof(u8));
			if (rc)
				return rc;

			if ((authority_table & (0x40 >> ((eax & 3) * 2))) == 0)
				return PGM_EXTENDED_AUTHORITY;
		}
	}

	if (ale.fo == 1 && mode == GACC_STORE)
		return PGM_PROTECTION;

	asce->val = aste.asce;
	return 0;
}

enum prot_type {
	PROT_TYPE_LA = 0,
	PROT_TYPE_KEYC = 1,
	PROT_TYPE_ALC = 2,
	PROT_TYPE_DAT = 3,
	PROT_TYPE_IEP = 4,
	/* Dummy value for passing an initialized value when code != PGM_PROTECTION */
	PROT_TYPE_DUMMY,
};

static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
			    enum gacc_mode mode, enum prot_type prot, bool terminate)
{
	struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
	union teid *teid;

	memset(pgm, 0, sizeof(*pgm));
	pgm->code = code;
	teid = (union teid *)&pgm->trans_exc_code;

	switch (code) {
	case PGM_PROTECTION:
		switch (prot) {
		case PROT_TYPE_DUMMY:
			/* We should never get here, acts like termination */
			WARN_ON_ONCE(1);
			break;
		case PROT_TYPE_IEP:
			teid->b61 = 1;
			fallthrough;
		case PROT_TYPE_LA:
			teid->b56 = 1;
			break;
		case PROT_TYPE_KEYC:
			teid->b60 = 1;
			break;
		case PROT_TYPE_ALC:
			teid->b60 = 1;
			fallthrough;
		case PROT_TYPE_DAT:
			teid->b61 = 1;
			break;
		}
		if (terminate) {
			teid->b56 = 0;
			teid->b60 = 0;
			teid->b61 = 0;
		}
		fallthrough;
	case PGM_ASCE_TYPE:
	case PGM_PAGE_TRANSLATION:
	case PGM_REGION_FIRST_TRANS:
	case PGM_REGION_SECOND_TRANS:
	case PGM_REGION_THIRD_TRANS:
	case PGM_SEGMENT_TRANSLATION:
		/*
		 * op_access_id only applies to MOVE_PAGE -> set bit 61
		 * exc_access_id has to be set to 0 for some instructions. Both
		 * cases have to be handled by the caller.
		 */
		teid->addr = gva >> PAGE_SHIFT;
		teid->fsi = mode == GACC_STORE ? TEID_FSI_STORE : TEID_FSI_FETCH;
		teid->as = psw_bits(vcpu->arch.sie_block->gpsw).as;
		fallthrough;
	case PGM_ALEN_TRANSLATION:
	case PGM_ALE_SEQUENCE:
	case PGM_ASTE_VALIDITY:
	case PGM_ASTE_SEQUENCE:
	case PGM_EXTENDED_AUTHORITY:
		/*
		 * We can always store exc_access_id, as it is
		 * undefined for non-ar cases. It is undefined for
		 * most DAT protection exceptions.
		 */
		pgm->exc_access_id = ar;
		break;
	}
	return code;
}

static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
		     enum gacc_mode mode, enum prot_type prot)
{
	return trans_exc_ending(vcpu, code, gva, ar, mode, prot, false);
}

static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
			 unsigned long ga, u8 ar, enum gacc_mode mode)
{
	int rc;
	struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw);

	if (!psw.dat) {
		asce->val = 0;
		asce->r = 1;
		return 0;
	}

	if ((mode == GACC_IFETCH) && (psw.as != PSW_BITS_AS_HOME))
		psw.as = PSW_BITS_AS_PRIMARY;

	switch (psw.as) {
	case PSW_BITS_AS_PRIMARY:
		asce->val = vcpu->arch.sie_block->gcr[1];
		return 0;
	case PSW_BITS_AS_SECONDARY:
		asce->val = vcpu->arch.sie_block->gcr[7];
		return 0;
	case PSW_BITS_AS_HOME:
		asce->val = vcpu->arch.sie_block->gcr[13];
		return 0;
	case PSW_BITS_AS_ACCREG:
		rc = ar_translation(vcpu, asce, ar, mode);
		if (rc > 0)
			return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_ALC);
		return rc;
	}
	return 0;
}

static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
{
	return kvm_read_guest(kvm, gpa, val, sizeof(*val));
}

/**
 * guest_translate_gva() - translate a guest virtual into a guest absolute address
 * @vcpu: virtual cpu
 * @gva: guest virtual address
 * @gpa: points to where guest physical (absolute) address should be stored
 * @asce: effective asce
 * @mode: indicates the access mode to be used
 * @prot: returns the type for protection exceptions
 *
 * Translate a guest virtual address into a guest absolute address by means
 * of dynamic address translation as specified by the architecture.
 * If the resulting absolute address is not available in the configuration
 * an addressing exception is indicated and @gpa will not be changed.
 *
 * Returns: - zero on success; @gpa contains the resulting absolute address
 *	    - a negative value if guest access failed due to e.g. broken
 *	      guest mapping
 *	    - a positive value if an access exception happened. In this case
 *	      the returned value is the program interruption code as defined
 *	      by the architecture
 */
static unsigned long guest_translate_gva(struct kvm_vcpu *vcpu, unsigned long gva,
					 unsigned long *gpa, const union asce asce,
					 enum gacc_mode mode, enum prot_type *prot)
{
	union vaddress vaddr = {.addr = gva};
	union raddress raddr = {.addr = gva};
	union page_table_entry pte;
	int dat_protection = 0;
	int iep_protection = 0;
	union ctlreg0 ctlreg0;
	unsigned long ptr;
	int edat1, edat2, iep;

	ctlreg0.val = vcpu->arch.sie_block->gcr[0];
	edat1 = ctlreg0.edat && test_kvm_facility(vcpu->kvm, 8);
	edat2 = edat1 && test_kvm_facility(vcpu->kvm, 78);
	iep = ctlreg0.iep && test_kvm_facility(vcpu->kvm, 130);
	if (asce.r)
		goto real_address;
	ptr = asce.rsto * PAGE_SIZE;
	switch (asce.dt) {
	case ASCE_TYPE_REGION1:
		if (vaddr.rfx01 > asce.tl)
			return PGM_REGION_FIRST_TRANS;
		ptr += vaddr.rfx * 8;
		break;
	case ASCE_TYPE_REGION2:
		if (vaddr.rfx)
			return PGM_ASCE_TYPE;
		if (vaddr.rsx01 > asce.tl)
			return PGM_REGION_SECOND_TRANS;
		ptr += vaddr.rsx * 8;
		break;
	case ASCE_TYPE_REGION3:
		if (vaddr.rfx || vaddr.rsx)
			return PGM_ASCE_TYPE;
		if (vaddr.rtx01 > asce.tl)
			return PGM_REGION_THIRD_TRANS;
		ptr += vaddr.rtx * 8;
		break;
	case ASCE_TYPE_SEGMENT:
		if (vaddr.rfx || vaddr.rsx || vaddr.rtx)
			return PGM_ASCE_TYPE;
		if (vaddr.sx01 > asce.tl)
			return PGM_SEGMENT_TRANSLATION;
		ptr += vaddr.sx * 8;
		break;
	}
	switch (asce.dt) {
	case ASCE_TYPE_REGION1: {
		union region1_table_entry rfte;

		if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
			return PGM_ADDRESSING;
		if (deref_table(vcpu->kvm, ptr, &rfte.val))
			return -EFAULT;
		if (rfte.i)
			return PGM_REGION_FIRST_TRANS;
		if (rfte.tt != TABLE_TYPE_REGION1)
			return PGM_TRANSLATION_SPEC;
		if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl)
			return PGM_REGION_SECOND_TRANS;
		if (edat1)
			dat_protection |= rfte.p;
		ptr = rfte.rto * PAGE_SIZE + vaddr.rsx * 8;
	}
		fallthrough;
	case ASCE_TYPE_REGION2: {
		union region2_table_entry rste;

		if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
			return PGM_ADDRESSING;
		if (deref_table(vcpu->kvm, ptr, &rste.val))
			return -EFAULT;
		if (rste.i)
			return PGM_REGION_SECOND_TRANS;
		if (rste.tt != TABLE_TYPE_REGION2)
			return PGM_TRANSLATION_SPEC;
		if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl)
			return PGM_REGION_THIRD_TRANS;
		if (edat1)
			dat_protection |= rste.p;
		ptr = rste.rto * PAGE_SIZE + vaddr.rtx * 8;
	}
		fallthrough;
	case ASCE_TYPE_REGION3: {
		union region3_table_entry rtte;

		if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
			return PGM_ADDRESSING;
		if (deref_table(vcpu->kvm, ptr, &rtte.val))
			return -EFAULT;
		if (rtte.i)
			return PGM_REGION_THIRD_TRANS;
		if (rtte.tt != TABLE_TYPE_REGION3)
			return PGM_TRANSLATION_SPEC;
		if (rtte.cr && asce.p && edat2)
			return PGM_TRANSLATION_SPEC;
		if (rtte.fc && edat2) {
			dat_protection |= rtte.fc1.p;
			iep_protection = rtte.fc1.iep;
			raddr.rfaa = rtte.fc1.rfaa;
			goto absolute_address;
		}
		if (vaddr.sx01 < rtte.fc0.tf)
			return PGM_SEGMENT_TRANSLATION;
		if (vaddr.sx01 > rtte.fc0.tl)
			return PGM_SEGMENT_TRANSLATION;
		if (edat1)
			dat_protection |= rtte.fc0.p;
		ptr = rtte.fc0.sto * PAGE_SIZE + vaddr.sx * 8;
	}
		fallthrough;
	case ASCE_TYPE_SEGMENT: {
		union segment_table_entry ste;

		if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
			return PGM_ADDRESSING;
		if (deref_table(vcpu->kvm, ptr, &ste.val))
			return -EFAULT;
		if (ste.i)
			return PGM_SEGMENT_TRANSLATION;
		if (ste.tt != TABLE_TYPE_SEGMENT)
			return PGM_TRANSLATION_SPEC;
		if (ste.cs && asce.p)
			return PGM_TRANSLATION_SPEC;
		if (ste.fc && edat1) {
			dat_protection |= ste.fc1.p;
			iep_protection = ste.fc1.iep;
			raddr.sfaa = ste.fc1.sfaa;
			goto absolute_address;
		}
		dat_protection |= ste.fc0.p;
		ptr = ste.fc0.pto * (PAGE_SIZE / 2) + vaddr.px * 8;
	}
	}
	if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
		return PGM_ADDRESSING;
	if (deref_table(vcpu->kvm, ptr, &pte.val))
		return -EFAULT;
	if (pte.i)
		return PGM_PAGE_TRANSLATION;
	if (pte.z)
		return PGM_TRANSLATION_SPEC;
	dat_protection |= pte.p;
	iep_protection = pte.iep;
	raddr.pfra = pte.pfra;
real_address:
	raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr);
absolute_address:
	if (mode == GACC_STORE && dat_protection) {
		*prot = PROT_TYPE_DAT;
		return PGM_PROTECTION;
	}
	if (mode == GACC_IFETCH && iep_protection && iep) {
		*prot = PROT_TYPE_IEP;
		return PGM_PROTECTION;
	}
	if (!kvm_is_gpa_in_memslot(vcpu->kvm, raddr.addr))
		return PGM_ADDRESSING;
	*gpa = raddr.addr;
	return 0;
}
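
/*
 * Note (illustrative, derived from the walk above): with EDAT-1 enabled the
 * DAT-protection bit is OR-ed into dat_protection at every level, so a P bit
 * set in e.g. a region-third table entry write-protects every page reachable
 * through that entry, even if the final page table entry has P == 0.
 */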

static inline int is_low_address(unsigned long ga)
{
	/* Check for address ranges 0..511 and 4096..4607 */
	return (ga & ~0x11fful) == 0;
}
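
/*
 * Worked example for the mask above: ~0x11ff clears bits 0-8 and bit 12, so
 * the result is zero exactly when no other bit of ga is set, i.e. when ga is
 * in 0x0000..0x01ff (0..511) or 0x1000..0x11ff (4096..4607), the two
 * low-address-protected ranges.
 */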

static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
					  const union asce asce)
{
	union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
	psw_t *psw = &vcpu->arch.sie_block->gpsw;

	if (!ctlreg0.lap)
		return 0;
	if (psw_bits(*psw).dat && asce.p)
		return 0;
	return 1;
}

static int vm_check_access_key_gpa(struct kvm *kvm, u8 access_key,
				   enum gacc_mode mode, gpa_t gpa)
{
	union skey storage_key;
	int r;

	scoped_guard(read_lock, &kvm->mmu_lock)
		r = dat_get_storage_key(kvm->arch.gmap->asce, gpa_to_gfn(gpa), &storage_key);
	if (r)
		return r;
	if (access_key == 0 || storage_key.acc == access_key)
		return 0;
	if ((mode == GACC_FETCH || mode == GACC_IFETCH) && !storage_key.fp)
		return 0;
	return PGM_PROTECTION;
}

static bool fetch_prot_override_applicable(struct kvm_vcpu *vcpu, enum gacc_mode mode,
					   union asce asce)
{
	psw_t *psw = &vcpu->arch.sie_block->gpsw;
	unsigned long override;

	if (mode == GACC_FETCH || mode == GACC_IFETCH) {
		/* check if fetch protection override enabled */
		override = vcpu->arch.sie_block->gcr[0];
		override &= CR0_FETCH_PROTECTION_OVERRIDE;
		/* not applicable if subject to DAT && private space */
		override = override && !(psw_bits(*psw).dat && asce.p);
		return override;
	}
	return false;
}

static bool fetch_prot_override_applies(unsigned long ga, unsigned int len)
{
	return ga < 2048 && ga + len <= 2048;
}

static bool storage_prot_override_applicable(struct kvm_vcpu *vcpu)
{
	/* check if storage protection override enabled */
	return vcpu->arch.sie_block->gcr[0] & CR0_STORAGE_PROTECTION_OVERRIDE;
}

static bool storage_prot_override_applies(u8 access_control)
{
	/* matches special storage protection override key (9) -> allow */
	return access_control == PAGE_SPO_ACC;
}

static int vcpu_check_access_key_gpa(struct kvm_vcpu *vcpu, u8 access_key,
				     enum gacc_mode mode, union asce asce, gpa_t gpa,
				     unsigned long ga, unsigned int len)
{
	union skey storage_key;
	int r;

	/* access key 0 matches any storage key -> allow */
	if (access_key == 0)
		return 0;
	/*
	 * caller needs to ensure that gfn is accessible, so we can
	 * assume that this cannot fail
	 */
	scoped_guard(read_lock, &vcpu->kvm->mmu_lock)
		r = dat_get_storage_key(vcpu->arch.gmap->asce, gpa_to_gfn(gpa), &storage_key);
	if (r)
		return r;
	/* access key matches storage key -> allow */
	if (storage_key.acc == access_key)
		return 0;
	if (mode == GACC_FETCH || mode == GACC_IFETCH) {
		/* it is a fetch and fetch protection is off -> allow */
		if (!storage_key.fp)
			return 0;
		if (fetch_prot_override_applicable(vcpu, mode, asce) &&
		    fetch_prot_override_applies(ga, len))
			return 0;
	}
	if (storage_prot_override_applicable(vcpu) &&
	    storage_prot_override_applies(storage_key.acc))
		return 0;
	return PGM_PROTECTION;
}
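
/*
 * Illustrative decision summary of vcpu_check_access_key_gpa() (a paraphrase
 * of the checks above, not of the architecture text), for access key K and a
 * storage key with access-control bits ACC and fetch-protection bit FP:
 *
 *	K == 0                                           -> allow
 *	ACC == K                                         -> allow
 *	fetch && !FP                                     -> allow
 *	fetch && fetch-prot override applies (ga < 2048) -> allow
 *	ACC == 9 && storage-prot override enabled        -> allow
 *	otherwise                                        -> PGM_PROTECTION
 */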

/**
 * guest_range_to_gpas() - Calculate guest physical addresses of page fragments
 * covering a logical range
 * @vcpu: virtual cpu
 * @ga: guest address, start of range
 * @ar: access register
 * @gpas: output argument, may be NULL
 * @len: length of range in bytes
 * @asce: address-space-control element to use for translation
 * @mode: access mode
 * @access_key: access key to match the range's storage keys against
 *
 * Translate a logical range to a series of guest absolute addresses,
 * such that the concatenation of page fragments starting at each gpa make up
 * the whole range.
 * The translation is performed as if done by the cpu for the given @asce, @ar,
 * @mode and state of the @vcpu.
 * If the translation causes an exception, its program interruption code is
 * returned and the &struct kvm_s390_pgm_info pgm member of @vcpu is modified
 * such that a subsequent call to kvm_s390_inject_prog_vcpu() will inject
 * a correct exception into the guest.
 * The resulting gpas are stored into @gpas, unless it is NULL.
 *
 * Note: All fragments except the first one start at the beginning of a page.
 * When deriving the boundaries of a fragment from a gpa, all but the last
 * fragment end at the end of the page.
 *
 * Return:
 * * 0		- success
 * * <0		- translation could not be performed, for example if guest
 *		  memory could not be accessed
 * * >0		- an access exception occurred. In this case the returned value
 *		  is the program interruption code and the contents of pgm may
 *		  be used to inject an exception into the guest.
 */
static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
			       unsigned long *gpas, unsigned long len,
			       const union asce asce, enum gacc_mode mode,
			       u8 access_key)
{
	psw_t *psw = &vcpu->arch.sie_block->gpsw;
	unsigned int offset = offset_in_page(ga);
	unsigned int fragment_len;
	int lap_enabled, rc = 0;
	enum prot_type prot;
	unsigned long gpa;

	lap_enabled = low_address_protection_enabled(vcpu, asce);
	while (min(PAGE_SIZE - offset, len) > 0) {
		fragment_len = min(PAGE_SIZE - offset, len);
		ga = kvm_s390_logical_to_effective(vcpu, ga);
		if (mode == GACC_STORE && lap_enabled && is_low_address(ga))
			return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode,
					 PROT_TYPE_LA);
		if (psw_bits(*psw).dat) {
			rc = guest_translate_gva(vcpu, ga, &gpa, asce, mode, &prot);
			if (rc < 0)
				return rc;
		} else {
			gpa = kvm_s390_real_to_abs(vcpu, ga);
			if (!kvm_is_gpa_in_memslot(vcpu->kvm, gpa)) {
				rc = PGM_ADDRESSING;
				prot = PROT_TYPE_DUMMY;
			}
		}
		if (rc)
			return trans_exc(vcpu, rc, ga, ar, mode, prot);
		rc = vcpu_check_access_key_gpa(vcpu, access_key, mode, asce, gpa, ga, fragment_len);
		if (rc)
			return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_KEYC);
		if (gpas)
			*gpas++ = gpa;
		offset = 0;
		ga += fragment_len;
		len -= fragment_len;
	}
	return 0;
}
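
/*
 * Worked example (illustrative): with 4K pages, ga = 0x1ffe and len = 8
 * cross one page boundary and yield two fragments:
 *
 *	fragment 0: logical 0x1ffe..0x1fff -> gpas[0] (2 bytes)
 *	fragment 1: logical 0x2000..0x2005 -> gpas[1] (6 bytes)
 *
 * Each logical page may translate to an unrelated absolute page, hence one
 * gpa per fragment.
 */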

static int access_guest_page_gpa(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
				 void *data, unsigned int len)
{
	const unsigned int offset = offset_in_page(gpa);
	const gfn_t gfn = gpa_to_gfn(gpa);
	int rc;

	if (!gfn_to_memslot(kvm, gfn))
		return PGM_ADDRESSING;
	if (mode == GACC_STORE)
		rc = kvm_write_guest_page(kvm, gfn, data, offset, len);
	else
		rc = kvm_read_guest_page(kvm, gfn, data, offset, len);
	return rc;
}

static int mvcos_key(void *to, const void *from, unsigned long size, u8 dst_key, u8 src_key)
{
	union oac spec = {
		.oac1.key = dst_key,
		.oac1.k = !!dst_key,
		.oac2.key = src_key,
		.oac2.k = !!src_key,
	};
	int exception = PGM_PROTECTION;

	asm_inline volatile(
		"	lr	%%r0,%[spec]\n"
		"0:	mvcos	%[to],%[from],%[size]\n"
		"1:	lhi	%[exc],0\n"
		"2:\n"
		EX_TABLE(0b, 2b)
		EX_TABLE(1b, 2b)
		: [size] "+d" (size), [to] "=Q" (*(char *)to), [exc] "+d" (exception)
		: [spec] "d" (spec.val), [from] "Q" (*(const char *)from)
		: "memory", "cc", "0");
	return exception;
}

struct acc_page_key_context {
	void *data;
	int exception;
	unsigned short offset;
	unsigned short len;
	bool store;
	u8 access_key;
};

static void _access_guest_page_with_key_gpa(struct guest_fault *f)
{
	struct acc_page_key_context *context = f->priv;
	void *ptr;
	int r;

	ptr = __va(PFN_PHYS(f->pfn) | context->offset);

	if (context->store)
		r = mvcos_key(ptr, context->data, context->len, context->access_key, 0);
	else
		r = mvcos_key(context->data, ptr, context->len, 0, context->access_key);

	context->exception = r;
}

static int access_guest_page_with_key_gpa(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
					  void *data, unsigned int len, u8 acc)
{
	struct acc_page_key_context context = {
		.offset = offset_in_page(gpa),
		.len = len,
		.data = data,
		.access_key = acc,
		.store = mode == GACC_STORE,
	};
	struct guest_fault fault = {
		.gfn = gpa_to_gfn(gpa),
		.priv = &context,
		.write_attempt = mode == GACC_STORE,
		.callback = _access_guest_page_with_key_gpa,
	};
	int rc;

	if (KVM_BUG_ON((len + context.offset) > PAGE_SIZE, kvm))
		return -EINVAL;

	rc = kvm_s390_faultin_gfn(NULL, kvm, &fault);
	if (rc)
		return rc;
	return context.exception;
}

int access_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, void *data,
			      unsigned long len, enum gacc_mode mode, u8 access_key)
{
	int offset = offset_in_page(gpa);
	int fragment_len;
	int rc;

	while (min(PAGE_SIZE - offset, len) > 0) {
		fragment_len = min(PAGE_SIZE - offset, len);
		rc = access_guest_page_with_key_gpa(kvm, mode, gpa, data, fragment_len, access_key);
		if (rc)
			return rc;
		offset = 0;
		len -= fragment_len;
		data += fragment_len;
		gpa += fragment_len;
	}
	return 0;
}
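
/*
 * Illustrative usage sketch (hypothetical caller): fetch a guest control
 * block from absolute storage, honoring the access key of the issuing
 * instruction:
 *
 *	u8 buf[32];
 *	int rc;
 *
 *	rc = access_guest_abs_with_key(kvm, gpa, buf, sizeof(buf),
 *				       GACC_FETCH, access_key);
 *	if (rc > 0)
 *		... rc is a program interruption code, e.g. PGM_PROTECTION ...
 *	else if (rc < 0)
 *		... host error, e.g. -EFAULT ...
 */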

int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
			  void *data, unsigned long len, enum gacc_mode mode,
			  u8 access_key)
{
	psw_t *psw = &vcpu->arch.sie_block->gpsw;
	unsigned long nr_pages, idx;
	unsigned long gpa_array[2];
	unsigned int fragment_len;
	unsigned long *gpas;
	enum prot_type prot;
	int need_ipte_lock;
	union asce asce;
	bool try_storage_prot_override;
	bool try_fetch_prot_override;
	int rc;

	if (!len)
		return 0;
	ga = kvm_s390_logical_to_effective(vcpu, ga);
	rc = get_vcpu_asce(vcpu, &asce, ga, ar, mode);
	if (rc)
		return rc;
	nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
	gpas = gpa_array;
	if (nr_pages > ARRAY_SIZE(gpa_array))
		gpas = vmalloc(array_size(nr_pages, sizeof(unsigned long)));
	if (!gpas)
		return -ENOMEM;
	try_fetch_prot_override = fetch_prot_override_applicable(vcpu, mode, asce);
	try_storage_prot_override = storage_prot_override_applicable(vcpu);
	need_ipte_lock = psw_bits(*psw).dat && !asce.r;
	if (need_ipte_lock)
		ipte_lock(vcpu->kvm);
	/*
	 * Since we do the access further down ultimately via a move instruction
	 * that does key checking and returns an error in case of a protection
	 * violation, we don't need to do the check during address translation.
	 * Skip it by passing access key 0, which matches any storage key,
	 * obviating the need for any further checks. As a result the check is
	 * handled entirely in hardware on access, we only need to take care to
	 * forego key protection checking if fetch protection override applies or
	 * retry with the special key 9 in case of storage protection override.
	 */
	rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode, 0);
	if (rc)
		goto out_unlock;
	for (idx = 0; idx < nr_pages; idx++) {
		fragment_len = min(PAGE_SIZE - offset_in_page(gpas[idx]), len);
		if (try_fetch_prot_override && fetch_prot_override_applies(ga, fragment_len)) {
			rc = access_guest_page_gpa(vcpu->kvm, mode, gpas[idx], data, fragment_len);
		} else {
			rc = access_guest_page_with_key_gpa(vcpu->kvm, mode, gpas[idx],
							    data, fragment_len, access_key);
		}
		if (rc == PGM_PROTECTION && try_storage_prot_override)
			rc = access_guest_page_with_key_gpa(vcpu->kvm, mode, gpas[idx],
							    data, fragment_len, PAGE_SPO_ACC);
		if (rc)
			break;
		len -= fragment_len;
		data += fragment_len;
		ga = kvm_s390_logical_to_effective(vcpu, ga + fragment_len);
	}
	if (rc > 0) {
		bool terminate = (mode == GACC_STORE) && (idx > 0);

		if (rc == PGM_PROTECTION)
			prot = PROT_TYPE_KEYC;
		else
			prot = PROT_TYPE_DUMMY;
		rc = trans_exc_ending(vcpu, rc, ga, ar, mode, prot, terminate);
	}
out_unlock:
	if (need_ipte_lock)
		ipte_unlock(vcpu->kvm);
	if (nr_pages > ARRAY_SIZE(gpa_array))
		vfree(gpas);
	return rc;
}

int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
		      void *data, unsigned long len, enum gacc_mode mode)
{
	unsigned int fragment_len;
	unsigned long gpa;
	int rc = 0;

	while (len && !rc) {
		gpa = kvm_s390_real_to_abs(vcpu, gra);
		fragment_len = min(PAGE_SIZE - offset_in_page(gpa), len);
		rc = access_guest_page_gpa(vcpu->kvm, mode, gpa, data, fragment_len);
		len -= fragment_len;
		gra += fragment_len;
		data += fragment_len;
	}
	if (rc > 0)
		vcpu->arch.pgm.code = rc;
	return rc;
}

/**
 * __cmpxchg_with_key() - Perform cmpxchg, honoring storage keys.
 * @ptr: Address of value to compare to *@old and exchange with
 *	 @new. Must be aligned to @size.
 * @old: Old value. Compared to the content pointed to by @ptr in order to
 *	 determine if the exchange occurs. The old value read from *@ptr is
 *	 written here.
 * @new: New value to place at *@ptr.
 * @size: Size of the operation in bytes, may only be a power of two up to 16.
 * @access_key: Access key to use for checking storage key protection.
 *
 * Perform a cmpxchg on guest memory, honoring storage key protection.
 * @access_key alone determines how key checking is performed, neither
 * storage-protection-override nor fetch-protection-override apply.
 * In case of an exception *@old is set to zero.
 *
 * Return:
 * * %0: cmpxchg executed successfully
 * * %1: cmpxchg executed unsuccessfully
 * * %PGM_PROTECTION: an exception happened when trying to access *@ptr
 * * %-EAGAIN: maxed out number of retries (byte and short only)
 * * %-EINVAL: invalid value for @size
 */
static int __cmpxchg_with_key(union kvm_s390_quad *ptr, union kvm_s390_quad *old,
			      union kvm_s390_quad new, int size, u8 access_key)
{
	union kvm_s390_quad tmp = { .sixteen = 0 };
	int rc;

	/*
	 * The cmpxchg_key macro depends on the type of "old", so we need
	 * a case for each valid length and get some code duplication as long
	 * as we don't introduce a new macro.
	 */
	switch (size) {
	case 1:
		rc = __cmpxchg_key1(&ptr->one, &tmp.one, old->one, new.one, access_key);
		break;
	case 2:
		rc = __cmpxchg_key2(&ptr->two, &tmp.two, old->two, new.two, access_key);
		break;
	case 4:
		rc = __cmpxchg_key4(&ptr->four, &tmp.four, old->four, new.four, access_key);
		break;
	case 8:
		rc = __cmpxchg_key8(&ptr->eight, &tmp.eight, old->eight, new.eight, access_key);
		break;
	case 16:
		rc = __cmpxchg_key16(&ptr->sixteen, &tmp.sixteen, old->sixteen, new.sixteen,
				     access_key);
		break;
	default:
		return -EINVAL;
	}
	if (!rc && memcmp(&tmp, old, size))
		rc = 1;
	*old = tmp;
	/*
	 * Assume that the fault is caused by protection, either key protection
	 * or user page write protection.
	 */
	if (rc == -EFAULT)
		rc = PGM_PROTECTION;
	return rc;
}

struct cmpxchg_key_context {
	union kvm_s390_quad new;
	union kvm_s390_quad *old;
	int exception;
	unsigned short offset;
	u8 access_key;
	u8 len;
};

static void _cmpxchg_guest_abs_with_key(struct guest_fault *f)
{
	struct cmpxchg_key_context *context = f->priv;

	context->exception = __cmpxchg_with_key(__va(PFN_PHYS(f->pfn) | context->offset),
						context->old, context->new, context->len,
						context->access_key);
}

/**
 * cmpxchg_guest_abs_with_key() - Perform cmpxchg on guest absolute address.
 * @kvm: Virtual machine instance.
 * @gpa: Absolute guest address of the location to be changed.
 * @len: Operand length of the cmpxchg, required: 1 <= len <= 16. Providing a
 *	 non power of two will result in failure.
 * @old: Pointer to old value. If the location at @gpa contains this value,
 *	 the exchange will succeed. After calling cmpxchg_guest_abs_with_key()
 *	 *@old contains the value at @gpa before the attempt to
 *	 exchange the value.
 * @new: The value to place at @gpa.
 * @acc: The access key to use for the guest access.
 * @success: output value indicating if an exchange occurred.
 *
 * Atomically exchange the value at @gpa by @new, if it contains *@old.
 * Honors storage keys.
 *
 * Return: * 0: successful exchange
 *	   * >0: a program interruption code indicating the reason cmpxchg could
 *		 not be attempted
 *	   * -EINVAL: address misaligned or len not power of two
 *	   * -EAGAIN: transient failure (len 1 or 2)
 *	   * -EOPNOTSUPP: read-only memslot (should never occur)
 */
int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len, union kvm_s390_quad *old,
			       union kvm_s390_quad new, u8 acc, bool *success)
{
	struct cmpxchg_key_context context = {
		.old = old,
		.new = new,
		.offset = offset_in_page(gpa),
		.len = len,
		.access_key = acc,
	};
	struct guest_fault fault = {
		.gfn = gpa_to_gfn(gpa),
		.priv = &context,
		.write_attempt = true,
		.callback = _cmpxchg_guest_abs_with_key,
	};
	int rc;

	lockdep_assert_held(&kvm->srcu);

	if (len > 16 || !IS_ALIGNED(gpa, len))
		return -EINVAL;

	rc = kvm_s390_faultin_gfn(NULL, kvm, &fault);
	if (rc)
		return rc;
	*success = !context.exception;
	if (context.exception == 1)
		return 0;
	return context.exception;
}
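
/*
 * Illustrative usage sketch (hypothetical caller holding kvm->srcu): attempt
 * an 8-byte exchange and retry as long as other CPUs keep changing the
 * location; *old is updated with the current contents on every miss:
 *
 *	union kvm_s390_quad old = { .eight = expected }, new = { .eight = val };
 *	bool success;
 *	int rc;
 *
 *	do {
 *		rc = cmpxchg_guest_abs_with_key(kvm, gpa, 8, &old, new,
 *						access_key, &success);
 *	} while (!rc && !success);
 */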

/**
 * guest_translate_address_with_key - translate guest logical into guest absolute address
 * @vcpu: virtual cpu
 * @gva: Guest virtual address
 * @ar: Access register
 * @gpa: Guest physical address
 * @mode: Translation access mode
 * @access_key: access key to match the storage key with
 *
 * Parameter semantics are the same as the ones from guest_translate.
 * The memory contents at the guest address are not changed.
 *
 * Note: The IPTE lock is not taken during this function, so the caller
 * has to take care of this.
 */
int guest_translate_address_with_key(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
				     unsigned long *gpa, enum gacc_mode mode,
				     u8 access_key)
{
	union asce asce;
	int rc;

	gva = kvm_s390_logical_to_effective(vcpu, gva);
	rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
	if (rc)
		return rc;
	return guest_range_to_gpas(vcpu, gva, ar, gpa, 1, asce, mode,
				   access_key);
}

/**
 * check_gva_range - test a range of guest virtual addresses for accessibility
 * @vcpu: virtual cpu
 * @gva: Guest virtual address
 * @ar: Access register
 * @length: Length of test range
 * @mode: Translation access mode
 * @access_key: access key to match the storage keys with
 */
int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
		    unsigned long length, enum gacc_mode mode, u8 access_key)
{
	union asce asce;
	int rc = 0;

	rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
	if (rc)
		return rc;
	ipte_lock(vcpu->kvm);
	rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode,
				 access_key);
	ipte_unlock(vcpu->kvm);

	return rc;
}

/**
 * check_gpa_range - test a range of guest physical addresses for accessibility
 * @kvm: virtual machine instance
 * @gpa: guest physical address
 * @length: length of test range
 * @mode: access mode to test, relevant for storage keys
 * @access_key: access key to match the storage keys with
 */
int check_gpa_range(struct kvm *kvm, unsigned long gpa, unsigned long length,
		    enum gacc_mode mode, u8 access_key)
{
	unsigned int fragment_len;
	int rc = 0;

	while (length && !rc) {
		fragment_len = min(PAGE_SIZE - offset_in_page(gpa), length);
		rc = vm_check_access_key_gpa(kvm, access_key, mode, gpa);
		length -= fragment_len;
		gpa += fragment_len;
	}
	return rc;
}

/**
 * kvm_s390_check_low_addr_prot_real - check for low-address protection
 * @vcpu: virtual cpu
 * @gra: Guest real address
 *
 * Checks whether an address is subject to low-address protection and set
 * up vcpu->arch.pgm accordingly if necessary.
 *
 * Return: 0 if no protection exception, or PGM_PROTECTION if protected.
 */
int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra)
{
	union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};

	if (!ctlreg0.lap || !is_low_address(gra))
		return 0;
	return trans_exc(vcpu, PGM_PROTECTION, gra, 0, GACC_STORE, PROT_TYPE_LA);
}

/**
 * walk_guest_tables() - Walk the guest page table and pin the dat tables.
 * @sg: Pointer to the shadow guest address space structure.
 * @saddr: Faulting address in the shadow gmap.
 * @w: Will be filled with information on the pinned pages.
 * @wr: Indicates a write access if true.
 *
 * Return:
 * * %0 in case of success,
 * * a PIC code > 0 in case the address translation fails
 * * an error code < 0 if other errors happen in the host
 */
static int walk_guest_tables(struct gmap *sg, unsigned long saddr, struct pgtwalk *w, bool wr)
{
	struct gmap *parent = sg->parent;
	struct guest_fault *entries;
	union dat_table_entry table;
	union vaddress vaddr;
	unsigned long ptr;
	struct kvm *kvm;
	union asce asce;
	int rc;

	if (!parent)
		return -EAGAIN;
	kvm = parent->kvm;
	WARN_ON(!kvm);
	asce = sg->guest_asce;
	entries = get_entries(w);

	w->level = LEVEL_MEM;
	w->last_addr = saddr;
	if (asce.r)
		return kvm_s390_get_guest_page(kvm, entries + LEVEL_MEM, gpa_to_gfn(saddr), false);

	vaddr.addr = saddr;
	ptr = asce.rsto * PAGE_SIZE;

	if (!asce_contains_gfn(asce, gpa_to_gfn(saddr)))
		return PGM_ASCE_TYPE;
	switch (asce.dt) {
	case ASCE_TYPE_REGION1:
		if (vaddr.rfx01 > asce.tl)
			return PGM_REGION_FIRST_TRANS;
		break;
	case ASCE_TYPE_REGION2:
		if (vaddr.rsx01 > asce.tl)
			return PGM_REGION_SECOND_TRANS;
		break;
	case ASCE_TYPE_REGION3:
		if (vaddr.rtx01 > asce.tl)
			return PGM_REGION_THIRD_TRANS;
		break;
	case ASCE_TYPE_SEGMENT:
		if (vaddr.sx01 > asce.tl)
			return PGM_SEGMENT_TRANSLATION;
		break;
	}

	w->level = asce.dt;
	switch (asce.dt) {
	case ASCE_TYPE_REGION1:
		w->last_addr = ptr + vaddr.rfx * 8;
		rc = kvm_s390_get_guest_page_and_read_gpa(kvm, entries + w->level,
							  w->last_addr, &table.val);
		if (rc)
			return rc;
		if (table.pgd.i)
			return PGM_REGION_FIRST_TRANS;
		if (table.pgd.tt != TABLE_TYPE_REGION1)
			return PGM_TRANSLATION_SPEC;
		if (vaddr.rsx01 < table.pgd.tf || vaddr.rsx01 > table.pgd.tl)
			return PGM_REGION_SECOND_TRANS;
		if (sg->edat_level >= 1)
			w->p |= table.pgd.p;
		ptr = table.pgd.rto * PAGE_SIZE;
		w->level--;
		fallthrough;
	case ASCE_TYPE_REGION2:
		w->last_addr = ptr + vaddr.rsx * 8;
		rc = kvm_s390_get_guest_page_and_read_gpa(kvm, entries + w->level,
							  w->last_addr, &table.val);
		if (rc)
			return rc;
		if (table.p4d.i)
			return PGM_REGION_SECOND_TRANS;
		if (table.p4d.tt != TABLE_TYPE_REGION2)
			return PGM_TRANSLATION_SPEC;
		if (vaddr.rtx01 < table.p4d.tf || vaddr.rtx01 > table.p4d.tl)
			return PGM_REGION_THIRD_TRANS;
		if (sg->edat_level >= 1)
			w->p |= table.p4d.p;
		ptr = table.p4d.rto * PAGE_SIZE;
		w->level--;
		fallthrough;
	case ASCE_TYPE_REGION3:
		w->last_addr = ptr + vaddr.rtx * 8;
		rc = kvm_s390_get_guest_page_and_read_gpa(kvm, entries + w->level,
							  w->last_addr, &table.val);
		if (rc)
			return rc;
		if (table.pud.i)
			return PGM_REGION_THIRD_TRANS;
		if (table.pud.tt != TABLE_TYPE_REGION3)
			return PGM_TRANSLATION_SPEC;
		if (table.pud.cr && asce.p && sg->edat_level >= 2)
			return PGM_TRANSLATION_SPEC;
		if (sg->edat_level >= 1)
			w->p |= table.pud.p;
		if (table.pud.fc && sg->edat_level >= 2) {
			table.val = u64_replace_bits(table.val, saddr, ~_REGION3_MASK);
			goto edat_applies;
		}
		if (vaddr.sx01 < table.pud.fc0.tf || vaddr.sx01 > table.pud.fc0.tl)
			return PGM_SEGMENT_TRANSLATION;
		ptr = table.pud.fc0.sto * PAGE_SIZE;
		w->level--;
		fallthrough;
	case ASCE_TYPE_SEGMENT:
		w->last_addr = ptr + vaddr.sx * 8;
		rc = kvm_s390_get_guest_page_and_read_gpa(kvm, entries + w->level,
							  w->last_addr, &table.val);
		if (rc)
			return rc;
		if (table.pmd.i)
			return PGM_SEGMENT_TRANSLATION;
		if (table.pmd.tt != TABLE_TYPE_SEGMENT)
			return PGM_TRANSLATION_SPEC;
		if (table.pmd.cs && asce.p)
			return PGM_TRANSLATION_SPEC;
		w->p |= table.pmd.p;
		if (table.pmd.fc && sg->edat_level >= 1) {
			table.val = u64_replace_bits(table.val, saddr, ~_SEGMENT_MASK);
			goto edat_applies;
		}
		ptr = table.pmd.fc0.pto * (PAGE_SIZE / 2);
		w->level--;
	}
	w->last_addr = ptr + vaddr.px * 8;
	rc = kvm_s390_get_guest_page_and_read_gpa(kvm, entries + w->level,
						  w->last_addr, &table.val);
	if (rc)
		return rc;
	if (table.pte.i)
		return PGM_PAGE_TRANSLATION;
	if (table.pte.z)
		return PGM_TRANSLATION_SPEC;
	w->p |= table.pte.p;
edat_applies:
	if (wr && w->p)
		return PGM_PROTECTION;

	return kvm_s390_get_guest_page(kvm, entries + LEVEL_MEM, table.pte.pfra, wr);
}
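
/*
 * Illustrative walk (assuming a guest ASCE of type region-first): w->level
 * starts at asce.dt and is decremented once per table level, so on success
 * the pinned pages in entries[] line up as:
 *
 *	entries[3]  region-first table    entries[0]  segment table
 *	entries[2]  region-second table   entries[-1] page table
 *	entries[1]  region-third table    entries[-2] the target page
 */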

static int _do_shadow_pte(struct gmap *sg, gpa_t raddr, union pte *ptep_h, union pte *ptep,
			  struct guest_fault *f, bool p)
{
	union pgste pgste;
	union pte newpte;
	int rc;

	lockdep_assert_held(&sg->kvm->mmu_lock);
	lockdep_assert_held(&sg->parent->children_lock);

	scoped_guard(spinlock, &sg->host_to_rmap_lock)
		rc = gmap_insert_rmap(sg, f->gfn, gpa_to_gfn(raddr), TABLE_TYPE_PAGE_TABLE);
	if (rc)
		return rc;

	pgste = pgste_get_lock(ptep_h);
	newpte = _pte(f->pfn, f->writable, !p, 0);
	newpte.s.d |= ptep->s.d;
	newpte.s.sd |= ptep->s.sd;
	newpte.h.p &= ptep->h.p;
	pgste = _gmap_ptep_xchg(sg->parent, ptep_h, newpte, pgste, f->gfn, false);
	pgste.vsie_notif = 1;
	pgste_set_unlock(ptep_h, pgste);

	newpte = _pte(f->pfn, 0, !p, 0);
	pgste = pgste_get_lock(ptep);
	pgste = __dat_ptep_xchg(ptep, pgste, newpte, gpa_to_gfn(raddr), sg->asce, uses_skeys(sg));
	pgste_set_unlock(ptep, pgste);

	return 0;
}

static int _do_shadow_crste(struct gmap *sg, gpa_t raddr, union crste *host, union crste *table,
			    struct guest_fault *f, bool p)
{
	union crste newcrste;
	gfn_t gfn;
	int rc;

	lockdep_assert_held(&sg->kvm->mmu_lock);
	lockdep_assert_held(&sg->parent->children_lock);

	gfn = f->gfn & gpa_to_gfn(is_pmd(*table) ? _SEGMENT_MASK : _REGION3_MASK);
	scoped_guard(spinlock, &sg->host_to_rmap_lock)
		rc = gmap_insert_rmap(sg, gfn, gpa_to_gfn(raddr), host->h.tt);
	if (rc)
		return rc;

	newcrste = _crste_fc1(f->pfn, host->h.tt, f->writable, !p);
	newcrste.s.fc1.d |= host->s.fc1.d;
	newcrste.s.fc1.sd |= host->s.fc1.sd;
	newcrste.h.p &= host->h.p;
	newcrste.s.fc1.vsie_notif = 1;
	newcrste.s.fc1.prefix_notif = host->s.fc1.prefix_notif;
	_gmap_crstep_xchg(sg->parent, host, newcrste, f->gfn, false);

	newcrste = _crste_fc1(f->pfn, host->h.tt, 0, !p);
	dat_crstep_xchg(table, newcrste, gpa_to_gfn(raddr), sg->asce);
	return 0;
}

static int _gaccess_do_shadow(struct kvm_s390_mmu_cache *mc, struct gmap *sg,
			      unsigned long saddr, struct pgtwalk *w)
{
	struct guest_fault *entries;
	int flags, i, hl, gl, l, rc;
	union crste *table, *host;
	union pte *ptep, *ptep_h;

	lockdep_assert_held(&sg->kvm->mmu_lock);
	lockdep_assert_held(&sg->parent->children_lock);

	entries = get_entries(w);
	ptep_h = NULL;
	ptep = NULL;

	rc = dat_entry_walk(NULL, gpa_to_gfn(saddr), sg->asce, DAT_WALK_ANY, TABLE_TYPE_PAGE_TABLE,
			    &table, &ptep);
	if (rc)
		return rc;

	/* A race occurred. The shadow mapping is already valid, nothing to do */
	if ((ptep && !ptep->h.i) || (!ptep && crste_leaf(*table)))
		return 0;

	gl = get_level(table, ptep);

	/*
	 * Skip levels that are already protected. For each level, protect
	 * only the page containing the entry, not the whole table.
	 */
	for (i = gl; i >= w->level; i--) {
		rc = gmap_protect_rmap(mc, sg, entries[i - 1].gfn, gpa_to_gfn(saddr),
				       entries[i - 1].pfn, i, entries[i - 1].writable);
		if (rc)
			return rc;
	}

	rc = dat_entry_walk(NULL, entries[LEVEL_MEM].gfn, sg->parent->asce, DAT_WALK_LEAF,
			    TABLE_TYPE_PAGE_TABLE, &host, &ptep_h);
	if (rc)
		return rc;

	hl = get_level(host, ptep_h);
	/* Get the smallest granularity */
	l = min3(gl, hl, w->level);

	flags = DAT_WALK_SPLIT_ALLOC | (uses_skeys(sg->parent) ? DAT_WALK_USES_SKEYS : 0);
	/* If necessary, create the shadow mapping */
	if (l < gl) {
		rc = dat_entry_walk(mc, gpa_to_gfn(saddr), sg->asce, flags, l, &table, &ptep);
		if (rc)
			return rc;
	}
	if (l < hl) {
		rc = dat_entry_walk(mc, entries[LEVEL_MEM].gfn, sg->parent->asce,
				    flags, l, &host, &ptep_h);
		if (rc)
			return rc;
	}

	if (KVM_BUG_ON(l > TABLE_TYPE_REGION3, sg->kvm))
		return -EFAULT;
	if (l == TABLE_TYPE_PAGE_TABLE)
		return _do_shadow_pte(sg, saddr, ptep_h, ptep, entries + LEVEL_MEM, w->p);
	return _do_shadow_crste(sg, saddr, host, table, entries + LEVEL_MEM, w->p);
}

static inline int _gaccess_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, gpa_t saddr,
					unsigned long seq, struct pgtwalk *walk)
{
	struct gmap *parent;
	int rc;

	if (kvm_s390_array_needs_retry_unsafe(vcpu->kvm, seq, walk->raw_entries))
		return -EAGAIN;
again:
	rc = kvm_s390_mmu_cache_topup(vcpu->arch.mc);
	if (rc)
		return rc;
	scoped_guard(read_lock, &vcpu->kvm->mmu_lock) {
		if (kvm_s390_array_needs_retry_safe(vcpu->kvm, seq, walk->raw_entries))
			return -EAGAIN;
		parent = READ_ONCE(sg->parent);
		if (!parent)
			return -EAGAIN;
		scoped_guard(spinlock, &parent->children_lock) {
			if (READ_ONCE(sg->parent) != parent)
				return -EAGAIN;
			rc = _gaccess_do_shadow(vcpu->arch.mc, sg, saddr, walk);
		}
		if (rc == -ENOMEM)
			goto again;
		if (!rc)
			kvm_s390_release_faultin_array(vcpu->kvm, walk->raw_entries, false);
	}
	return rc;
}

/**
 * __gaccess_shadow_fault() - Handle fault on a shadow page table.
 * @vcpu: Virtual cpu that triggered the action.
 * @sg: The shadow guest address space structure.
 * @saddr: Faulting address in the shadow gmap.
 * @datptr: Will contain the address of the faulting DAT table entry, or of
 *	    the valid leaf, plus some flags.
 * @wr: Whether this is a write access.
 *
 * Return:
 * * %0 if the shadow fault was successfully resolved
 * * > 0 (pgm exception code) on exceptions while faulting
 * * %-EAGAIN if the caller can retry immediately
 * * %-EFAULT when accessing invalid guest addresses
 * * %-ENOMEM if out of memory
 */
static int __gaccess_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, gpa_t saddr,
				  union mvpg_pei *datptr, bool wr)
{
	struct pgtwalk walk = { .p = false, };
	unsigned long seq;
	int rc;

	seq = vcpu->kvm->mmu_invalidate_seq;
	/* Pairs with the smp_wmb() in kvm_mmu_invalidate_end(). */
	smp_rmb();

	rc = walk_guest_tables(sg, saddr, &walk, wr);
	if (datptr) {
		datptr->val = walk.last_addr;
		datptr->dat_prot = wr && walk.p;
		datptr->not_pte = walk.level > TABLE_TYPE_PAGE_TABLE;
		datptr->real = sg->guest_asce.r;
	}
	if (!rc)
		rc = _gaccess_shadow_fault(vcpu, sg, saddr, seq, &walk);
	if (rc)
		kvm_s390_release_faultin_array(vcpu->kvm, walk.raw_entries, true);
	return rc;
}

int gaccess_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, gpa_t saddr,
			 union mvpg_pei *datptr, bool wr)
{
	int rc;

	if (KVM_BUG_ON(!test_bit(GMAP_FLAG_SHADOW, &sg->flags), vcpu->kvm))
		return -EFAULT;

	rc = kvm_s390_mmu_cache_topup(vcpu->arch.mc);
	if (rc)
		return rc;

	ipte_lock(vcpu->kvm);
	rc = __gaccess_shadow_fault(vcpu, sg, saddr, datptr, wr || sg->guest_asce.r);
	ipte_unlock(vcpu->kvm);

	return rc;
}