// SPDX-License-Identifier: GPL-2.0
/*
 * KVM guest address space mapping code
 *
 * Copyright IBM Corp. 2007, 2020, 2024
 * Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
 *            Martin Schwidefsky <schwidefsky@de.ibm.com>
 *            David Hildenbrand <david@redhat.com>
 *            Janosch Frank <frankja@linux.ibm.com>
 */

#include <linux/kernel.h>
#include <linux/pagewalk.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/ksm.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/pgtable.h>
#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
#include <linux/pgalloc.h>

#include <asm/page-states.h>
#include <asm/tlb.h>
#include "dat.h"

/**
 * kvm_s390_mmu_cache_topup() - Refill the pools of pre-allocated DAT objects.
 * @mc: The MMU cache to top up.
 *
 * Fills the three object pools (CRSTs, page tables, rmaps) up to their fixed
 * capacities. Allocations here may sleep; this allows later consumers to take
 * objects from @mc in contexts where sleeping is not possible (presumably
 * while holding kvm->mmu_lock -- see the "Context:" notes on the functions
 * below that take an @mc parameter).
 *
 * On failure, objects already placed in the pools are kept there (the n_*
 * counters are only incremented after a successful allocation), so a later
 * retry resumes where this attempt stopped.
 *
 * Return: 0 on success, -ENOMEM if any allocation fails.
 */
int kvm_s390_mmu_cache_topup(struct kvm_s390_mmu_cache *mc)
{
	void *o;

	/* CRSTs are higher-order allocations (order CRST_ALLOC_ORDER). */
	for ( ; mc->n_crsts < KVM_S390_MMU_CACHE_N_CRSTS; mc->n_crsts++) {
		o = (void *)__get_free_pages(GFP_KERNEL_ACCOUNT | __GFP_COMP, CRST_ALLOC_ORDER);
		if (!o)
			return -ENOMEM;
		mc->crsts[mc->n_crsts] = o;
	}
	/* Page tables fit in a single page. */
	for ( ; mc->n_pts < KVM_S390_MMU_CACHE_N_PTS; mc->n_pts++) {
		o = (void *)__get_free_page(GFP_KERNEL_ACCOUNT);
		if (!o)
			return -ENOMEM;
		mc->pts[mc->n_pts] = o;
	}
	/* rmap entries are small; they must start out zeroed (kzalloc). */
	for ( ; mc->n_rmaps < KVM_S390_MMU_CACHE_N_RMAPS; mc->n_rmaps++) {
		o = kzalloc(sizeof(*mc->rmaps[0]), GFP_KERNEL_ACCOUNT);
		if (!o)
			return -ENOMEM;
		mc->rmaps[mc->n_rmaps] = o;
	}
	return 0;
}

/*
 * Take a page table from the cache and mark its page as being used for DAT.
 * The table is NOT initialized (hence "_noinit"); the caller must do that.
 * Returns NULL if the cache is empty.
 */
static inline struct page_table *dat_alloc_pt_noinit(struct kvm_s390_mmu_cache *mc)
{
	struct page_table *res;

	res = kvm_s390_mmu_cache_alloc_pt(mc);
	if (res)
		__arch_set_page_dat(res, 1);
	return res;
}

/*
 * Take a CRST (region/segment table) from the cache and mark all of its
 * pages (1UL << CRST_ALLOC_ORDER) as being used for DAT. The table is NOT
 * initialized; the caller must do that. Returns NULL if the cache is empty.
 */
static inline struct crst_table *dat_alloc_crst_noinit(struct kvm_s390_mmu_cache *mc)
{
	struct crst_table *res;

	res = kvm_s390_mmu_cache_alloc_crst(mc);
	if (res)
		__arch_set_page_dat(res, 1UL << CRST_ALLOC_ORDER);
	return res;
}
/*
 * Allocate and initialize a CRST directly from the page allocator (may
 * sleep), bypassing the mmu cache. Every entry is initialized to @init.
 * Returns NULL on allocation failure.
 */
struct crst_table *dat_alloc_crst_sleepable(unsigned long init)
{
	struct page *page;
	void *virt;

	page = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_COMP, CRST_ALLOC_ORDER);
	if (!page)
		return NULL;
	virt = page_to_virt(page);
	__arch_set_page_dat(virt, 1UL << CRST_ALLOC_ORDER);
	crst_table_init(virt, init);
	return virt;
}

/**
 * dat_free_level() - Recursively free a CRST and every table underneath it.
 * @table: The table to free.
 * @owns_ptes: Whether the page tables linked from this hierarchy are owned
 *             by it and should be freed as well.
 *
 * Entries that are invalid (h.i) or are large leaves (h.fc) have no lower
 * level table attached and are skipped. The table itself is always freed.
 */
void dat_free_level(struct crst_table *table, bool owns_ptes)
{
	unsigned int i;

	for (i = 0; i < _CRST_ENTRIES; i++) {
		if (table->crstes[i].h.fc || table->crstes[i].h.i)
			continue;
		if (!is_pmd(table->crstes[i]))
			dat_free_level(dereference_crste(table->crstes[i]), owns_ptes);
		else if (owns_ptes)
			dat_free_pt(dereference_pmd(table->crstes[i].pmd));
	}
	dat_free_crst(table);
}

/**
 * dat_set_asce_limit() - Grow or shrink the ASCE to the given table type.
 * @mc: The mmu cache used to allocate new top-level tables when growing.
 * @asce: The ASCE to adjust; updated in place (rsto and dt fields).
 * @newtype: The desired designation type (one of the region/segment types).
 *
 * Shrinking removes top-level tables as long as only their first entry is in
 * use; growing adds new top-level tables whose first entry points to the
 * previous top level and whose other entries are initialized as holes.
 *
 * Return: 0 on success (also when shrinking stops early because the top
 *         entry is a large leaf), -ENOMEM if a new table cannot be allocated.
 */
int dat_set_asce_limit(struct kvm_s390_mmu_cache *mc, union asce *asce, int newtype)
{
	struct crst_table *table;
	union crste crste;

	/* Shrink: peel off top-level tables while above the target type. */
	while (asce->dt > newtype) {
		table = dereference_asce(*asce);
		crste = table->crstes[0];
		/* A large leaf at the top cannot be peeled; stop here. */
		if (crste.h.fc)
			return 0;
		if (!crste.h.i) {
			/* First entry valid: drop to the next-lower table. */
			asce->rsto = crste.h.fc0.to;
			dat_free_crst(table);
		} else {
			/* First entry invalid: reuse this table one level down. */
			crste.h.tt--;
			crst_table_init((void *)table, crste.val);
		}
		asce->dt--;
	}
	/* Grow: stack new top-level tables until the target type is reached. */
	while (asce->dt < newtype) {
		/* Entry 0 of the new table points to the current top level. */
		crste = _crste_fc0(asce->rsto, asce->dt + 1);
		table = dat_alloc_crst_noinit(mc);
		if (!table)
			return -ENOMEM;
		crst_table_init((void *)table, _CRSTE_HOLE(crste.h.tt).val);
		table->crstes[0] = crste;
		asce->rsto = __pa(table) >> PAGE_SHIFT;
		asce->dt++;
	}
	return 0;
}

/**
 * dat_crstep_xchg() - Exchange a gmap CRSTE with another.
 * @crstep: Pointer to the CRST entry
 * @new: Replacement entry.
 * @gfn: The affected guest address.
 * @asce: The ASCE of the address space.
 *
 * If the old entry is invalid, no TLB entries can exist for it, so a plain
 * store suffices. Otherwise the old entry must be flushed from the TLB:
 * with EDAT2 this is done via CRDTE (compare-and-replace), and without it
 * via IDTE invalidation followed by a store.
 *
 * Context: This function is assumed to be called with kvm->mmu_lock held.
 */
void dat_crstep_xchg(union crste *crstep, union crste new, gfn_t gfn, union asce asce)
{
	if (crstep->h.i) {
		WRITE_ONCE(*crstep, new);
		return;
	} else if (cpu_has_edat2()) {
		crdte_crste(crstep, *crstep, new, gfn, asce);
		return;
	}

	/* Restrict invalidation to this guest ASCE when supported. */
	if (machine_has_tlb_guest())
		idte_crste(crstep, gfn, IDTE_GUEST_ASCE, asce, IDTE_GLOBAL);
	else
		idte_crste(crstep, gfn, 0, NULL_ASCE, IDTE_GLOBAL);
	WRITE_ONCE(*crstep, new);
}

/**
 * dat_crstep_xchg_atomic() - Atomically exchange a gmap CRSTE with another.
 * @crstep: Pointer to the CRST entry.
 * @old: Expected old value.
 * @new: Replacement entry.
 * @gfn: The affected guest address.
 * @asce: The asce of the address space.
 *
 * This function is needed to atomically exchange a CRSTE that potentially
 * maps a prefix area, without having to invalidate it inbetween.
 *
 * Context: This function is assumed to be called with kvm->mmu_lock held.
 *
 * Return: %true if the exchange was successful.
 */
bool dat_crstep_xchg_atomic(union crste *crstep, union crste old, union crste new, gfn_t gfn,
			    union asce asce)
{
	/* Invalid entries are not in any TLB; a bare cmpxchg is enough. */
	if (old.h.i)
		return arch_try_cmpxchg((long *)crstep, &old.val, new.val);
	if (cpu_has_edat2())
		return crdte_crste(crstep, old, new, gfn, asce);
	return cspg_crste(crstep, old, new);
}

/* Program the hardware storage key of a page from its PGSTE backup copy. */
static void dat_set_storage_key_from_pgste(union pte pte, union pgste pgste)
{
	union skey nkey = { .acc = pgste.acc, .fp = pgste.fp };

	page_set_storage_key(pte_origin(pte), nkey.skey, 0);
}

/* Copy the hardware storage key from the old page to the new page. */
static void dat_move_storage_key(union pte old, union pte new)
{
	page_set_storage_key(pte_origin(new), page_get_storage_key(pte_origin(old)), 1);
}

/*
 * Save the hardware storage key of a page into its PGSTE backup copy.
 * Reference and change are accumulated (OR-ed) into the guest R/C bits.
 */
static union pgste dat_save_storage_key_into_pgste(union pte pte, union pgste pgste)
{
	union skey skey;

	skey.skey = page_get_storage_key(pte_origin(pte));

	pgste.acc = skey.acc;
	pgste.fp = skey.fp;
	pgste.gr |= skey.r;
	pgste.gc |= skey.c;

	return pgste;
}

/**
 * __dat_ptep_xchg() - Exchange a guest PTE, flushing the TLB as needed.
 * @ptep: Pointer to the PTE to replace.
 * @pgste: The (locked) PGSTE belonging to @ptep.
 * @new: The replacement PTE value.
 * @gfn: The affected guest frame.
 * @asce: The ASCE of the address space.
 * @uses_skeys: Whether the guest uses storage keys; when true, storage keys
 *              are saved/restored/moved across the exchange.
 *
 * Context: Expected to be called with the PGSTE lock held (the caller
 *          passes in @pgste and must unlock it afterwards).
 *
 * Return: the possibly updated PGSTE; the caller must write it back.
 */
union pgste __dat_ptep_xchg(union pte *ptep, union pgste pgste, union pte new, gfn_t gfn,
			    union asce asce, bool uses_skeys)
{
	union pte old = READ_ONCE(*ptep);

	/* Updating only the software bits while holding the pgste lock. */
	if (!((ptep->val ^ new.val) & ~_PAGE_SW_BITS)) {
		WRITE_ONCE(ptep->swbyte, new.swbyte);
		return pgste;
	}

	/* A valid old entry may be cached in TLBs and must be invalidated. */
	if (!old.h.i) {
		unsigned long opts = IPTE_GUEST_ASCE | (pgste.nodat ? IPTE_NODAT : 0);

		if (machine_has_tlb_guest())
			__ptep_ipte(gfn_to_gpa(gfn), (void *)ptep, opts, asce.val, IPTE_GLOBAL);
		else
			__ptep_ipte(gfn_to_gpa(gfn), (void *)ptep, 0, 0, IPTE_GLOBAL);
	}

	if (uses_skeys) {
		if (old.h.i && !new.h.i)
			/* Invalid to valid: restore storage keys from PGSTE. */
			dat_set_storage_key_from_pgste(new, pgste);
		else if (!old.h.i && new.h.i)
			/* Valid to invalid: save storage keys to PGSTE. */
			pgste = dat_save_storage_key_into_pgste(old, pgste);
		else if (!old.h.i && !new.h.i)
			/* Valid to valid: move storage keys. */
			if (old.h.pfra != new.h.pfra)
				dat_move_storage_key(old, new);
		/* Invalid to invalid: nothing to do. */
	}

	WRITE_ONCE(*ptep, new);
	return pgste;
}

/*
 * dat_split_ste() - Split a segment table entry into page table entries.
 *
 * Context: This function is assumed to be called with kvm->mmu_lock held.
 *
 * Return: 0 in case of success, -ENOMEM if running out of memory.
 */
static int dat_split_ste(struct kvm_s390_mmu_cache *mc, union pmd *pmdp, gfn_t gfn,
			 union asce asce, bool uses_skeys)
{
	union pgste pgste_init;
	struct page_table *pt;
	union pmd new, old;
	union pte init;
	int i;

	BUG_ON(!mc);
	old = READ_ONCE(*pmdp);

	/* Already split, nothing to do. */
	if (!old.h.i && !old.h.fc)
		return 0;

	pt = dat_alloc_pt_noinit(mc);
	if (!pt)
		return -ENOMEM;
	new.val = virt_to_phys(pt);

	/* Retry until the installation succeeds or someone else split it. */
	while (old.h.i || old.h.fc) {
		/* Build a PTE template mirroring the large entry's attributes. */
		init.val = pmd_origin_large(old);
		init.h.p = old.h.p;
		init.h.i = old.h.i;
		init.s.d = old.s.fc1.d;
		init.s.w = old.s.fc1.w;
		init.s.y = old.s.fc1.y;
		init.s.sd = old.s.fc1.sd;
		init.s.pr = old.s.fc1.pr;
		pgste_init.val = 0;
		if (old.h.fc) {
			/* Large mapping: one PTE per page of the segment. */
			for (i = 0; i < _PAGE_ENTRIES; i++)
				pt->ptes[i].val = init.val | i * PAGE_SIZE;
			/* No need to take locks as the page table is not installed yet. */
			pgste_init.prefix_notif = old.s.fc1.prefix_notif;
			/* Keep the PGSTEs pre-locked if keys must be saved below. */
			pgste_init.pcl = uses_skeys && init.h.i;
			dat_init_pgstes(pt, pgste_init.val);
		} else {
			dat_init_page_table(pt, init.val, 0);
		}

		if (dat_pmdp_xchg_atomic(pmdp, old, new, gfn, asce)) {
			if (!pgste_init.pcl)
				return 0;
			/* Save the keys of the now-split pages, then unlock. */
			for (i = 0; i < _PAGE_ENTRIES; i++) {
				union pgste pgste = pt->pgstes[i];

				pgste = dat_save_storage_key_into_pgste(pt->ptes[i], pgste);
				pgste_set_unlock(pt->ptes + i, pgste);
			}
			return 0;
		}
		old = READ_ONCE(*pmdp);
	}

	/* Lost the race against a concurrent split; discard our table. */
	dat_free_pt(pt);
	return 0;
}

/*
 * dat_split_crste() - Split a crste into smaller crstes.
 *
 * Context: This function is assumed to be called with kvm->mmu_lock held.
 *
 * Return: %0 in case of success, %-ENOMEM if running out of memory.
 */
static int dat_split_crste(struct kvm_s390_mmu_cache *mc, union crste *crstep,
			   gfn_t gfn, union asce asce, bool uses_skeys)
{
	struct crst_table *table;
	union crste old, new, init;
	int i;

	old = READ_ONCE(*crstep);
	/* Segment entries split into page tables; delegate. */
	if (is_pmd(old))
		return dat_split_ste(mc, &crstep->pmd, gfn, asce, uses_skeys);

	BUG_ON(!mc);

	/* Already split, nothing to do. */
	if (!old.h.i && !old.h.fc)
		return 0;

	table = dat_alloc_crst_noinit(mc);
	if (!table)
		return -ENOMEM;

	new.val = virt_to_phys(table);
	new.h.tt = old.h.tt;
	new.h.fc0.tl = _REGION_ENTRY_LENGTH;

	/* Retry until installed or concurrently split by someone else. */
	while (old.h.i || old.h.fc) {
		/* Child entries inherit the parent's attributes, one level down. */
		init = old;
		init.h.tt--;
		if (old.h.fc) {
			for (i = 0; i < _CRST_ENTRIES; i++)
				table->crstes[i].val = init.val | i * HPAGE_SIZE;
		} else {
			crst_table_init((void *)table, init.val);
		}
		if (dat_crstep_xchg_atomic(crstep, old, new, gfn, asce))
			return 0;
		old = READ_ONCE(*crstep);
	}

	/* Lost the race; discard the unused table. */
	dat_free_crst(table);
	return 0;
}

/**
 * dat_entry_walk() - Walk the gmap page tables.
 * @mc: Cache to use to allocate dat tables, if needed; can be NULL if neither
 *      %DAT_WALK_SPLIT or %DAT_WALK_ALLOC is specified in @flags.
 * @gfn: Guest frame.
 * @asce: The ASCE of the address space.
 * @flags: Flags from WALK_* macros.
 * @walk_level: Level to walk to, from LEVEL_* macros.
 * @last: Will be filled the last visited non-pte DAT entry.
 * @ptepp: Will be filled the last visited pte entry, if any, otherwise NULL.
 *
 * Returns a table entry pointer for the given guest address and @walk_level.
 *
 * The @flags have the following meanings:
 * * %DAT_WALK_IGN_HOLES: consider holes as normal table entries
 * * %DAT_WALK_ALLOC: allocate new tables to reach the requested level, if needed
 * * %DAT_WALK_SPLIT: split existing large pages to reach the requested level, if needed
 * * %DAT_WALK_LEAF: return successfully whenever a large page is encountered
 * * %DAT_WALK_ANY: return successfully even if the requested level could not be reached
 * * %DAT_WALK_CONTINUE: walk to the requested level with the specified flags, and then try to
 *                       continue walking to ptes with only DAT_WALK_ANY
 * * %DAT_WALK_USES_SKEYS: storage keys are in use
 *
 * Context: called with kvm->mmu_lock held.
 *
 * Return:
 * * %PGM_ADDRESSING if the requested address lies outside memory
 * * a PIC number if the requested address lies in a memory hole of type _DAT_TOKEN_PIC
 * * %-EFAULT if the requested address lies inside a memory hole of a different type
 * * %-EINVAL if the given ASCE is not compatible with the requested level
 * * %-EFBIG if the requested level could not be reached because a larger frame was found
 * * %-ENOENT if the requested level could not be reached for other reasons
 * * %-ENOMEM if running out of memory while allocating or splitting a table
 */
int dat_entry_walk(struct kvm_s390_mmu_cache *mc, gfn_t gfn, union asce asce, int flags,
		   int walk_level, union crste **last, union pte **ptepp)
{
	union vaddress vaddr = { .addr = gfn_to_gpa(gfn) };
	bool continue_anyway = flags & DAT_WALK_CONTINUE;
	bool uses_skeys = flags & DAT_WALK_USES_SKEYS;
	bool ign_holes = flags & DAT_WALK_IGN_HOLES;
	bool allocate = flags & DAT_WALK_ALLOC;
	bool split = flags & DAT_WALK_SPLIT;
	bool leaf = flags & DAT_WALK_LEAF;
	bool any = flags & DAT_WALK_ANY;
	struct page_table *pgtable;
	struct crst_table *table;
	union crste entry;
	int rc;

	*last = NULL;
	*ptepp = NULL;
	if (WARN_ON_ONCE(unlikely(!asce.val)))
		return -EINVAL;
	if (WARN_ON_ONCE(unlikely(walk_level > asce.dt)))
		return -EINVAL;
	if (!asce_contains_gfn(asce, gfn))
		return PGM_ADDRESSING;

	table = dereference_asce(asce);
	/* Region-first level, only present for 5-level ASCEs. */
	if (asce.dt >= ASCE_TYPE_REGION1) {
		*last = table->crstes + vaddr.rfx;
		entry = READ_ONCE(**last);
		if (WARN_ON_ONCE(entry.h.tt != TABLE_TYPE_REGION1))
			return -EINVAL;
		if (crste_hole(entry) && !ign_holes)
			return entry.tok.type == _DAT_TOKEN_PIC ? entry.tok.par : -EFAULT;
		if (walk_level == TABLE_TYPE_REGION1)
			return 0;
		if (entry.pgd.h.i) {
			if (!allocate)
				return any ? 0 : -ENOENT;
			/* Splitting an invalid entry installs a lower-level table. */
			rc = dat_split_crste(mc, *last, gfn, asce, uses_skeys);
			if (rc)
				return rc;
			entry = READ_ONCE(**last);
		}
		table = dereference_crste(entry.pgd);
	}

	/* Region-second level. */
	if (asce.dt >= ASCE_TYPE_REGION2) {
		*last = table->crstes + vaddr.rsx;
		entry = READ_ONCE(**last);
		if (WARN_ON_ONCE(entry.h.tt != TABLE_TYPE_REGION2))
			return -EINVAL;
		if (crste_hole(entry) && !ign_holes)
			return entry.tok.type == _DAT_TOKEN_PIC ? entry.tok.par : -EFAULT;
		if (walk_level == TABLE_TYPE_REGION2)
			return 0;
		if (entry.p4d.h.i) {
			if (!allocate)
				return any ? 0 : -ENOENT;
			rc = dat_split_crste(mc, *last, gfn, asce, uses_skeys);
			if (rc)
				return rc;
			entry = READ_ONCE(**last);
		}
		table = dereference_crste(entry.p4d);
	}

	/* Region-third level; may contain large (fc) leaves. */
	if (asce.dt >= ASCE_TYPE_REGION3) {
		*last = table->crstes + vaddr.rtx;
		entry = READ_ONCE(**last);
		if (WARN_ON_ONCE(entry.h.tt != TABLE_TYPE_REGION3))
			return -EINVAL;
		if (crste_hole(entry) && !ign_holes)
			return entry.tok.type == _DAT_TOKEN_PIC ? entry.tok.par : -EFAULT;
		/* CONTINUE: past the target level, demote to best-effort walk. */
		if (walk_level == TABLE_TYPE_REGION3 &&
		    continue_anyway && !entry.pud.h.fc && !entry.h.i) {
			walk_level = TABLE_TYPE_PAGE_TABLE;
			allocate = false;
		}
		if (walk_level == TABLE_TYPE_REGION3 || ((leaf || any) && entry.pud.h.fc))
			return 0;
		if (entry.pud.h.i && !entry.pud.h.fc) {
			if (!allocate)
				return any ? 0 : -ENOENT;
			rc = dat_split_crste(mc, *last, gfn, asce, uses_skeys);
			if (rc)
				return rc;
			entry = READ_ONCE(**last);
		}
		/* A large leaf blocks the descent unless splitting is allowed. */
		if (walk_level <= TABLE_TYPE_SEGMENT && entry.pud.h.fc) {
			if (!split)
				return -EFBIG;
			rc = dat_split_crste(mc, *last, gfn, asce, uses_skeys);
			if (rc)
				return rc;
			entry = READ_ONCE(**last);
		}
		table = dereference_crste(entry.pud);
	}

	/* Segment level; always present regardless of ASCE type. */
	*last = table->crstes + vaddr.sx;
	entry = READ_ONCE(**last);
	if (WARN_ON_ONCE(entry.h.tt != TABLE_TYPE_SEGMENT))
		return -EINVAL;
	if (crste_hole(entry) && !ign_holes)
		return entry.tok.type == _DAT_TOKEN_PIC ? entry.tok.par : -EFAULT;
	if (continue_anyway && !entry.pmd.h.fc && !entry.h.i) {
		walk_level = TABLE_TYPE_PAGE_TABLE;
		allocate = false;
	}
	if (walk_level == TABLE_TYPE_SEGMENT || ((leaf || any) && entry.pmd.h.fc))
		return 0;

	if (entry.pmd.h.i && !entry.pmd.h.fc) {
		if (!allocate)
			return any ? 0 : -ENOENT;
		rc = dat_split_ste(mc, &(*last)->pmd, gfn, asce, uses_skeys);
		if (rc)
			return rc;
		entry = READ_ONCE(**last);
	}
	if (walk_level <= TABLE_TYPE_PAGE_TABLE && entry.pmd.h.fc) {
		if (!split)
			return -EFBIG;
		rc = dat_split_ste(mc, &(*last)->pmd, gfn, asce, uses_skeys);
		if (rc)
			return rc;
		entry = READ_ONCE(**last);
	}
	/* Finally reached the page-table level. */
	pgtable = dereference_pmd(entry.pmd);
	*ptepp = pgtable->ptes + vaddr.px;
	if (pte_hole(**ptepp) && !ign_holes)
		return (*ptepp)->tok.type == _DAT_TOKEN_PIC ? (*ptepp)->tok.par : -EFAULT;
	return 0;
}

/*
 * Invoke the pte_entry callback for each PTE in [gfn, end) within @table.
 * Holes abort with -EFAULT unless DAT_WALK_IGN_HOLES is set; with
 * IGN_HOLES but without DAT_WALK_ANY, holes are silently skipped.
 * The first callback returning non-zero stops the loop and its value is
 * returned.
 */
static long dat_pte_walk_range(gfn_t gfn, gfn_t end, struct page_table *table, struct dat_walk *w)
{
	unsigned int idx = gfn & (_PAGE_ENTRIES - 1);
	long rc = 0;

	for ( ; gfn < end; idx++, gfn++) {
		if (pte_hole(READ_ONCE(table->ptes[idx]))) {
			if (!(w->flags & DAT_WALK_IGN_HOLES))
				return -EFAULT;
			if (!(w->flags & DAT_WALK_ANY))
				continue;
		}

		rc = w->ops->pte_entry(table->ptes + idx, gfn, gfn + 1, w);
		if (rc)
			break;
	}
	return rc;
}

/*
 * Recursively walk the CRSTEs of @table covering [start, end), invoking the
 * per-level callbacks and descending into lower-level tables where present.
 * Hole handling mirrors dat_pte_walk_range(). The callback may modify the
 * entry, so it is re-read afterwards before deciding whether to descend.
 */
static long dat_crste_walk_range(gfn_t start, gfn_t end, struct crst_table *table,
				 struct dat_walk *walk)
{
	unsigned long idx, cur_shift, cur_size;
	dat_walk_op the_op;
	union crste crste;
	gfn_t cur, next;
	long rc = 0;

	/*
	 * Frames covered per entry depend on the table type: 8 bits of index
	 * at segment level, 11 more bits per level above it.
	 */
	cur_shift = 8 + table->crstes[0].h.tt * 11;
	idx = (start >> cur_shift) & (_CRST_ENTRIES - 1);
	cur_size = 1UL << cur_shift;

	for (cur = ALIGN_DOWN(start, cur_size); cur < end; idx++, cur = next) {
		next = cur + cur_size;
		walk->last = table->crstes + idx;
		crste = READ_ONCE(*walk->last);

		if (crste_hole(crste)) {
			if (!(walk->flags & DAT_WALK_IGN_HOLES))
				return -EFAULT;
			if (!(walk->flags & DAT_WALK_ANY))
				continue;
		}

		the_op = walk->ops->crste_ops[crste.h.tt];
		if (the_op) {
			rc = the_op(walk->last, cur, next, walk);
			/* The callback may have changed the entry; re-read it. */
			crste = READ_ONCE(*walk->last);
		}
		if (rc)
			break;
		/* Descend into valid non-leaf entries, clamped to [start, end). */
		if (!crste.h.i && !crste.h.fc) {
			if (!is_pmd(crste))
				rc = dat_crste_walk_range(max(start, cur), min(end, next),
							  _dereference_crste(crste), walk);
			else if (walk->ops->pte_entry)
				rc = dat_pte_walk_range(max(start, cur), min(end, next),
							dereference_pmd(crste.pmd), walk);
		}
	}
	return rc;
}

/**
 * _dat_walk_gfn_range() - Walk DAT tables.
 * @start: The first guest page frame to walk.
 * @end: The guest page frame immediately after the last one to walk.
 * @asce: The ASCE of the guest mapping.
 * @ops: The gmap_walk_ops that will be used to perform the walk.
 * @flags: Flags from WALK_* (currently only WALK_IGN_HOLES is supported).
 * @priv: Will be passed as-is to the callbacks.
 *
 * Any callback returning non-zero causes the walk to stop immediately.
 *
 * Return: %-EINVAL in case of error, %-EFAULT if @start is too high for the
 *         given ASCE unless the DAT_WALK_IGN_HOLES flag is specified,
 *         otherwise it returns whatever the callbacks return.
 */
long _dat_walk_gfn_range(gfn_t start, gfn_t end, union asce asce,
			 const struct dat_walk_ops *ops, int flags, void *priv)
{
	struct crst_table *table = dereference_asce(asce);
	struct dat_walk walk = {
		.ops = ops,
		.asce = asce,
		.priv = priv,
		.flags = flags,
		.start = start,
		.end = end,
	};

	if (WARN_ON_ONCE(unlikely(!asce.val)))
		return -EINVAL;
	if (!asce_contains_gfn(asce, start))
		return (flags & DAT_WALK_IGN_HOLES) ? 0 : -EFAULT;

	/* Clamp the end of the walk to the range covered by the ASCE. */
	return dat_crste_walk_range(start, min(end, asce_end(asce)), table, &walk);
}

/**
 * dat_get_storage_key() - Read the effective storage key of a guest frame.
 * @asce: The ASCE of the address space.
 * @gfn: The guest frame whose key is requested.
 * @skey: Output; filled with the key (zeroed if nothing is mapped).
 *
 * For large mappings the hardware key is read directly; for PTE mappings
 * the key comes from the page (if valid) or from the PGSTE backup (if
 * invalid), with guest R/C bits merged in from the PGSTE either way.
 *
 * Return: 0 on success, or a dat_entry_walk() error.
 */
int dat_get_storage_key(union asce asce, gfn_t gfn, union skey *skey)
{
	union crste *crstep;
	union pgste pgste;
	union pte *ptep;
	int rc;

	skey->skey = 0;
	rc = dat_entry_walk(NULL, gfn, asce, DAT_WALK_ANY, TABLE_TYPE_PAGE_TABLE, &crstep, &ptep);
	if (rc)
		return rc;

	if (!ptep) {
		union crste crste;

		crste = READ_ONCE(*crstep);
		/* No key to report unless this is a present large mapping. */
		if (!crste.h.fc || !crste.s.fc1.pr)
			return 0;
		skey->skey = page_get_storage_key(large_crste_to_phys(crste, gfn));
		return 0;
	}
	pgste = pgste_get_lock(ptep);
	if (ptep->h.i) {
		/* Invalid PTE: the key lives in the PGSTE backup. */
		skey->acc = pgste.acc;
		skey->fp = pgste.fp;
	} else {
		skey->skey = page_get_storage_key(pte_origin(*ptep));
	}
	/* Guest R/C bits are always accumulated in the PGSTE. */
	skey->r |= pgste.gr;
	skey->c |= pgste.gc;
	pgste_set_unlock(ptep, pgste);
	return 0;
}

/*
 * Mark the PTE as having software-dirty key state (_PAGE_SD) if any of the
 * guest-visible key fields in the PGSTE changed. Must be called with the
 * PGSTE lock held.
 */
static void dat_update_ptep_sd(union pgste old, union pgste pgste, union pte *ptep)
{
	if (pgste.acc != old.acc || pgste.fp != old.fp || pgste.gr != old.gr || pgste.gc != old.gc)
		__atomic64_or(_PAGE_SD, &ptep->val);
}

/**
 * dat_set_storage_key() - Set the storage key of a guest frame.
 * @mc: The mmu cache used if tables need to be allocated/split.
 * @asce: The ASCE of the address space.
 * @gfn: The guest frame to operate on.
 * @skey: The new storage key.
 * @nq: "No quiesce" hint passed through to the hardware key update.
 *
 * Return: 0 on success, or a dat_entry_walk() error.
 */
int dat_set_storage_key(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t gfn,
			union skey skey, bool nq)
{
	union pgste pgste, old;
	union crste *crstep;
	union pte *ptep;
	int rc;

	rc = dat_entry_walk(mc, gfn, asce, DAT_WALK_LEAF_ALLOC, TABLE_TYPE_PAGE_TABLE,
			    &crstep, &ptep);
	if (rc)
		return rc;

	/* Large mapping: set the hardware key directly. */
	if (!ptep) {
		page_set_storage_key(large_crste_to_phys(*crstep, gfn), skey.skey, !nq);
		return 0;
	}

	old = pgste_get_lock(ptep);
	pgste = old;

	/* Mirror the new key into the PGSTE backup fields. */
	pgste.acc = skey.acc;
	pgste.fp = skey.fp;
	pgste.gc = skey.c;
	pgste.gr = skey.r;

	if (!ptep->h.i) {
		union skey old_skey;

		/* Fold the page's current R/C into the host view. */
		old_skey.skey = page_get_storage_key(pte_origin(*ptep));
		pgste.hc |= old_skey.c;
		pgste.hr |= old_skey.r;
		/*
		 * NOTE(review): old_skey.c/.r are overwritten here but never
		 * read again -- looks like dead stores; confirm against the
		 * original intent before removing.
		 */
		old_skey.c = old.gc;
		old_skey.r = old.gr;
		/* The hardware key is set with R/C cleared. */
		skey.r = 0;
		skey.c = 0;
		page_set_storage_key(pte_origin(*ptep), skey.skey, !nq);
	}

	dat_update_ptep_sd(old, pgste, ptep);
	pgste_set_unlock(ptep, pgste);
	return 0;
}

/*
 * Conditionally update the hardware storage key of a page: skip the update
 * when the key already matches (ignoring R when @mr, and C when @mc).
 * Always returns the previous key through @oldkey.
 * Returns true if the key was actually written.
 */
static bool page_cond_set_storage_key(phys_addr_t paddr, union skey skey, union skey *oldkey,
				      bool nq, bool mr, bool mc)
{
	oldkey->skey = page_get_storage_key(paddr);
	if (oldkey->acc == skey.acc && oldkey->fp == skey.fp &&
	    (oldkey->r == skey.r || mr) && (oldkey->c == skey.c || mc))
		return false;
	page_set_storage_key(paddr, skey.skey, !nq);
	return true;
}

/**
 * dat_cond_set_storage_key() - Conditionally set the key of a guest frame.
 * @mmc: The mmu cache used if tables need to be allocated/split.
 * @asce: The ASCE of the address space.
 * @gfn: The guest frame to operate on.
 * @skey: The new storage key.
 * @oldkey: Optional output; the previous effective key.
 * @nq: "No quiesce" hint for the hardware key update.
 * @mr: Ignore differences in the reference bit when comparing.
 * @mc: Ignore differences in the change bit when comparing.
 *
 * Return: 1 if the key was (considered) changed, 0 if it already matched,
 *         or a dat_entry_walk() error.
 */
int dat_cond_set_storage_key(struct kvm_s390_mmu_cache *mmc, union asce asce, gfn_t gfn,
			     union skey skey, union skey *oldkey, bool nq, bool mr, bool mc)
{
	union pgste pgste, old;
	union crste *crstep;
	union skey prev;
	union pte *ptep;
	int rc;

	rc = dat_entry_walk(mmc, gfn, asce, DAT_WALK_LEAF_ALLOC, TABLE_TYPE_PAGE_TABLE,
			    &crstep, &ptep);
	if (rc)
		return rc;

	if (!ptep)
		return page_cond_set_storage_key(large_crste_to_phys(*crstep, gfn), skey, oldkey,
						 nq, mr, mc);

	old = pgste_get_lock(ptep);
	pgste = old;

	rc = 1;
	pgste.acc = skey.acc;
	pgste.fp = skey.fp;
	pgste.gc = skey.c;
	pgste.gr = skey.r;

	if (!ptep->h.i) {
		rc = page_cond_set_storage_key(pte_origin(*ptep), skey, &prev, nq, mr, mc);
		pgste.hc |= prev.c;
		pgste.hr |= prev.r;
		/* Merge the guest-accumulated R/C into the reported old key. */
		prev.c |= old.gc;
		prev.r |= old.gr;
	} else {
		/* Invalid PTE: previous key comes entirely from the PGSTE. */
		prev.acc = old.acc;
		prev.fp = old.fp;
		prev.c = old.gc;
		prev.r = old.gr;
	}
	if (oldkey)
		*oldkey = prev;

	dat_update_ptep_sd(old, pgste, ptep);
	pgste_set_unlock(ptep, pgste);
	return rc;
}

/**
 * dat_reset_reference_bit() - Reset the reference bit of a guest frame.
 * @asce: The ASCE of the address space.
 * @gfn: The guest frame to operate on.
 *
 * Return: the previous R/C state (R in bit 1, C in bit 0, matching
 *         page_reset_referenced()), or a dat_entry_walk() error.
 */
int dat_reset_reference_bit(union asce asce, gfn_t gfn)
{
	union pgste pgste, old;
	union crste *crstep;
	union pte *ptep;
	int rc;

	rc = dat_entry_walk(NULL, gfn, asce, DAT_WALK_ANY, TABLE_TYPE_PAGE_TABLE, &crstep, &ptep);
	if (rc)
		return rc;

	if (!ptep) {
		union crste crste = READ_ONCE(*crstep);

		/* Nothing to reset unless a large mapping is present. */
		if (!crste.h.fc || !crste.s.fc1.pr)
			return 0;
		return page_reset_referenced(large_crste_to_phys(*crstep, gfn));
	}
	old = pgste_get_lock(ptep);
	pgste = old;

	if (!ptep->h.i) {
		rc = page_reset_referenced(pte_origin(*ptep));
		/* Remember the host-observed reference in the PGSTE. */
		pgste.hr = rc >> 1;
	}
	/* Fold in (and clear) the guest-accumulated reference bit. */
	rc |= (pgste.gr << 1) | pgste.gc;
	pgste.gr = 0;

	dat_update_ptep_sd(old, pgste, ptep);
	pgste_set_unlock(ptep, pgste);
	return rc;
}

/*
 * Walk callback: reset the key state of one PTE-mapped page to the default.
 * Returning @next (non-zero) pauses the walk for rescheduling.
 */
static long dat_reset_skeys_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	union pgste pgste;

	pgste = pgste_get_lock(ptep);
	pgste.acc = 0;
	pgste.fp = 0;
	pgste.gr = 0;
	pgste.gc = 0;
	if (ptep->s.pr)
		page_set_storage_key(pte_origin(*ptep), PAGE_DEFAULT_KEY, 1);
	pgste_set_unlock(ptep, pgste);

	if (need_resched())
		return next;
	return 0;
}

/*
 * Walk callback: reset the keys of all pages backing a present large
 * mapping, clamped to the walk range. Whole segments are done with the
 * (faster) sske_frame(); the remainder page by page.
 */
static long dat_reset_skeys_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	phys_addr_t addr, end, origin = crste_origin_large(*crstep);

	if (!crstep->h.fc || !crstep->s.fc1.pr)
		return 0;

	addr = ((max(gfn, walk->start) - gfn) << PAGE_SHIFT) + origin;
	end = ((min(next, walk->end) - gfn) << PAGE_SHIFT) + origin;
	while (ALIGN(addr + 1, _SEGMENT_SIZE) <= end)
		addr = sske_frame(addr, PAGE_DEFAULT_KEY);
	for ( ; addr < end; addr += PAGE_SIZE)
		page_set_storage_key(addr, PAGE_DEFAULT_KEY, 1);

	if (need_resched())
		return next;
	return 0;
}

/*
 * Reset all storage keys from @start to the end of the address space.
 * Returns 0 on completion, or the gfn at which the walk paused so the
 * caller can reschedule and resume (see the need_resched() returns above).
 */
long dat_reset_skeys(union asce asce, gfn_t start)
{
	const struct dat_walk_ops ops = {
		.pte_entry = dat_reset_skeys_pte,
		.pmd_entry = dat_reset_skeys_crste,
		.pud_entry = dat_reset_skeys_crste,
	};

	return _dat_walk_gfn_range(start, asce_end(asce), asce, &ops, DAT_WALK_IGN_HOLES, NULL);
}

/* Private state for the dat_set_slot() walk. */
struct slot_priv {
	unsigned long token;			/* _CRSTE_TOK value to install */
	struct kvm_s390_mmu_cache *mc;		/* cache for split allocations */
};

/* Walk callback: replace one PTE with the slot token (as a PTE token). */
static long _dat_slot_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	struct slot_priv *p = walk->priv;
	union crste dummy = { .val = p->token };
	union pte new_pte, pte = READ_ONCE(*ptep);

	new_pte = _PTE_TOK(dummy.tok.type, dummy.tok.par);

	/* Table entry already in the desired state. */
	if (pte.val == new_pte.val)
		return 0;

	dat_ptep_xchg(ptep, new_pte, gfn, walk->asce, false);
	return 0;
}

/*
 * Walk callback: replace a CRSTE with the slot token when the entry is
 * fully covered by the walk range, otherwise split it so the partial
 * coverage can be handled at a lower level.
 */
static long _dat_slot_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	union crste new_crste, crste = READ_ONCE(*crstep);
	struct slot_priv *p = walk->priv;

	new_crste.val = p->token;
	new_crste.h.tt = crste.h.tt;

	/* Table entry already in the desired state. */
	if (crste.val == new_crste.val)
		return 0;

	/* This table entry needs to be updated. */
	if (walk->start <= gfn && walk->end >= next) {
		dat_crstep_xchg_atomic(crstep, crste, new_crste, gfn, walk->asce);
		/* A lower level table was present, needs to be freed. */
		if (!crste.h.fc && !crste.h.i) {
			if (is_pmd(crste))
				dat_free_pt(dereference_pmd(crste.pmd));
			else
				dat_free_level(dereference_crste(crste), true);
		}
		return 0;
	}

	/* A lower level table is present, things will handled there. */
	if (!crste.h.fc && !crste.h.i)
		return 0;
	/* Split (install a lower level table), and handle things there. */
	return dat_split_crste(p->mc, crstep, gfn, walk->asce, false);
}

/* Ops table: the same CRSTE handler at every level, plus the PTE handler. */
static const struct dat_walk_ops dat_slot_ops = {
	.pte_entry = _dat_slot_pte,
	.crste_ops = { _dat_slot_crste, _dat_slot_crste, _dat_slot_crste, _dat_slot_crste, },
};

/**
 * dat_set_slot() - Mark a gfn range with a token of the given type/param.
 * @mc: The mmu cache used when entries need to be split.
 * @asce: The ASCE of the address space.
 * @start: First gfn of the range.
 * @end: Gfn immediately after the last one of the range.
 * @type: Token type to install.
 * @param: Token parameter to install.
 *
 * Return: 0 on success, negative error from the walk otherwise.
 */
int dat_set_slot(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t start, gfn_t end,
		 u16 type, u16 param)
{
	struct slot_priv priv = {
		.token = _CRSTE_TOK(0, type, param).val,
		.mc = mc,
	};

	return _dat_walk_gfn_range(start, end, asce, &dat_slot_ops,
				   DAT_WALK_IGN_HOLES | DAT_WALK_ANY, &priv);
}

/*
 * Unlock up to @n consecutive PGSTEs. Entries with the lock bit (pcl)
 * clear terminate the loop -- this also covers the partial-lock case in
 * pgste_get_trylock_multiple() below, whose pgstes array is zero-filled.
 */
static void pgste_set_unlock_multiple(union pte *first, int n, union pgste *pgstes)
{
	int i;

	for (i = 0; i < n; i++) {
		if (!pgstes[i].pcl)
			break;
		pgste_set_unlock(first + i, pgstes[i]);
	}
}

/*
 * Try to lock @n consecutive PGSTEs. On failure every PGSTE locked so far
 * is unlocked again and false is returned.
 */
static bool pgste_get_trylock_multiple(union pte *first, int n, union pgste *pgstes)
{
	int i;

	for (i = 0; i < n; i++) {
		if (!pgste_get_trylock(first + i, pgstes + i))
			break;
	}
	if (i == n)
		return true;
	pgste_set_unlock_multiple(first, n, pgstes);
	return false;
}

/*
 * Read a value stored across the val16 fields of consecutive PGSTEs
 * (16 bits per PGSTE, most significant chunk first; param.len + 1 chunks).
 * All involved PGSTEs are locked for the duration of the read.
 */
unsigned long dat_get_ptval(struct page_table *table, struct ptval_param param)
{
	union pgste pgstes[4] = {};
	unsigned long res = 0;
	int i, n;

	n = param.len + 1;

	while (!pgste_get_trylock_multiple(table->ptes + param.offset, n, pgstes))
		cpu_relax();

	for (i = 0; i < n; i++)
		res = res << 16 | pgstes[i].val16;

	pgste_set_unlock_multiple(table->ptes + param.offset, n, pgstes);
	return res;
}

/*
 * Store a value across the val16 fields of consecutive PGSTEs; the inverse
 * of dat_get_ptval(). The value is written back by the unlock below.
 */
void dat_set_ptval(struct page_table *table, struct ptval_param param, unsigned long val)
{
	union pgste pgstes[4] = {};
	int i, n;

	n = param.len + 1;

	while (!pgste_get_trylock_multiple(table->ptes + param.offset, n, pgstes))
		cpu_relax();

	/* Fill least significant chunk last so 'val' shifts out correctly. */
	for (i = param.len; i >= 0; i--) {
		pgstes[i].val16 = val;
		val = val >> 16;
	}

	pgste_set_unlock_multiple(table->ptes + param.offset, n, pgstes);
}

/* Walk callback: report whether this PTE is young (software young bit). */
static long _dat_test_young_pte(union pte *ptep, gfn_t start, gfn_t end, struct dat_walk *walk)
{
	return ptep->s.y;
}

/* Walk callback: report whether this large CRSTE is young. */
static long _dat_test_young_crste(union crste *crstep, gfn_t start, gfn_t end,
				  struct dat_walk *walk)
{
	return crstep->h.fc && crstep->s.fc1.y;
}

static const struct dat_walk_ops test_age_ops = {
	.pte_entry = _dat_test_young_pte,
	.pmd_entry = _dat_test_young_crste,
	.pud_entry = _dat_test_young_crste,
};

/**
 * dat_test_age_gfn() - Test young.
 * @asce: The ASCE whose address range is to be tested.
 * @start: The first guest frame of the range to check.
 * @end: The guest frame after the last in the range.
 *
 * Context: called by KVM common code with the kvm mmu write lock held.
 *
 * Return: %true if any page in the given range is young, otherwise %false.
 */
bool dat_test_age_gfn(union asce asce, gfn_t start, gfn_t end)
{
	/* Any callback returning 1 stops the walk with a positive result. */
	return _dat_walk_gfn_range(start, end, asce, &test_age_ops, 0, NULL) > 0;
}

/**
 * dat_link() - Install a mapping for a resolved guest fault.
 * @mc: The mmu cache used for allocations during the walk.
 * @asce: The ASCE of the address space.
 * @level: The level at which to map (page table or a large mapping).
 * @uses_skeys: Whether the guest uses storage keys.
 * @f: The guest fault; pfn/writability inputs, crstep/ptep outputs, and an
 *     optional callback invoked once the new entry has been installed.
 *
 * Return: 0 on success, -EINVAL/-ENOMEM from the walk, or -EAGAIN when
 *         the fault needs to be retried (walk failed transiently, or the
 *         existing entry no longer matches this fault).
 */
int dat_link(struct kvm_s390_mmu_cache *mc, union asce asce, int level,
	     bool uses_skeys, struct guest_fault *f)
{
	union crste oldval, newval;
	union pte newpte, oldpte;
	union pgste pgste;
	int rc = 0;

	rc = dat_entry_walk(mc, f->gfn, asce, DAT_WALK_ALLOC_CONTINUE, level, &f->crstep, &f->ptep);
	if (rc == -EINVAL || rc == -ENOMEM)
		return rc;
	/* Any other walk failure is transient; let the caller retry. */
	if (rc)
		return -EAGAIN;

	if (WARN_ON_ONCE(unlikely(get_level(f->crstep, f->ptep) > level)))
		return -EINVAL;

	if (f->ptep) {
		/* Mapping at page granularity. */
		pgste = pgste_get_lock(f->ptep);
		oldpte = *f->ptep;
		newpte = _pte(f->pfn, f->writable, f->write_attempt | oldpte.s.d, !f->page);
		/* Preserve the software-dirty bit; ignore it for comparison. */
		newpte.s.sd = oldpte.s.sd;
		oldpte.s.sd = 0;
		if (oldpte.val == _PTE_EMPTY.val || oldpte.h.pfra == f->pfn) {
			pgste = __dat_ptep_xchg(f->ptep, pgste, newpte, f->gfn, asce, uses_skeys);
			if (f->callback)
				f->callback(f);
		} else {
			/* A different page is already mapped here; retry. */
			rc = -EAGAIN;
		}
		pgste_set_unlock(f->ptep, pgste);
	} else {
		/* Mapping with a large (fc1) CRSTE. */
		oldval = READ_ONCE(*f->crstep);
		newval = _crste_fc1(f->pfn, oldval.h.tt, f->writable,
				    f->write_attempt | oldval.s.fc1.d);
		newval.s.fc1.sd = oldval.s.fc1.sd;
		if (oldval.val != _CRSTE_EMPTY(oldval.h.tt).val &&
		    crste_origin_large(oldval) != crste_origin_large(newval))
			return -EAGAIN;
		if (!dat_crstep_xchg_atomic(f->crstep, oldval, newval, f->gfn, asce))
			return -EAGAIN;
		if (f->callback)
			f->callback(f);
	}

	return rc;
}

/*
 * Walk callback: set the prefix-notification bit on a present, writable
 * large CRSTE. Counts 2 into *n since a large mapping covers both pages
 * of the prefix area (see dat_set_prefix_notif_bit() below).
 */
static long dat_set_pn_crste(union crste *crstep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	union crste crste = READ_ONCE(*crstep);
	int *n = walk->priv;

	/* Only present, non-protected large mappings qualify. */
	if (!crste.h.fc || crste.h.i || crste.h.p)
		return 0;

	*n = 2;
	if (crste.s.fc1.prefix_notif)
		return 0;
	crste.s.fc1.prefix_notif = 1;
	dat_crstep_xchg(crstep, crste, gfn, walk->asce);
	return 0;
}

/*
 * Walk callback: set the prefix-notification bit in the PGSTE of a present,
 * non-protected PTE, counting each success into *n.
 */
static long dat_set_pn_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	int *n = walk->priv;
	union pgste pgste;

	pgste = pgste_get_lock(ptep);
	if (!ptep->h.i && !ptep->h.p) {
		pgste.prefix_notif = 1;
		*n += 1;
	}
	pgste_set_unlock(ptep, pgste);
	return 0;
}

/**
 * dat_set_prefix_notif_bit() - Mark the two prefix pages for notification.
 * @asce: The ASCE of the address space.
 * @gfn: The first guest frame of the prefix area.
 *
 * Both pages of the prefix area (gfn and gfn + 1) must be mapped writable
 * for this to succeed; the callbacks count how many were marked.
 *
 * Return: 0 on success, -EAGAIN if not both pages could be marked.
 */
int dat_set_prefix_notif_bit(union asce asce, gfn_t gfn)
{
	static const struct dat_walk_ops ops = {
		.pte_entry = dat_set_pn_pte,
		.pmd_entry = dat_set_pn_crste,
		.pud_entry = dat_set_pn_crste,
	};

	int n = 0;

	_dat_walk_gfn_range(gfn, gfn + 2, asce, &ops, DAT_WALK_IGN_HOLES, &n);
	if (n != 2)
		return -EAGAIN;
	return 0;
}

/**
 * dat_perform_essa() - Perform ESSA actions on the PGSTE.
 * @asce: The asce to operate on.
 * @gfn: The guest page frame to operate on.
 * @orc: The specific action to perform, see the ESSA_SET_* macros.
 * @state: The storage attributes to be returned to the guest.
 * @dirty: Returns whether the function dirtied a previously clean entry.
 *
 * Context: Called with kvm->mmu_lock held.
 *
 * Return:
 * * %1 if the page state has been altered and the page is to be added to the CBRL
 * * %0 if the page state has been altered, but the page is not to be added to the CBRL
 * * %-1 if the page state has not been altered and the page is not to be added to the CBRL
 */
int dat_perform_essa(union asce asce, gfn_t gfn, int orc, union essa_state *state, bool *dirty)
{
	union crste *crstep;
	union pgste pgste;
	union pte *ptep;
	int res = 0;

	/* Without a PTE for this gfn, report an exception state to the guest. */
	if (dat_entry_walk(NULL, gfn, asce, 0, TABLE_TYPE_PAGE_TABLE, &crstep, &ptep)) {
		*state = (union essa_state) { .exception = 1 };
		return -1;
	}

	pgste = pgste_get_lock(ptep);

	/* content encodes residency: bit 1 = invalid, bit 0 = invalid and logically zero. */
	*state = (union essa_state) {
		.content = (ptep->h.i << 1) + (ptep->h.i && pgste.zero),
		.nodat = pgste.nodat,
		.usage = pgste.usage,
	};

	switch (orc) {
	case ESSA_GET_STATE:
		/* Query only: nothing altered, nothing for the CBRL. */
		res = -1;
		break;
	case ESSA_SET_STABLE:
		pgste.usage = PGSTE_GPS_USAGE_STABLE;
		pgste.nodat = 0;
		break;
	case ESSA_SET_UNUSED:
		pgste.usage = PGSTE_GPS_USAGE_UNUSED;
		/* An already-invalid page can be reported for collection. */
		if (ptep->h.i)
			res = 1;
		break;
	case ESSA_SET_VOLATILE:
		pgste.usage = PGSTE_GPS_USAGE_VOLATILE;
		if (ptep->h.i)
			res = 1;
		break;
	case ESSA_SET_POT_VOLATILE:
		if (!ptep->h.i) {
			pgste.usage = PGSTE_GPS_USAGE_POT_VOLATILE;
		} else if (pgste.zero) {
			pgste.usage = PGSTE_GPS_USAGE_VOLATILE;
		} else if (!pgste.gc) {
			pgste.usage = PGSTE_GPS_USAGE_VOLATILE;
			res = 1;
		}
		break;
	case ESSA_SET_STABLE_RESIDENT:
		pgste.usage = PGSTE_GPS_USAGE_STABLE;
		/*
		 * Since the resident state can go away any time after this
		 * call, we will not make this page resident. We can revisit
		 * this decision if a guest will ever start using this.
		 */
		break;
	case ESSA_SET_STABLE_IF_RESIDENT:
		if (!ptep->h.i)
			pgste.usage = PGSTE_GPS_USAGE_STABLE;
		break;
	case ESSA_SET_STABLE_NODAT:
		pgste.usage = PGSTE_GPS_USAGE_STABLE;
		pgste.nodat = 1;
		break;
	default:
		WARN_ONCE(1, "Invalid ORC!");
		res = -1;
		break;
	}
	/* If we are discarding a page, set it to logical zero. */
	pgste.zero = res == 1;
	/*
	 * Track the CMMA dirty state for modifying actions; note that *dirty
	 * is written only when orc > 0 (see dat_get_cmma() for the consumer).
	 */
	if (orc > 0) {
		*dirty = !pgste.cmma_d;
		pgste.cmma_d = 1;
	}

	pgste_set_unlock(ptep, pgste);

	return res;
}

/*
 * Walk callback: reset the CMMA state of one page. Returns a positive
 * resumption point (which stops the walk) when rescheduling is needed.
 */
static long dat_reset_cmma_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	union pgste pgste;

	pgste = pgste_get_lock(ptep);
	pgste.usage = 0;
	pgste.nodat = 0;
	pgste.cmma_d = 0;
	pgste_set_unlock(ptep, pgste);
	/* Give the caller a chance to reschedule between page tables. */
	if (need_resched())
		return next;
	return 0;
}

/**
 * dat_reset_cmma() - Reset the CMMA state of a guest address space.
 * @asce: The ASCE of the address space.
 * @start: The first guest frame to process.
 *
 * Return: a positive gfn to continue from if the walk stopped early because
 * rescheduling was needed (see dat_reset_cmma_pte()), otherwise the result
 * of the walk.
 */
long dat_reset_cmma(union asce asce, gfn_t start)
{
	const struct dat_walk_ops dat_reset_cmma_ops = {
		.pte_entry = dat_reset_cmma_pte,
	};

	return _dat_walk_gfn_range(start, asce_end(asce), asce, &dat_reset_cmma_ops,
				   DAT_WALK_IGN_HOLES, NULL);
}

/* State shared by the CMMA peek/get walk callbacks below. */
struct dat_get_cmma_state {
	gfn_t start;		/* get path: first saved gfn, -1 until the first hit. */
	gfn_t end;		/* One past the last gfn whose value was saved. */
	unsigned int count;	/* get path: capacity of @values. */
	u8 *values;		/* Output buffer, one CMMA value byte per page. */
	atomic64_t *remaining;	/* get path: global count of still-dirty entries. */
};

/* Walk callback: read one page's CMMA value without altering any state. */
static long __dat_peek_cmma_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	struct dat_get_cmma_state *state = walk->priv;
	union pgste pgste;

	pgste = pgste_get_lock(ptep);
	/* The reported value is the usage state plus the no-DAT bit (bit 6). */
	state->values[gfn - walk->start] = pgste.usage | (pgste.nodat << 6);
	pgste_set_unlock(ptep, pgste);
	state->end = next;

	return 0;
}

/* Walk callback: an invalid entry ends the range that can be reported. */
static long __dat_peek_cmma_crste(union crste *crstep, gfn_t gfn,
				  gfn_t next, struct dat_walk *walk)
{
	struct dat_get_cmma_state *state = walk->priv;

	/* A hole ends the contiguous range whose values can be reported. */
	if (crstep->h.i)
		state->end = min(walk->end, next);
	return 0;
}

/**
 * dat_peek_cmma() - Read CMMA values without modifying any state.
 * @start: The first guest frame whose CMMA value is to be read.
 * @asce: The ASCE of the guest address space.
 * @count: Input: number of values requested; output: number of values saved.
 * @values: Output buffer, one byte per page.
 *
 * Return: %0 if at least one value was saved, otherwise a negative error
 * value from the walk.
 */
int dat_peek_cmma(gfn_t start, union asce asce, unsigned int *count, u8 *values)
{
	const struct dat_walk_ops ops = {
		.pte_entry = __dat_peek_cmma_pte,
		.pmd_entry = __dat_peek_cmma_crste,
		.pud_entry = __dat_peek_cmma_crste,
		.p4d_entry = __dat_peek_cmma_crste,
		.pgd_entry = __dat_peek_cmma_crste,
	};
	struct dat_get_cmma_state state = { .values = values, };
	int rc;

	rc = _dat_walk_gfn_range(start, start + *count, asce, &ops, DAT_WALK_DEFAULT, &state);
	*count = state.end - start;
	/* Return success if at least one value was saved, otherwise an error. */
	return (rc == -EFAULT && *count > 0) ? 0 : rc;
}

/*
 * Walk callback: harvest one dirty CMMA value and clear its dirty bit.
 *
 * Once a first dirty page has been found, the walk is stopped (positive
 * return) when the output buffer is full or when the current page is too
 * far away from the values collected so far.
 */
static long __dat_get_cmma_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	struct dat_get_cmma_state *state = walk->priv;
	union pgste pgste;

	if (state->start != -1) {
		if ((gfn - state->end) > KVM_S390_MAX_BIT_DISTANCE)
			return 1;
		if (gfn - state->start >= state->count)
			return 1;
	}

	/* Cheap lockless precheck; skip pages whose state is unchanged. */
	if (!READ_ONCE(*pgste_of(ptep)).cmma_d)
		return 0;

	pgste = pgste_get_lock(ptep);
	/* Recheck under the PGSTE lock before consuming the dirty bit. */
	if (pgste.cmma_d) {
		if (state->start == -1)
			state->start = gfn;
		pgste.cmma_d = 0;
		atomic64_dec(state->remaining);
		state->values[gfn - state->start] = pgste.usage | pgste.nodat << 6;
		state->end = next;
	}
	pgste_set_unlock(ptep, pgste);
	return 0;
}

/**
 * dat_get_cmma() - Harvest dirty CMMA values, clearing their dirty state.
 * @asce: The ASCE of the guest address space.
 * @start: Input: where to start scanning; output: the first gfn saved.
 * @count: Input: capacity of @values; output: number of values saved.
 * @values: Output buffer, one byte per page.
 * @rem: Counter of remaining dirty entries, decremented per value saved.
 *
 * Return: always %0; with no dirty entry found, *count is set to %0.
 */
int dat_get_cmma(union asce asce, gfn_t *start, unsigned int *count, u8 *values, atomic64_t *rem)
{
	const struct dat_walk_ops ops = { .pte_entry = __dat_get_cmma_pte, };
	struct dat_get_cmma_state state = {
		.remaining = rem,
		.values = values,
		.count = *count,
		.start = -1,
	};

	_dat_walk_gfn_range(*start, asce_end(asce), asce, &ops, DAT_WALK_IGN_HOLES, &state);

	if (state.start == -1) {
		*count = 0;
	} else {
		*count = state.end - state.start;
		*start = state.start;
	}

	return 0;
}

/* State for the __dat_set_cmma_pte() walk callback. */
struct dat_set_cmma_state {
	unsigned long mask;	/* Mask of PGSTE bits that may be written. */
	const u8 *bits;		/* One byte of new PGSTE bits per page. */
};

/* Walk callback: apply one page's new CMMA bits (NT, US) to its PGSTE. */
static long __dat_set_cmma_pte(union pte *ptep, gfn_t gfn, gfn_t next, struct dat_walk *walk)
{
	struct dat_set_cmma_state *state = walk->priv;
	union pgste pgste, tmp;

	/* Each input byte holds PGSTE bits 32-39, i.e. bits 24-31 of the value. */
	tmp.val = (state->bits[gfn - walk->start] << 24) & state->mask;

	pgste = pgste_get_lock(ptep);
	pgste.usage = tmp.usage;
	pgste.nodat = tmp.nodat;
	pgste_set_unlock(ptep, pgste);

	return 0;
}

/**
 * dat_set_cmma_bits() - Set CMMA bits for a range of guest pages.
 * @mc: Cache used for allocations.
 * @asce: The ASCE of the guest.
 * @gfn: The guest frame of the first page whose CMMA bits are to set.
 * @count: How many pages need to be processed.
 * @mask: Which PGSTE bits should be set.
 * @bits: Points to an array with the CMMA attributes.
 *
 * This function sets the CMMA attributes for the given pages. If the input
 * buffer has zero length, no action is taken, otherwise the attributes are
 * set and the mm->context.uses_cmm flag is set.
 *
 * Each byte in @bits contains new values for bits 32-39 of the PGSTE.
 * Currently, only the fields NT and US are applied.
 *
 * NOTE(review): the uses_cmm update mentioned above is not visible in this
 * function; confirm it is handled by the caller.
 *
 * Return: %0 in case of success, a negative error value otherwise.
 */
int dat_set_cmma_bits(struct kvm_s390_mmu_cache *mc, union asce asce, gfn_t gfn,
		      unsigned long count, unsigned long mask, const uint8_t *bits)
{
	const struct dat_walk_ops ops = { .pte_entry = __dat_set_cmma_pte, };
	struct dat_set_cmma_state state = { .mask = mask, .bits = bits, };
	union crste *crstep;
	union pte *ptep;
	gfn_t cur;
	int rc;

	/*
	 * Pre-allocate the page tables of the whole range (one walk per page
	 * table's worth of pages), so the hole-ignoring walk below finds a
	 * PTE for every single page.
	 */
	for (cur = ALIGN_DOWN(gfn, _PAGE_ENTRIES); cur < gfn + count; cur += _PAGE_ENTRIES) {
		rc = dat_entry_walk(mc, cur, asce, DAT_WALK_ALLOC, TABLE_TYPE_PAGE_TABLE,
				    &crstep, &ptep);
		if (rc)
			return rc;
	}
	return _dat_walk_gfn_range(gfn, gfn + count, asce, &ops, DAT_WALK_IGN_HOLES, &state);
}