/*
 *    Copyright IBM Corp. 2007, 2011
 *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/sysctl.h>
#include <linux/ksm.h>
#include <linux/mman.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/page-states.h>

static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr,
				   pte_t *ptep, int nodat)
{
	unsigned long opt, asce;

	if (MACHINE_HAS_TLB_GUEST) {
		opt = 0;
		asce = READ_ONCE(mm->context.gmap_asce);
		if (asce == 0UL || nodat)
			opt |= IPTE_NODAT;
		if (asce != -1UL) {
			asce = asce ? : mm->context.asce;
			opt |= IPTE_GUEST_ASCE;
		}
		__ptep_ipte(addr, ptep, opt, asce, IPTE_LOCAL);
	} else {
		__ptep_ipte(addr, ptep, 0, 0, IPTE_LOCAL);
	}
}

static inline void ptep_ipte_global(struct mm_struct *mm, unsigned long addr,
				    pte_t *ptep, int nodat)
{
	unsigned long opt, asce;

	if (MACHINE_HAS_TLB_GUEST) {
		opt = 0;
		asce = READ_ONCE(mm->context.gmap_asce);
		if (asce == 0UL || nodat)
			opt |= IPTE_NODAT;
		if (asce != -1UL) {
			asce = asce ? : mm->context.asce;
			opt |= IPTE_GUEST_ASCE;
		}
		__ptep_ipte(addr, ptep, opt, asce, IPTE_GLOBAL);
	} else {
		__ptep_ipte(addr, ptep, 0, 0, IPTE_GLOBAL);
	}
}

static inline pte_t ptep_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pte_t *ptep,
				      int nodat)
{
	pte_t old;

	old = *ptep;
	if (unlikely(pte_val(old) & _PAGE_INVALID))
		return old;
	atomic_inc(&mm->context.flush_count);
	if (MACHINE_HAS_TLB_LC &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		ptep_ipte_local(mm, addr, ptep, nodat);
	else
		ptep_ipte_global(mm, addr, ptep, nodat);
	atomic_dec(&mm->context.flush_count);
	return old;
}

static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
				    unsigned long addr, pte_t *ptep,
				    int nodat)
{
	pte_t old;

	old = *ptep;
	if (unlikely(pte_val(old) & _PAGE_INVALID))
		return old;
	atomic_inc(&mm->context.flush_count);
	if (cpumask_equal(&mm->context.cpu_attach_mask,
			  cpumask_of(smp_processor_id()))) {
		pte_val(*ptep) |= _PAGE_INVALID;
		mm->context.flush_mm = 1;
	} else
		ptep_ipte_global(mm, addr, ptep, nodat);
	atomic_dec(&mm->context.flush_count);
	return old;
}

static inline pgste_t pgste_get_lock(pte_t *ptep)
{
	unsigned long new = 0;
#ifdef CONFIG_PGSTE
	unsigned long old;

	asm(
		"	lg	%0,%2\n"
		"0:	lgr	%1,%0\n"
		"	nihh	%0,0xff7f\n"	/* clear PCL bit in old */
		"	oihh	%1,0x0080\n"	/* set PCL bit in new */
		"	csg	%0,%1,%2\n"
		"	jl	0b\n"
		: "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
		: "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory");
#endif
	return __pgste(new);
}

static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	asm(
		"	nihh	%1,0xff7f\n"	/* clear PCL bit */
		"	stg	%1,%0\n"
		: "=Q" (ptep[PTRS_PER_PTE])
		: "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
		: "cc", "memory");
#endif
}
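/*
 * Note on the PGSTE helpers above and below: the page status table entry
 * (PGSTE) for a pte sits PTRS_PER_PTE entries behind it in the same page,
 * which is why ptep[PTRS_PER_PTE] is used as the lock word.  pgste_get_lock()
 * spins on a compare-and-swap of the PCL bit and returns the locked value,
 * pgste_set_unlock() stores the new value with the PCL bit cleared.  All
 * PGSTE updates in this file follow the same pattern (illustrative sketch
 * only):
 *
 *	pgste = pgste_get_lock(ptep);
 *	... inspect or modify pgste_val(pgste) and/or *ptep ...
 *	pgste_set_unlock(ptep, pgste);
 */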
static inline pgste_t pgste_get(pte_t *ptep)
{
	unsigned long pgste = 0;
#ifdef CONFIG_PGSTE
	pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
#endif
	return __pgste(pgste);
}

static inline void pgste_set(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	*(pgste_t *)(ptep + PTRS_PER_PTE) = pgste;
#endif
}

static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste,
				       struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	unsigned long address, bits, skey;

	if (!mm_use_skey(mm) || pte_val(pte) & _PAGE_INVALID)
		return pgste;
	address = pte_val(pte) & PAGE_MASK;
	skey = (unsigned long) page_get_storage_key(address);
	bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
	/* Transfer page changed & referenced bit to guest bits in pgste */
	pgste_val(pgste) |= bits << 48;		/* GR bit & GC bit */
	/* Copy page access key and fetch protection bit to pgste */
	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
	pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
#endif
	return pgste;
}

static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
				 struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	unsigned long address;
	unsigned long nkey;

	if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID)
		return;
	VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
	address = pte_val(entry) & PAGE_MASK;
	/*
	 * Set page access key and fetch protection bit from pgste.
	 * The guest C/R information is still in the PGSTE, set real
	 * key C/R to 0.
	 */
	nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
	nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
	page_set_storage_key(address, nkey, 0);
#endif
}
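/*
 * pgste_update_all() and pgste_set_key() above move storage-key state in
 * opposite directions: the former harvests the real key (ACC, FP, R, C) of a
 * still-mapped page into the PGSTE before the mapping goes away, the latter
 * writes the key bits saved in the PGSTE back to the real storage key when an
 * invalid pte is made valid again.  Both are no-ops unless the mm uses
 * storage keys (mm_use_skey()).
 */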
static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
{
#ifdef CONFIG_PGSTE
	if ((pte_val(entry) & _PAGE_PRESENT) &&
	    (pte_val(entry) & _PAGE_WRITE) &&
	    !(pte_val(entry) & _PAGE_INVALID)) {
		if (!MACHINE_HAS_ESOP) {
			/*
			 * Without enhanced suppression-on-protection force
			 * the dirty bit on for all writable ptes.
			 */
			pte_val(entry) |= _PAGE_DIRTY;
			pte_val(entry) &= ~_PAGE_PROTECT;
		}
		if (!(pte_val(entry) & _PAGE_PROTECT))
			/* This pte allows write access, set user-dirty */
			pgste_val(pgste) |= PGSTE_UC_BIT;
	}
#endif
	*ptep = entry;
	return pgste;
}

static inline pgste_t pgste_pte_notify(struct mm_struct *mm,
				       unsigned long addr,
				       pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	unsigned long bits;

	bits = pgste_val(pgste) & (PGSTE_IN_BIT | PGSTE_VSIE_BIT);
	if (bits) {
		pgste_val(pgste) ^= bits;
		ptep_notify(mm, addr, ptep, bits);
	}
#endif
	return pgste;
}

static inline pgste_t ptep_xchg_start(struct mm_struct *mm,
				      unsigned long addr, pte_t *ptep)
{
	pgste_t pgste = __pgste(0);

	if (mm_has_pgste(mm)) {
		pgste = pgste_get_lock(ptep);
		pgste = pgste_pte_notify(mm, addr, ptep, pgste);
	}
	return pgste;
}

static inline pte_t ptep_xchg_commit(struct mm_struct *mm,
				     unsigned long addr, pte_t *ptep,
				     pgste_t pgste, pte_t old, pte_t new)
{
	if (mm_has_pgste(mm)) {
		if (pte_val(old) & _PAGE_INVALID)
			pgste_set_key(ptep, pgste, new, mm);
		if (pte_val(new) & _PAGE_INVALID) {
			pgste = pgste_update_all(old, pgste, mm);
			if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
			    _PGSTE_GPS_USAGE_UNUSED)
				pte_val(old) |= _PAGE_UNUSED;
		}
		pgste = pgste_set_pte(ptep, pgste, new);
		pgste_set_unlock(ptep, pgste);
	} else {
		*ptep = new;
	}
	return old;
}

pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pte_t *ptep, pte_t new)
{
	pgste_t pgste;
	pte_t old;
	int nodat;

	preempt_disable();
	pgste = ptep_xchg_start(mm, addr, ptep);
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	old = ptep_flush_direct(mm, addr, ptep, nodat);
	old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(ptep_xchg_direct);

pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t new)
{
	pgste_t pgste;
	pte_t old;
	int nodat;

	preempt_disable();
	pgste = ptep_xchg_start(mm, addr, ptep);
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	old = ptep_flush_lazy(mm, addr, ptep, nodat);
	old = ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(ptep_xchg_lazy);

pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
			     pte_t *ptep)
{
	pgste_t pgste;
	pte_t old;
	int nodat;

	preempt_disable();
	pgste = ptep_xchg_start(mm, addr, ptep);
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	old = ptep_flush_lazy(mm, addr, ptep, nodat);
	if (mm_has_pgste(mm)) {
		pgste = pgste_update_all(old, pgste, mm);
		pgste_set(ptep, pgste);
	}
	return old;
}
EXPORT_SYMBOL(ptep_modify_prot_start);

void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
			     pte_t *ptep, pte_t pte)
{
	pgste_t pgste;

	if (!MACHINE_HAS_NX)
		pte_val(pte) &= ~_PAGE_NOEXEC;
	if (mm_has_pgste(mm)) {
		pgste = pgste_get(ptep);
		pgste_set_key(ptep, pgste, pte, mm);
		pgste = pgste_set_pte(ptep, pgste, pte);
		pgste_set_unlock(ptep, pgste);
	} else {
		*ptep = pte;
	}
	preempt_enable();
}
EXPORT_SYMBOL(ptep_modify_prot_commit);
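/*
 * ptep_modify_prot_start() and ptep_modify_prot_commit() are meant to be used
 * as a pair around a non-atomic update of a pte, roughly like this
 * (illustrative sketch only; the pte_wrprotect() step is just an example of
 * an intermediate modification):
 *
 *	pte_t pte;
 *
 *	pte = ptep_modify_prot_start(mm, addr, ptep);
 *	pte = pte_wrprotect(pte);
 *	ptep_modify_prot_commit(mm, addr, ptep, pte);
 *
 * The start function leaves preemption disabled and, for mms with PGSTEs,
 * the PGSTE lock held; the commit function releases both.
 */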
static inline void pmdp_idte_local(struct mm_struct *mm,
				   unsigned long addr, pmd_t *pmdp)
{
	if (MACHINE_HAS_TLB_GUEST)
		__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_LOCAL);
	else
		__pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL);
}

static inline void pmdp_idte_global(struct mm_struct *mm,
				    unsigned long addr, pmd_t *pmdp)
{
	if (MACHINE_HAS_TLB_GUEST)
		__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_GLOBAL);
	else if (MACHINE_HAS_IDTE)
		__pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL);
	else
		__pmdp_csp(pmdp);
}

static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pmd_t *pmdp)
{
	pmd_t old;

	old = *pmdp;
	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
		return old;
	atomic_inc(&mm->context.flush_count);
	if (MACHINE_HAS_TLB_LC &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		pmdp_idte_local(mm, addr, pmdp);
	else
		pmdp_idte_global(mm, addr, pmdp);
	atomic_dec(&mm->context.flush_count);
	return old;
}

static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
				    unsigned long addr, pmd_t *pmdp)
{
	pmd_t old;

	old = *pmdp;
	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
		return old;
	atomic_inc(&mm->context.flush_count);
	if (cpumask_equal(&mm->context.cpu_attach_mask,
			  cpumask_of(smp_processor_id()))) {
		pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
		mm->context.flush_mm = 1;
	} else {
		pmdp_idte_global(mm, addr, pmdp);
	}
	atomic_dec(&mm->context.flush_count);
	return old;
}

pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pmd_t *pmdp, pmd_t new)
{
	pmd_t old;

	preempt_disable();
	old = pmdp_flush_direct(mm, addr, pmdp);
	*pmdp = new;
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(pmdp_xchg_direct);

pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
		     pmd_t *pmdp, pmd_t new)
{
	pmd_t old;

	preempt_disable();
	old = pmdp_flush_lazy(mm, addr, pmdp);
	*pmdp = new;
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(pmdp_xchg_lazy);

static inline void pudp_idte_local(struct mm_struct *mm,
				   unsigned long addr, pud_t *pudp)
{
	if (MACHINE_HAS_TLB_GUEST)
		__pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_LOCAL);
	else
		__pudp_idte(addr, pudp, 0, 0, IDTE_LOCAL);
}

static inline void pudp_idte_global(struct mm_struct *mm,
				    unsigned long addr, pud_t *pudp)
{
	if (MACHINE_HAS_TLB_GUEST)
		__pudp_idte(addr, pudp, IDTE_NODAT | IDTE_GUEST_ASCE,
			    mm->context.asce, IDTE_GLOBAL);
	else if (MACHINE_HAS_IDTE)
		__pudp_idte(addr, pudp, 0, 0, IDTE_GLOBAL);
	else
		/*
		 * Invalid bit position is the same for pmd and pud, so we can
		 * re-use __pmdp_csp() here
		 */
		__pmdp_csp((pmd_t *) pudp);
}

static inline pud_t pudp_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pud_t *pudp)
{
	pud_t old;

	old = *pudp;
	if (pud_val(old) & _REGION_ENTRY_INVALID)
		return old;
	atomic_inc(&mm->context.flush_count);
	if (MACHINE_HAS_TLB_LC &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		pudp_idte_local(mm, addr, pudp);
	else
		pudp_idte_global(mm, addr, pudp);
	atomic_dec(&mm->context.flush_count);
	return old;
}
pud_t pudp_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pud_t *pudp, pud_t new)
{
	pud_t old;

	preempt_disable();
	old = pudp_flush_direct(mm, addr, pudp);
	*pudp = new;
	preempt_enable();
	return old;
}
EXPORT_SYMBOL(pudp_xchg_direct);

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				pgtable_t pgtable)
{
	struct list_head *lh = (struct list_head *) pgtable;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	if (!pmd_huge_pte(mm, pmdp))
		INIT_LIST_HEAD(lh);
	else
		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
	pmd_huge_pte(mm, pmdp) = pgtable;
}

pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
	struct list_head *lh;
	pgtable_t pgtable;
	pte_t *ptep;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	pgtable = pmd_huge_pte(mm, pmdp);
	lh = (struct list_head *) pgtable;
	if (list_empty(lh))
		pmd_huge_pte(mm, pmdp) = NULL;
	else {
		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
		list_del(lh);
	}
	ptep = (pte_t *) pgtable;
	pte_val(*ptep) = _PAGE_INVALID;
	ptep++;
	pte_val(*ptep) = _PAGE_INVALID;
	return pgtable;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#ifdef CONFIG_PGSTE
void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t entry)
{
	pgste_t pgste;

	/* the mm_has_pgste() check is done in set_pte_at() */
	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
	pgste_set_key(ptep, pgste, entry, mm);
	pgste = pgste_set_pte(ptep, pgste, entry);
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	pgste_t pgste;

	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) |= PGSTE_IN_BIT;
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}
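/*
 * Note on the notification bits: ptep_set_notify() arms PGSTE_IN_BIT and
 * ptep_shadow_pte() below arms PGSTE_VSIE_BIT.  Helpers that go through
 * ptep_xchg_start() (and test_and_clear_guest_dirty()) pick the armed bits
 * up in pgste_pte_notify(), clear them and forward them to ptep_notify().
 */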
/**
 * ptep_force_prot - change access rights of a locked pte
 * @mm: pointer to the process mm_struct
 * @addr: virtual address in the guest address space
 * @ptep: pointer to the page table entry
 * @prot: indicates guest access rights: PROT_NONE, PROT_READ or PROT_WRITE
 * @bit: pgste bit to set (e.g. for notification)
 *
 * Returns 0 if the access rights were changed and -EAGAIN if the current
 * and requested access rights are incompatible.
 */
int ptep_force_prot(struct mm_struct *mm, unsigned long addr,
		    pte_t *ptep, int prot, unsigned long bit)
{
	pte_t entry;
	pgste_t pgste;
	int pte_i, pte_p, nodat;

	pgste = pgste_get_lock(ptep);
	entry = *ptep;
	/* Check pte entry after all locks have been acquired */
	pte_i = pte_val(entry) & _PAGE_INVALID;
	pte_p = pte_val(entry) & _PAGE_PROTECT;
	if ((pte_i && (prot != PROT_NONE)) ||
	    (pte_p && (prot & PROT_WRITE))) {
		pgste_set_unlock(ptep, pgste);
		return -EAGAIN;
	}
	/* Change access rights and set pgste bit */
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	if (prot == PROT_NONE && !pte_i) {
		ptep_flush_direct(mm, addr, ptep, nodat);
		pgste = pgste_update_all(entry, pgste, mm);
		pte_val(entry) |= _PAGE_INVALID;
	}
	if (prot == PROT_READ && !pte_p) {
		ptep_flush_direct(mm, addr, ptep, nodat);
		pte_val(entry) &= ~_PAGE_INVALID;
		pte_val(entry) |= _PAGE_PROTECT;
	}
	pgste_val(pgste) |= bit;
	pgste = pgste_set_pte(ptep, pgste, entry);
	pgste_set_unlock(ptep, pgste);
	return 0;
}

int ptep_shadow_pte(struct mm_struct *mm, unsigned long saddr,
		    pte_t *sptep, pte_t *tptep, pte_t pte)
{
	pgste_t spgste, tpgste;
	pte_t spte, tpte;
	int rc = -EAGAIN;

	if (!(pte_val(*tptep) & _PAGE_INVALID))
		return 0;	/* already shadowed */
	spgste = pgste_get_lock(sptep);
	spte = *sptep;
	if (!(pte_val(spte) & _PAGE_INVALID) &&
	    !((pte_val(spte) & _PAGE_PROTECT) &&
	      !(pte_val(pte) & _PAGE_PROTECT))) {
		pgste_val(spgste) |= PGSTE_VSIE_BIT;
		tpgste = pgste_get_lock(tptep);
		pte_val(tpte) = (pte_val(spte) & PAGE_MASK) |
				(pte_val(pte) & _PAGE_PROTECT);
		/* don't touch the storage key - it belongs to parent pgste */
		tpgste = pgste_set_pte(tptep, tpgste, tpte);
		pgste_set_unlock(tptep, tpgste);
		rc = 1;
	}
	pgste_set_unlock(sptep, spgste);
	return rc;
}

void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep)
{
	pgste_t pgste;
	int nodat;

	pgste = pgste_get_lock(ptep);
	/* notifier is called by the caller */
	nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
	ptep_flush_direct(mm, saddr, ptep, nodat);
	/* don't touch the storage key - it belongs to parent pgste */
	pgste = pgste_set_pte(ptep, pgste, __pte(_PAGE_INVALID));
	pgste_set_unlock(ptep, pgste);
}

static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
{
	if (!non_swap_entry(entry))
		dec_mm_counter(mm, MM_SWAPENTS);
	else if (is_migration_entry(entry)) {
		struct page *page = migration_entry_to_page(entry);

		dec_mm_counter(mm, mm_counter(page));
	}
	free_swap_and_cache(entry);
}

void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, int reset)
{
	unsigned long pgstev;
	pgste_t pgste;
	pte_t pte;

	/* Zap unused and logically-zero pages */
	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgstev = pgste_val(pgste);
	pte = *ptep;
	if (!reset && pte_swap(pte) &&
	    ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
	     (pgstev & _PGSTE_GPS_ZERO))) {
		ptep_zap_swap_entry(mm, pte_to_swp_entry(pte));
		pte_clear(mm, addr, ptep);
	}
	if (reset)
		pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}
void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	unsigned long ptev;
	pgste_t pgste;

	/* Clear storage key ACC and F, but set R/C */
	preempt_disable();
	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
	pgste_val(pgste) |= PGSTE_GR_BIT | PGSTE_GC_BIT;
	ptev = pte_val(*ptep);
	if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
		page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1);
	pgste_set_unlock(ptep, pgste);
	preempt_enable();
}

/*
 * Test whether a guest page is dirty, and clear the dirty state
 */
bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
{
	spinlock_t *ptl;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pgste_t pgste;
	pte_t *ptep;
	pte_t pte;
	bool dirty;
	int nodat;

	pgd = pgd_offset(mm, addr);
	p4d = p4d_alloc(mm, pgd, addr);
	if (!p4d)
		return false;
	pud = pud_alloc(mm, p4d, addr);
	if (!pud)
		return false;
	pmd = pmd_alloc(mm, pud, addr);
	if (!pmd)
		return false;
	/* We can't run guests backed by huge pages, but userspace can
	 * still set them up and then try to migrate them without any
	 * migration support.
	 */
	if (pmd_large(*pmd))
		return true;

	ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl);
	if (unlikely(!ptep))
		return false;

	pgste = pgste_get_lock(ptep);
	dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
	pgste_val(pgste) &= ~PGSTE_UC_BIT;
	pte = *ptep;
	if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
		pgste = pgste_pte_notify(mm, addr, ptep, pgste);
		nodat = !!(pgste_val(pgste) & _PGSTE_GPS_NODAT);
		ptep_ipte_global(mm, addr, ptep, nodat);
		if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
			pte_val(pte) |= _PAGE_PROTECT;
		else
			pte_val(pte) |= _PAGE_INVALID;
		*ptep = pte;
	}
	pgste_set_unlock(ptep, pgste);

	spin_unlock(ptl);
	return dirty;
}
EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty);

int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			  unsigned char key, bool nq)
{
	unsigned long keyul;
	spinlock_t *ptl;
	pgste_t old, new;
	pte_t *ptep;

	ptep = get_locked_pte(mm, addr, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;

	new = old = pgste_get_lock(ptep);
	pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
			    PGSTE_ACC_BITS | PGSTE_FP_BIT);
	keyul = (unsigned long) key;
	pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
	pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
		unsigned long address, bits, skey;

		address = pte_val(*ptep) & PAGE_MASK;
		skey = (unsigned long) page_get_storage_key(address);
		bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
		skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
		/* Set storage key ACC and FP */
		page_set_storage_key(address, skey, !nq);
		/* Merge host changed & referenced into pgste */
		pgste_val(new) |= bits << 52;
	}
	/* changing the guest storage key is considered a change of the page */
	if ((pgste_val(new) ^ pgste_val(old)) &
	    (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
		pgste_val(new) |= PGSTE_UC_BIT;

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(set_guest_storage_key);
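/*
 * Bit-position cheat sheet for the storage-key <-> PGSTE conversions used in
 * this file (derived from the shifts above, not a normative statement of the
 * PGSTE layout): the ACC/FP bits of a storage key are kept at "<< 56"
 * (PGSTE_ACC_BITS/PGSTE_FP_BIT), the guest R/C software copies at "<< 48"
 * (PGSTE_GR_BIT/PGSTE_GC_BIT), and the host R/C copies at "<< 52" (the
 * "Merge host changed & referenced" step in set_guest_storage_key()).
 */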
/**
 * cond_set_guest_storage_key - conditionally set a guest storage key
 *				(handling csske)
 *
 * oldkey will be updated when either mr or mc is set and a pointer is given.
 *
 * Returns 0 if a guest's storage key update wasn't necessary, 1 if the guest
 * storage key was updated and -EFAULT on access errors.
 */
int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			       unsigned char key, unsigned char *oldkey,
			       bool nq, bool mr, bool mc)
{
	unsigned char tmp, mask = _PAGE_ACC_BITS | _PAGE_FP_BIT;
	int rc;

	/* we can drop the pgste lock between getting and setting the key */
	if (mr | mc) {
		rc = get_guest_storage_key(current->mm, addr, &tmp);
		if (rc)
			return rc;
		if (oldkey)
			*oldkey = tmp;
		if (!mr)
			mask |= _PAGE_REFERENCED;
		if (!mc)
			mask |= _PAGE_CHANGED;
		if (!((tmp ^ key) & mask))
			return 0;
	}
	rc = set_guest_storage_key(current->mm, addr, key, nq);
	return rc < 0 ? rc : 1;
}
EXPORT_SYMBOL(cond_set_guest_storage_key);

/**
 * reset_guest_reference_bit - reset a guest reference bit (rrbe), returning
 *			       the reference and changed bit
 *
 * Returns < 0 in case of error, otherwise the cc to be reported to the guest.
 */
int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
{
	spinlock_t *ptl;
	pgste_t old, new;
	pte_t *ptep;
	int cc = 0;

	ptep = get_locked_pte(mm, addr, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;

	new = old = pgste_get_lock(ptep);
	/* Reset guest reference bit only */
	pgste_val(new) &= ~PGSTE_GR_BIT;

	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
		cc = page_reset_referenced(pte_val(*ptep) & PAGE_MASK);
		/* Merge real referenced bit into host-set */
		pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT;
	}
	/* Reflect guest's logical view, not physical */
	cc |= (pgste_val(old) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 49;
	/* Changing the guest storage key is considered a change of the page */
	if ((pgste_val(new) ^ pgste_val(old)) & PGSTE_GR_BIT)
		pgste_val(new) |= PGSTE_UC_BIT;

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(ptep, ptl);
	return cc;
}
EXPORT_SYMBOL(reset_guest_reference_bit);

int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			  unsigned char *key)
{
	spinlock_t *ptl;
	pgste_t pgste;
	pte_t *ptep;

	ptep = get_locked_pte(mm, addr, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;

	pgste = pgste_get_lock(ptep);
	*key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
	if (!(pte_val(*ptep) & _PAGE_INVALID))
		*key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK);
	/* Reflect guest's logical view, not physical */
	*key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
	pgste_set_unlock(ptep, pgste);
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(get_guest_storage_key);
/**
 * pgste_perform_essa - perform ESSA actions on the PGSTE.
 * @mm: the memory context. It must have PGSTEs, no check is performed here!
 * @hva: the host virtual address of the page whose PGSTE is to be processed
 * @orc: the specific action to perform, see the ESSA_SET_* macros.
 * @oldpte: the PTE will be saved there if the pointer is not NULL.
 * @oldpgste: the old PGSTE will be saved there if the pointer is not NULL.
 *
 * Return: 1 if the page is to be added to the CBRL, otherwise 0,
 *	   or < 0 in case of error. -EINVAL is returned for invalid values
 *	   of orc, -EFAULT for invalid addresses.
 */
int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
		       unsigned long *oldpte, unsigned long *oldpgste)
{
	unsigned long pgstev;
	spinlock_t *ptl;
	pgste_t pgste;
	pte_t *ptep;
	int res = 0;

	WARN_ON_ONCE(orc > ESSA_MAX);
	if (unlikely(orc > ESSA_MAX))
		return -EINVAL;
	ptep = get_locked_pte(mm, hva, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;
	pgste = pgste_get_lock(ptep);
	pgstev = pgste_val(pgste);
	if (oldpte)
		*oldpte = pte_val(*ptep);
	if (oldpgste)
		*oldpgste = pgstev;

	switch (orc) {
	case ESSA_GET_STATE:
		break;
	case ESSA_SET_STABLE:
		pgstev &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT);
		pgstev |= _PGSTE_GPS_USAGE_STABLE;
		break;
	case ESSA_SET_UNUSED:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_UNUSED;
		if (pte_val(*ptep) & _PAGE_INVALID)
			res = 1;
		break;
	case ESSA_SET_VOLATILE:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
		if (pte_val(*ptep) & _PAGE_INVALID)
			res = 1;
		break;
	case ESSA_SET_POT_VOLATILE:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		if (!(pte_val(*ptep) & _PAGE_INVALID)) {
			pgstev |= _PGSTE_GPS_USAGE_POT_VOLATILE;
			break;
		}
		if (pgstev & _PGSTE_GPS_ZERO) {
			pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
			break;
		}
		if (!(pgstev & PGSTE_GC_BIT)) {
			pgstev |= _PGSTE_GPS_USAGE_VOLATILE;
			res = 1;
			break;
		}
		break;
	case ESSA_SET_STABLE_RESIDENT:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_STABLE;
		/*
		 * Since the resident state can go away any time after this
		 * call, we will not make this page resident. We can revisit
		 * this decision if a guest will ever start using this.
		 */
		break;
	case ESSA_SET_STABLE_IF_RESIDENT:
		if (!(pte_val(*ptep) & _PAGE_INVALID)) {
			pgstev &= ~_PGSTE_GPS_USAGE_MASK;
			pgstev |= _PGSTE_GPS_USAGE_STABLE;
		}
		break;
	case ESSA_SET_STABLE_NODAT:
		pgstev &= ~_PGSTE_GPS_USAGE_MASK;
		pgstev |= _PGSTE_GPS_USAGE_STABLE | _PGSTE_GPS_NODAT;
		break;
	default:
		/* we should never get here! */
		break;
	}
	/* If we are discarding a page, set it to logical zero */
	if (res)
		pgstev |= _PGSTE_GPS_ZERO;

	pgste_val(pgste) = pgstev;
	pgste_set_unlock(ptep, pgste);
	pte_unmap_unlock(ptep, ptl);
	return res;
}
EXPORT_SYMBOL(pgste_perform_essa);

/**
 * set_pgste_bits - set specific PGSTE bits.
 * @mm: the memory context. It must have PGSTEs, no check is performed here!
 * @hva: the host virtual address of the page whose PGSTE is to be processed
 * @bits: a bitmask representing the bits that will be touched
 * @value: the values of the bits to be written. Only the bits in the mask
 *	   will be written.
 *
 * Return: 0 on success, < 0 in case of error.
 */
int set_pgste_bits(struct mm_struct *mm, unsigned long hva,
		   unsigned long bits, unsigned long value)
{
	spinlock_t *ptl;
	pgste_t new;
	pte_t *ptep;

	ptep = get_locked_pte(mm, hva, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;
	new = pgste_get_lock(ptep);

	pgste_val(new) &= ~bits;
	pgste_val(new) |= value & bits;

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(set_pgste_bits);
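/*
 * Illustrative use of set_pgste_bits() (sketch only, the mask/value pair is
 * hypothetical): to force the usage state to "stable" while leaving every
 * other PGSTE bit untouched, a caller could pass the usage mask as @bits and
 * the stable value as @value:
 *
 *	set_pgste_bits(mm, hva, _PGSTE_GPS_USAGE_MASK,
 *		       _PGSTE_GPS_USAGE_STABLE);
 */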
/**
 * get_pgste - get the current PGSTE for the given address.
 * @mm: the memory context. It must have PGSTEs, no check is performed here!
 * @hva: the host virtual address of the page whose PGSTE is to be processed
 * @pgstep: will be written with the current PGSTE for the given address.
 *
 * Return: 0 on success, < 0 in case of error.
 */
int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep)
{
	spinlock_t *ptl;
	pte_t *ptep;

	ptep = get_locked_pte(mm, hva, &ptl);
	if (unlikely(!ptep))
		return -EFAULT;
	*pgstep = pgste_val(pgste_get(ptep));
	pte_unmap_unlock(ptep, ptl);
	return 0;
}
EXPORT_SYMBOL(get_pgste);
#endif