/*
 * Copyright IBM Corp. 2007, 2011
 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/sysctl.h>
#include <linux/ksm.h>
#include <linux/mman.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>

static inline pte_t ptep_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pte_t *ptep)
{
	int active, count;
	pte_t old;

	old = *ptep;
	if (unlikely(pte_val(old) & _PAGE_INVALID))
		return old;
	active = (mm == current->active_mm) ? 1 : 0;
	count = atomic_add_return(0x10000, &mm->context.attach_count);
	if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		__ptep_ipte_local(addr, ptep);
	else
		__ptep_ipte(addr, ptep);
	atomic_sub(0x10000, &mm->context.attach_count);
	return old;
}

static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
				    unsigned long addr, pte_t *ptep)
{
	int active, count;
	pte_t old;

	old = *ptep;
	if (unlikely(pte_val(old) & _PAGE_INVALID))
		return old;
	active = (mm == current->active_mm) ? 1 : 0;
	count = atomic_add_return(0x10000, &mm->context.attach_count);
	if ((count & 0xffff) <= active) {
		pte_val(*ptep) |= _PAGE_INVALID;
		mm->context.flush_mm = 1;
	} else
		__ptep_ipte(addr, ptep);
	atomic_sub(0x10000, &mm->context.attach_count);
	return old;
}

static inline pgste_t pgste_get_lock(pte_t *ptep)
{
	unsigned long new = 0;
#ifdef CONFIG_PGSTE
	unsigned long old;

	preempt_disable();
	asm(
		"	lg	%0,%2\n"
		"0:	lgr	%1,%0\n"
		"	nihh	%0,0xff7f\n"	/* clear PCL bit in old */
		"	oihh	%1,0x0080\n"	/* set PCL bit in new */
		"	csg	%0,%1,%2\n"
		"	jl	0b\n"
		: "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
		: "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory");
#endif
	return __pgste(new);
}

static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	asm(
		"	nihh	%1,0xff7f\n"	/* clear PCL bit */
		"	stg	%1,%0\n"
		: "=Q" (ptep[PTRS_PER_PTE])
		: "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
		: "cc", "memory");
	preempt_enable();
#endif
}

static inline pgste_t pgste_get(pte_t *ptep)
{
	unsigned long pgste = 0;
#ifdef CONFIG_PGSTE
	pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
#endif
	return __pgste(pgste);
}

static inline void pgste_set(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	*(pgste_t *)(ptep + PTRS_PER_PTE) = pgste;
#endif
}

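/*
 * Merge the current hardware storage key state of a page into its PGSTE:
 * the referenced/changed bits of the real key are transferred to the
 * guest view (GR/GC bits), and the access-control key plus the fetch
 * protection bit are copied into the PGSTE.
 */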
static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste,
				       struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	unsigned long address, bits, skey;

	if (!mm_use_skey(mm) || pte_val(pte) & _PAGE_INVALID)
		return pgste;
	address = pte_val(pte) & PAGE_MASK;
	skey = (unsigned long) page_get_storage_key(address);
	bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
	/* Transfer page changed & referenced bit to guest bits in pgste */
	pgste_val(pgste) |= bits << 48;		/* GR bit & GC bit */
	/* Copy page access key and fetch protection bit to pgste */
	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
	pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
#endif
	return pgste;

}

static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
				 struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	unsigned long address;
	unsigned long nkey;

	if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID)
		return;
	VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
	address = pte_val(entry) & PAGE_MASK;
	/*
	 * Set page access key and fetch protection bit from pgste.
	 * The guest C/R information is still in the PGSTE, set real
	 * key C/R to 0.
	 */
	nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
	nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
	page_set_storage_key(address, nkey, 0);
#endif
}

static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
{
#ifdef CONFIG_PGSTE
	if ((pte_val(entry) & _PAGE_PRESENT) &&
	    (pte_val(entry) & _PAGE_WRITE) &&
	    !(pte_val(entry) & _PAGE_INVALID)) {
		if (!MACHINE_HAS_ESOP) {
			/*
			 * Without enhanced suppression-on-protection force
			 * the dirty bit on for all writable ptes.
			 */
			pte_val(entry) |= _PAGE_DIRTY;
			pte_val(entry) &= ~_PAGE_PROTECT;
		}
		if (!(pte_val(entry) & _PAGE_PROTECT))
			/* This pte allows write access, set user-dirty */
			pgste_val(pgste) |= PGSTE_UC_BIT;
	}
#endif
	*ptep = entry;
	return pgste;
}

static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
					unsigned long addr,
					pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	if (pgste_val(pgste) & PGSTE_IN_BIT) {
		pgste_val(pgste) &= ~PGSTE_IN_BIT;
		ptep_notify(mm, addr, ptep);
	}
#endif
	return pgste;
}

static inline pgste_t ptep_xchg_start(struct mm_struct *mm,
				      unsigned long addr, pte_t *ptep)
{
	pgste_t pgste = __pgste(0);

	if (mm_has_pgste(mm)) {
		pgste = pgste_get_lock(ptep);
		pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
	}
	return pgste;
}

static inline void ptep_xchg_commit(struct mm_struct *mm,
				    unsigned long addr, pte_t *ptep,
				    pgste_t pgste, pte_t old, pte_t new)
{
	if (mm_has_pgste(mm)) {
		if (pte_val(old) & _PAGE_INVALID)
			pgste_set_key(ptep, pgste, new, mm);
		if (pte_val(new) & _PAGE_INVALID) {
			pgste = pgste_update_all(old, pgste, mm);
			if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
			    _PGSTE_GPS_USAGE_UNUSED)
				pte_val(old) |= _PAGE_UNUSED;
		}
		pgste = pgste_set_pte(ptep, pgste, new);
		pgste_set_unlock(ptep, pgste);
	} else {
		*ptep = new;
	}
}

pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pte_t *ptep, pte_t new)
{
	pgste_t pgste;
	pte_t old;

	pgste = ptep_xchg_start(mm, addr, ptep);
	old = ptep_flush_direct(mm, addr, ptep);
	ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
	return old;
}
EXPORT_SYMBOL(ptep_xchg_direct);

pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t new)
{
	pgste_t pgste;
	pte_t old;

	pgste = ptep_xchg_start(mm, addr, ptep);
	old = ptep_flush_lazy(mm, addr, ptep);
	ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
	return old;
}
EXPORT_SYMBOL(ptep_xchg_lazy);

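/*
 * ptep_modify_prot_start/commit bracket a pte protection change. If the
 * mm has PGSTEs, start keeps the PGSTE lock held (with preemption
 * disabled) and caches the updated PGSTE; commit installs the new pte
 * value and drops the lock again.
 */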
pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
			     pte_t *ptep)
{
	pgste_t pgste;
	pte_t old;

	pgste = ptep_xchg_start(mm, addr, ptep);
	old = ptep_flush_lazy(mm, addr, ptep);
	if (mm_has_pgste(mm)) {
		pgste = pgste_update_all(old, pgste, mm);
		pgste_set(ptep, pgste);
	}
	return old;
}
EXPORT_SYMBOL(ptep_modify_prot_start);

void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
			     pte_t *ptep, pte_t pte)
{
	pgste_t pgste;

	if (mm_has_pgste(mm)) {
		pgste = pgste_get(ptep);
		pgste_set_key(ptep, pgste, pte, mm);
		pgste = pgste_set_pte(ptep, pgste, pte);
		pgste_set_unlock(ptep, pgste);
	} else {
		*ptep = pte;
	}
}
EXPORT_SYMBOL(ptep_modify_prot_commit);

static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
				      unsigned long addr, pmd_t *pmdp)
{
	int active, count;
	pmd_t old;

	old = *pmdp;
	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
		return old;
	if (!MACHINE_HAS_IDTE) {
		__pmdp_csp(pmdp);
		return old;
	}
	active = (mm == current->active_mm) ? 1 : 0;
	count = atomic_add_return(0x10000, &mm->context.attach_count);
	if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		__pmdp_idte_local(addr, pmdp);
	else
		__pmdp_idte(addr, pmdp);
	atomic_sub(0x10000, &mm->context.attach_count);
	return old;
}

static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
				    unsigned long addr, pmd_t *pmdp)
{
	int active, count;
	pmd_t old;

	old = *pmdp;
	if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
		return old;
	active = (mm == current->active_mm) ? 1 : 0;
	count = atomic_add_return(0x10000, &mm->context.attach_count);
	if ((count & 0xffff) <= active) {
		pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
		mm->context.flush_mm = 1;
	} else if (MACHINE_HAS_IDTE)
		__pmdp_idte(addr, pmdp);
	else
		__pmdp_csp(pmdp);
	atomic_sub(0x10000, &mm->context.attach_count);
	return old;
}

pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
		       pmd_t *pmdp, pmd_t new)
{
	pmd_t old;

	old = pmdp_flush_direct(mm, addr, pmdp);
	*pmdp = new;
	return old;
}
EXPORT_SYMBOL(pmdp_xchg_direct);

pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
		     pmd_t *pmdp, pmd_t new)
{
	pmd_t old;

	old = pmdp_flush_lazy(mm, addr, pmdp);
	*pmdp = new;
	return old;
}
EXPORT_SYMBOL(pmdp_xchg_lazy);

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				pgtable_t pgtable)
{
	struct list_head *lh = (struct list_head *) pgtable;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	if (!pmd_huge_pte(mm, pmdp))
		INIT_LIST_HEAD(lh);
	else
		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
	pmd_huge_pte(mm, pmdp) = pgtable;
}

pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
	struct list_head *lh;
	pgtable_t pgtable;
	pte_t *ptep;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	pgtable = pmd_huge_pte(mm, pmdp);
	lh = (struct list_head *) pgtable;
	if (list_empty(lh))
		pmd_huge_pte(mm, pmdp) = NULL;
	else {
		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
		list_del(lh);
	}
	ptep = (pte_t *) pgtable;
	pte_val(*ptep) = _PAGE_INVALID;
	ptep++;
	pte_val(*ptep) = _PAGE_INVALID;
	return pgtable;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

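/*
 * Everything below is only built with CONFIG_PGSTE and operates on the
 * guest view of a page: storage keys, dirty tracking and invalidation
 * notification for KVM guests.
 */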
#ifdef CONFIG_PGSTE
void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t entry)
{
	pgste_t pgste;

	/* the mm_has_pgste() check is done in set_pte_at() */
	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
	pgste_set_key(ptep, pgste, entry, mm);
	pgste = pgste_set_pte(ptep, pgste, entry);
	pgste_set_unlock(ptep, pgste);
}

void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	pgste_t pgste;

	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) |= PGSTE_IN_BIT;
	pgste_set_unlock(ptep, pgste);
}

static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
{
	if (!non_swap_entry(entry))
		dec_mm_counter(mm, MM_SWAPENTS);
	else if (is_migration_entry(entry)) {
		struct page *page = migration_entry_to_page(entry);

		dec_mm_counter(mm, mm_counter(page));
	}
	free_swap_and_cache(entry);
}

void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, int reset)
{
	unsigned long pgstev;
	pgste_t pgste;
	pte_t pte;

	/* Zap unused and logically-zero pages */
	pgste = pgste_get_lock(ptep);
	pgstev = pgste_val(pgste);
	pte = *ptep;
	if (!reset && pte_swap(pte) &&
	    ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
	     (pgstev & _PGSTE_GPS_ZERO))) {
		ptep_zap_swap_entry(mm, pte_to_swp_entry(pte));
		pte_clear(mm, addr, ptep);
	}
	if (reset)
		pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
	pgste_set_unlock(ptep, pgste);
}

void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	unsigned long ptev;
	pgste_t pgste;

	/* Clear storage key */
	pgste = pgste_get_lock(ptep);
	pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
			      PGSTE_GR_BIT | PGSTE_GC_BIT);
	ptev = pte_val(*ptep);
	if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
		page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1);
	pgste_set_unlock(ptep, pgste);
}

/*
 * Test and reset if a guest page is dirty
 */
bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
{
	spinlock_t *ptl;
	pgste_t pgste;
	pte_t *ptep;
	pte_t pte;
	bool dirty;

	ptep = get_locked_pte(mm, addr, &ptl);
	if (unlikely(!ptep))
		return false;

	pgste = pgste_get_lock(ptep);
	dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
	pgste_val(pgste) &= ~PGSTE_UC_BIT;
	pte = *ptep;
	if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
		pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
		__ptep_ipte(addr, ptep);
		if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
			pte_val(pte) |= _PAGE_PROTECT;
		else
			pte_val(pte) |= _PAGE_INVALID;
		*ptep = pte;
	}
	pgste_set_unlock(ptep, pgste);

	spin_unlock(ptl);
	return dirty;
}
EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty);

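/*
 * Set/get the guest storage key of a page mapped into user space. Both
 * helpers walk the page table under mmap_sem and may therefore only be
 * called from sleepable context.
 */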
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			  unsigned char key, bool nq)
{
	unsigned long keyul;
	spinlock_t *ptl;
	pgste_t old, new;
	pte_t *ptep;

	down_read(&mm->mmap_sem);
	ptep = get_locked_pte(mm, addr, &ptl);
	if (unlikely(!ptep)) {
		up_read(&mm->mmap_sem);
		return -EFAULT;
	}

	new = old = pgste_get_lock(ptep);
	pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
			    PGSTE_ACC_BITS | PGSTE_FP_BIT);
	keyul = (unsigned long) key;
	pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
	pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
		unsigned long address, bits, skey;

		address = pte_val(*ptep) & PAGE_MASK;
		skey = (unsigned long) page_get_storage_key(address);
		bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
		skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
		/* Set storage key ACC and FP */
		page_set_storage_key(address, skey, !nq);
		/* Merge host changed & referenced into pgste */
		pgste_val(new) |= bits << 52;
	}
	/* changing the guest storage key is considered a change of the page */
	if ((pgste_val(new) ^ pgste_val(old)) &
	    (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
		pgste_val(new) |= PGSTE_UC_BIT;

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(ptep, ptl);
	up_read(&mm->mmap_sem);
	return 0;
}
EXPORT_SYMBOL(set_guest_storage_key);

unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
{
	unsigned char key;
	spinlock_t *ptl;
	pgste_t pgste;
	pte_t *ptep;

	down_read(&mm->mmap_sem);
	ptep = get_locked_pte(mm, addr, &ptl);
	if (unlikely(!ptep)) {
		up_read(&mm->mmap_sem);
		return -EFAULT;
	}
	pgste = pgste_get_lock(ptep);

	if (pte_val(*ptep) & _PAGE_INVALID) {
		key  = (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56;
		key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56;
		key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48;
		key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48;
	} else {
		key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK);

		/* Reflect guest's logical view, not physical */
		if (pgste_val(pgste) & PGSTE_GR_BIT)
			key |= _PAGE_REFERENCED;
		if (pgste_val(pgste) & PGSTE_GC_BIT)
			key |= _PAGE_CHANGED;
	}

	pgste_set_unlock(ptep, pgste);
	pte_unmap_unlock(ptep, ptl);
	up_read(&mm->mmap_sem);
	return key;
}
EXPORT_SYMBOL(get_guest_storage_key);
#endif