/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * VM - page locking primitives
 */
#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/vtrace.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/vnode.h>
#include <sys/bitmap.h>
#include <sys/lockstat.h>
#include <sys/condvar_impl.h>
#include <vm/page.h>
#include <vm/seg_enum.h>
#include <vm/vm_dep.h>

/*
 * This global mutex is for logical page locking.
 * The following fields in the page structure are protected
 * by this lock:
 *
 *	p_lckcnt
 *	p_cowcnt
 */
kmutex_t page_llock;

/*
 * This is a global lock for the logical page free list.  The
 * logical free list, in this implementation, is maintained as two
 * separate physical lists - the cache list and the free list.
 */
kmutex_t page_freelock;

/*
 * The hash table, page_hash[], the p_selock fields, and the
 * list of pages associated with vnodes are protected by arrays of mutexes.
 *
 * Unless the hashes are changed radically, the table sizes must be
 * a power of two.  Also, we typically need more mutexes for the
 * vnodes since these locks are occasionally held for long periods.
 * And since there seem to be two special vnodes (kvp and swapvp),
 * we make room for private mutexes for them.
 *
 * The pse_mutex[] array holds the mutexes to protect the p_selock
 * fields of all page_t structures.
 *
 * PAGE_SE_MUTEX(pp) returns the address of the appropriate mutex
 * when given a pointer to a page_t.
 *
 * PSE_TABLE_SIZE must be a power of two.  One could argue that we
 * should go to the trouble of setting it up at run time and base it
 * on memory size rather than the number of compile time CPUs.
 *
 * XX64	We should be using physmem size to calculate PSE_TABLE_SIZE,
 *	PSE_SHIFT, PIO_SHIFT.
 *
 *	These might break in 64 bit world.
 */
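/*
 * Illustrative sketch only (not part of the original code): if the XX64
 * suggestion above were implemented, the table sizes might be derived
 * from physmem at startup instead of being fixed at compile time.  The
 * names pse_table_size and pse_shift below are hypothetical; roughly:
 *
 *	pse_table_size = MAX(PSE_TABLE_SIZE,
 *	    1 << highbit(physmem >> 16));	(next power of two)
 *	pse_shift = highbit(pse_table_size) - 1;
 *
 * The current implementation uses the compile-time constants below.
 */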
#define	PSE_SHIFT	7		/* log2(PSE_TABLE_SIZE) */

#define	PSE_TABLE_SIZE	128		/* number of mutexes to have */

#define	PIO_SHIFT	PSE_SHIFT	/* next power of 2 bigger than page_t */
#define	PIO_TABLE_SIZE	PSE_TABLE_SIZE	/* number of io mutexes to have */

pad_mutex_t	ph_mutex[PH_TABLE_SIZE];
pad_mutex_t	pse_mutex[PSE_TABLE_SIZE];
kmutex_t	pio_mutex[PIO_TABLE_SIZE];

#define	PAGE_SE_MUTEX(pp) \
	    &pse_mutex[((((uintptr_t)(pp) >> PSE_SHIFT) ^ \
		((uintptr_t)(pp) >> (PSE_SHIFT << 1))) & \
		(PSE_TABLE_SIZE - 1))].pad_mutex

#define	PAGE_IO_MUTEX(pp) \
	    &pio_mutex[(((uintptr_t)pp) >> PIO_SHIFT) & (PIO_TABLE_SIZE - 1)]

#define	PSZC_MTX_TABLE_SIZE	128
#define	PSZC_MTX_TABLE_SHIFT	7

static pad_mutex_t	pszc_mutex[PSZC_MTX_TABLE_SIZE];

#define	PAGE_SZC_MUTEX(_pp) \
	    &pszc_mutex[((((uintptr_t)(_pp) >> PSZC_MTX_TABLE_SHIFT) ^ \
		((uintptr_t)(_pp) >> (PSZC_MTX_TABLE_SHIFT << 1)) ^ \
		((uintptr_t)(_pp) >> (3 * PSZC_MTX_TABLE_SHIFT))) & \
		(PSZC_MTX_TABLE_SIZE - 1))].pad_mutex

/*
 * The vph_mutex[] array holds the mutexes to protect the vnode chains,
 * (i.e., the list of pages anchored by v_pages and connected via p_vpprev
 * and p_vpnext).
 *
 * The page_vnode_mutex(vp) function returns the address of the appropriate
 * mutex from this array given a pointer to a vnode.  It is complicated
 * by the fact that the kernel's vnode and the swapfs vnode are referenced
 * frequently enough to warrant their own mutexes.
 *
 * The VP_HASH_FUNC returns the index into the vph_mutex array given
 * an address of a vnode.
 */

/*
 * XX64	VPH_TABLE_SIZE and VP_HASH_FUNC might break in 64 bit world.
 *	Need to review again.
 */
#define	VPH_TABLE_SIZE	(2 << VP_SHIFT)

#define	VP_HASH_FUNC(vp) \
	((((uintptr_t)(vp) >> 6) + \
	    ((uintptr_t)(vp) >> 8) + \
	    ((uintptr_t)(vp) >> 10) + \
	    ((uintptr_t)(vp) >> 12)) \
	    & (VPH_TABLE_SIZE - 1))

extern	struct vnode kvp;

kmutex_t	vph_mutex[VPH_TABLE_SIZE + 2];

/*
 * Initialize the locks used by the Virtual Memory Management system.
 */
void
page_lock_init()
{
}

/*
 * At present we only use page ownership to aid debugging, so it's
 * OK if the owner field isn't exact.  In the 32-bit world two thread ids
 * can map to the same owner because we just 'or' in 0x80000000 and
 * then clear the second highest bit, so that (for example) 0x2faced00
 * and 0xafaced00 both map to 0xafaced00.
 * In the 64-bit world, p_selock may not be large enough to hold a full
 * thread pointer.  If we ever need precise ownership (e.g. if we implement
 * priority inheritance for page locks) then p_selock should become a
 * uintptr_t and SE_WRITER should be -((uintptr_t)curthread >> 2).
 */
#define	SE_WRITER	(((selock_t)(ulong_t)curthread | INT_MIN) & ~SE_EWANTED)
#define	SE_READER	1

/*
 * A page that is deleted must be marked as such using the
 * page_lock_delete() function.  The page must be exclusively locked.
 * The SE_DELETED marker is put in p_selock when this function is called.
 * SE_DELETED must be distinct from any SE_WRITER value.
 */
#define	SE_DELETED	(1 | INT_MIN)

#ifdef VM_STATS
uint_t	vph_kvp_count;
uint_t	vph_swapfsvp_count;
uint_t	vph_other;
#endif /* VM_STATS */

#ifdef VM_STATS
uint_t	page_lock_count;
uint_t	page_lock_miss;
uint_t	page_lock_miss_lock;
uint_t	page_lock_reclaim;
uint_t	page_lock_bad_reclaim;
uint_t	page_lock_same_page;
uint_t	page_lock_upgrade;
uint_t	page_lock_retired;
uint_t	page_lock_upgrade_failed;
uint_t	page_lock_deleted;

uint_t	page_trylock_locked;
uint_t	page_trylock_failed;
uint_t	page_trylock_missed;

uint_t	page_try_reclaim_upgrade;
#endif /* VM_STATS */

/*
 * Acquire the "shared/exclusive" lock on a page.
 *
 * Returns 1 on success and locks the page appropriately.
 *	   0 on failure and does not lock the page.
 *
 * If `lock' is non-NULL, it will be dropped and reacquired in the
 * failure case.  This routine can block, and if it does
 * it will always return a failure since the page identity [vp, off]
 * or state may have changed.
 */

int
page_lock(page_t *pp, se_t se, kmutex_t *lock, reclaim_t reclaim)
{
	return (page_lock_es(pp, se, lock, reclaim, 0));
}

/*
 * With the addition of reader-writer lock semantics to page_lock_es,
 * callers wanting an exclusive (writer) lock may prevent shared-lock
 * (reader) starvation by setting the es parameter to SE_EXCL_WANTED.
 * In this case, when an exclusive lock cannot be acquired, p_selock's
 * SE_EWANTED bit is set.  Shared-lock (reader) requests are also denied
 * if the page is slated for retirement.
 *
 * The se and es parameters determine if the lock should be granted
 * based on the following decision table:
 *
 * Lock wanted	es flags	p_selock/SE_EWANTED	Action
 * -----------	--------------	-------------------	---------
 * SE_EXCL	any [1][2]	unlocked/any		grant lock, clear SE_EWANTED
 * SE_EXCL	SE_EWANTED	any lock/any		deny, set SE_EWANTED
 * SE_EXCL	none		any lock/any		deny
 * SE_SHARED	n/a [2][3]	shared/0		grant
 * SE_SHARED	n/a [2][3]	unlocked/0		grant
 * SE_SHARED	n/a		shared/1		deny
 * SE_SHARED	n/a		unlocked/1		deny
 * SE_SHARED	n/a		excl/any		deny
 *
 * Notes:
 * [1] The code grants an exclusive lock to the caller and clears the bit
 *   SE_EWANTED whenever p_selock is unlocked, regardless of the SE_EWANTED
 *   bit's value.  This was deemed acceptable as we are not concerned about
 *   exclusive-lock starvation.  If this ever becomes an issue, a priority or
 *   fifo mechanism should also be implemented.  Meantime, the thread that
 *   set SE_EWANTED should be prepared to catch this condition and reset it.
 *
 * [2] Retired pages may not be locked at any time, regardless of the
 *   disposition of se, unless the es parameter has SE_RETIRED flag set.
 *
 * [3] If the page is slated for retirement by an agent, the lock is denied.
 *
 * Notes on values of "es":
 *
 * es & 1: page_lookup_create will attempt page relocation
 * es & SE_EXCL_WANTED: caller wants SE_EWANTED set (eg. delete
 * memory thread); this prevents reader-starvation of waiting
 * writer thread(s) by giving priority to writers over readers.
 * es & SE_RETIRED: caller wants to lock pages even if they are
 * retired.  Default is to deny the lock if the page is retired.
 *
 * And yes, we know, the semantics of this function are too complicated.
 * It's on the list to be cleaned up.
 */
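/*
 * Illustrative caller sketch (not part of the original code): a typical
 * consumer grabs the lock, works on the page, and releases it with
 * page_unlock().  P_NO_RECLAIM is assumed here to be the reclaim_t value
 * meaning "do not reclaim from the free list"; a hypothetical caller
 * needing exclusive access might do roughly:
 *
 *	if (page_lock(pp, SE_EXCL, NULL, P_NO_RECLAIM)) {
 *		... operate on the exclusively locked page ...
 *		page_unlock(pp);
 *	} else {
 *		... identity/state may have changed; look the page up again ...
 *	}
 *
 * Callers that need the extra es semantics (SE_EXCL_WANTED, SE_RETIRED)
 * call page_lock_es() directly, per the table above.
 */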
int
page_lock_es(page_t *pp, se_t se, kmutex_t *lock, reclaim_t reclaim, int es)
{
	int		retval;
	kmutex_t	*pse = PAGE_SE_MUTEX(pp);
	int		upgraded;
	int		reclaim_it;

	ASSERT(lock != NULL ? MUTEX_HELD(lock) : 1);

	VM_STAT_ADD(page_lock_count);

	upgraded = 0;
	reclaim_it = 0;

	mutex_enter(pse);

	ASSERT(((es & SE_EXCL_WANTED) == 0) ||
	    ((es & SE_EXCL_WANTED) && (se == SE_EXCL)));

	if (PP_RETIRED(pp) && !(es & SE_RETIRED)) {
		mutex_exit(pse);
		VM_STAT_ADD(page_lock_retired);
		return (0);
	}

	if (se == SE_SHARED && es == 1 && pp->p_selock == 0) {
		se = SE_EXCL;
	}

	if ((reclaim == P_RECLAIM) && (PP_ISFREE(pp))) {

		reclaim_it = 1;
		if (se == SE_SHARED) {
			/*
			 * This is an interesting situation.
			 *
			 * Remember that p_free can only change if
			 * p_selock < 0.
			 * p_free does not depend on our holding `pse'.
			 * And, since we hold `pse', p_selock can not change.
			 * So, if p_free changes on us, the page is already
			 * exclusively held, and we would fail to get p_selock
			 * regardless.
			 *
			 * We want to avoid getting the share
			 * lock on a free page that needs to be reclaimed.
			 * It is possible that some other thread has the share
			 * lock and has left the free page on the cache list.
			 * pvn_vplist_dirty() does this for brief periods.
			 * If the se_share is currently SE_EXCL, we will fail
			 * to acquire p_selock anyway.  Blocking is the
			 * right thing to do.
			 * If we need to reclaim this page, we must get
			 * exclusive access to it, force the upgrade now.
			 * Again, we will fail to acquire p_selock if the
			 * page is not free and block.
			 */
			upgraded = 1;
			se = SE_EXCL;
			VM_STAT_ADD(page_lock_upgrade);
		}
	}

	if (se == SE_EXCL) {
		if (!(es & SE_EXCL_WANTED) && (pp->p_selock & SE_EWANTED)) {
			/*
			 * if the caller wants a writer lock (but did not
			 * specify exclusive access), and there is a pending
			 * writer that wants exclusive access, return failure
			 */
			retval = 0;
		} else if ((pp->p_selock & ~SE_EWANTED) == 0) {
			/* no reader/writer lock held */
			THREAD_KPRI_REQUEST();
			/* this clears our setting of the SE_EWANTED bit */
			pp->p_selock = SE_WRITER;
			retval = 1;
		} else {
			/* page is locked */
			if (es & SE_EXCL_WANTED) {
				/* set the SE_EWANTED bit */
				pp->p_selock |= SE_EWANTED;
			}
			retval = 0;
		}
	} else {
		retval = 0;
		if (pp->p_selock >= 0) {
			/*
			 * Readers are not allowed when excl wanted or
			 * an FMA retire is pending.
			 */
			if ((pp->p_selock & SE_EWANTED) == 0) {
				if (!PP_PR_NOSHARE(pp)) {
					pp->p_selock += SE_READER;
					retval = 1;
				}
			}
		}
	}

	if (retval == 0) {
		if ((pp->p_selock & ~SE_EWANTED) == SE_DELETED) {
			VM_STAT_ADD(page_lock_deleted);
			mutex_exit(pse);
			return (retval);
		}

#ifdef VM_STATS
		VM_STAT_ADD(page_lock_miss);
		if (upgraded) {
			VM_STAT_ADD(page_lock_upgrade_failed);
		}
#endif
		if (lock) {
			VM_STAT_ADD(page_lock_miss_lock);
			mutex_exit(lock);
		}

		/*
		 * Now, wait for the page to be unlocked and
		 * release the lock protecting p_cv and p_selock.
		 */
		cv_wait(&pp->p_cv, pse);
		mutex_exit(pse);

		/*
		 * The page identity may have changed while we were
		 * blocked.  If we are willing to depend on "pp"
		 * still pointing to a valid page structure (i.e.,
		 * assuming page structures are not dynamically allocated
		 * or freed), we could try to lock the page if its
		 * identity hasn't changed.
		 *
		 * This needs to be measured, since we come back from
		 * cv_wait holding pse (the expensive part of this
		 * operation) we might as well try the cheap part.
		 * Though we would also have to confirm that dropping
		 * `lock' did not cause any grief to the callers.
		 */
		if (lock) {
			mutex_enter(lock);
		}
	} else {
		/*
		 * We have the page lock.
		 * If we needed to reclaim the page, and the page
		 * needed reclaiming (ie, it was free), then we
		 * have the page exclusively locked.  We may need
		 * to downgrade the page.
		 */
		ASSERT((upgraded) ?
		    ((PP_ISFREE(pp)) && PAGE_EXCL(pp)) : 1);
		mutex_exit(pse);

		/*
		 * We now hold this page's lock, either shared or
		 * exclusive.  This will prevent its identity from changing.
		 * The page, however, may or may not be free.  If the caller
		 * requested, and it is free, go reclaim it from the
		 * free list.  If the page can't be reclaimed, return failure
		 * so that the caller can start all over again.
		 *
		 * NOTE: page_reclaim() releases the page lock (p_selock)
		 *	if it can't be reclaimed.
		 */
		if (reclaim_it) {
			if (!page_reclaim(pp, lock)) {
				VM_STAT_ADD(page_lock_bad_reclaim);
				retval = 0;
			} else {
				VM_STAT_ADD(page_lock_reclaim);
				if (upgraded) {
					page_downgrade(pp);
				}
			}
		}
	}
	return (retval);
}

/*
 * Clear the SE_EWANTED bit from p_selock.  This function allows
 * callers of page_lock_es and page_try_reclaim_lock to clear
 * their setting of this bit if they decide they no longer wish
 * to gain exclusive access to the page.  Currently only
 * delete_memory_thread uses this when the delete memory
 * operation is cancelled.
 */
void
page_lock_clr_exclwanted(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);

	mutex_enter(pse);
	pp->p_selock &= ~SE_EWANTED;
	if (CV_HAS_WAITERS(&pp->p_cv))
		cv_broadcast(&pp->p_cv);
	mutex_exit(pse);
}
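/*
 * Illustrative sketch only (not from the original file): the intended
 * pairing of SE_EXCL_WANTED with page_lock_clr_exclwanted() for a caller
 * that may give up, e.g. a memory-delete style retry loop, looks roughly
 * like this (operation_cancelled is a hypothetical flag):
 *
 *	while (!page_try_reclaim_lock(pp, SE_EXCL, SE_EXCL_WANTED)) {
 *		if (operation_cancelled) {
 *			page_lock_clr_exclwanted(pp);
 *			return;
 *		}
 *		delay(hz);		(back off before retrying)
 *	}
 *	... page is now held SE_EXCL; SE_EWANTED was cleared on success ...
 */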
/*
 * Read the comments inside of page_lock_es() carefully.
 *
 * SE_EXCL callers specifying es == SE_EXCL_WANTED will cause the
 * SE_EWANTED bit of p_selock to be set when the lock cannot be obtained.
 * This is used by threads subject to reader-starvation (eg. memory delete).
 *
 * When a thread using SE_EXCL_WANTED does not obtain the SE_EXCL lock,
 * it is expected that it will retry at a later time.  Threads that will
 * not retry the lock *must* call page_lock_clr_exclwanted to clear the
 * SE_EWANTED bit.  (When a thread using SE_EXCL_WANTED obtains the lock,
 * the bit is cleared.)
 */
int
page_try_reclaim_lock(page_t *pp, se_t se, int es)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);
	selock_t old;

	mutex_enter(pse);

	old = pp->p_selock;

	ASSERT(((es & SE_EXCL_WANTED) == 0) ||
	    ((es & SE_EXCL_WANTED) && (se == SE_EXCL)));

	if (PP_RETIRED(pp) && !(es & SE_RETIRED)) {
		mutex_exit(pse);
		VM_STAT_ADD(page_trylock_failed);
		return (0);
	}

	if (se == SE_SHARED && es == 1 && old == 0) {
		se = SE_EXCL;
	}

	if (se == SE_SHARED) {
		if (!PP_ISFREE(pp)) {
			if (old >= 0) {
				/*
				 * Readers are not allowed when excl wanted
				 * or a retire is pending.
				 */
				if ((old & SE_EWANTED) == 0) {
					if (!PP_PR_NOSHARE(pp)) {
						pp->p_selock = old + SE_READER;
						mutex_exit(pse);
						return (1);
					}
				}
			}
			mutex_exit(pse);
			return (0);
		}
		/*
		 * The page is free, so we really want SE_EXCL (below)
		 */
		VM_STAT_ADD(page_try_reclaim_upgrade);
	}

	/*
	 * The caller wants a writer lock.  We try for it only if
	 * SE_EWANTED is not set, or if the caller specified
	 * SE_EXCL_WANTED.
	 */
	if (!(old & SE_EWANTED) || (es & SE_EXCL_WANTED)) {
		if ((old & ~SE_EWANTED) == 0) {
			/* no reader/writer lock held */
			THREAD_KPRI_REQUEST();
			/* this clears out our setting of the SE_EWANTED bit */
			pp->p_selock = SE_WRITER;
			mutex_exit(pse);
			return (1);
		}
	}
	if (es & SE_EXCL_WANTED) {
		/* page is locked, set the SE_EWANTED bit */
		pp->p_selock |= SE_EWANTED;
	}
	mutex_exit(pse);
	return (0);
}

/*
 * Acquire a page's "shared/exclusive" lock, but never block.
 * Returns 1 on success, 0 on failure.
 */
int
page_trylock(page_t *pp, se_t se)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);

	mutex_enter(pse);
	if (pp->p_selock & SE_EWANTED || PP_RETIRED(pp) ||
	    (se == SE_SHARED && PP_PR_NOSHARE(pp))) {
		/*
		 * Fail if another thread wants exclusive access
		 * (SE_EWANTED), if the page is retired, or if a shared
		 * lock is requested on a page that is slated for
		 * retirement.
		 */
		mutex_exit(pse);
		VM_STAT_ADD(page_trylock_failed);
		return (0);
	}

	if (se == SE_EXCL) {
		if (pp->p_selock == 0) {
			THREAD_KPRI_REQUEST();
			pp->p_selock = SE_WRITER;
			mutex_exit(pse);
			return (1);
		}
	} else {
		if (pp->p_selock >= 0) {
			pp->p_selock += SE_READER;
			mutex_exit(pse);
			return (1);
		}
	}
	mutex_exit(pse);
	return (0);
}

/*
 * Variant of page_unlock() specifically for the page freelist
 * code.  The mere existence of this code is a vile hack that
 * results from the backwards locking order of the page
 * freelist manager; please don't call it.
 */
void
page_unlock_noretire(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);
	selock_t old;

	mutex_enter(pse);

	old = pp->p_selock;
	if ((old & ~SE_EWANTED) == SE_READER) {
		pp->p_selock = old & ~SE_READER;
		if (CV_HAS_WAITERS(&pp->p_cv))
			cv_broadcast(&pp->p_cv);
	} else if ((old & ~SE_EWANTED) == SE_DELETED) {
		panic("page_unlock_noretire: page %p is deleted", pp);
	} else if (old < 0) {
		THREAD_KPRI_RELEASE();
		pp->p_selock &= SE_EWANTED;
		if (CV_HAS_WAITERS(&pp->p_cv))
			cv_broadcast(&pp->p_cv);
	} else if ((old & ~SE_EWANTED) > SE_READER) {
		pp->p_selock = old - SE_READER;
	} else {
		panic("page_unlock_noretire: page %p is not locked", pp);
	}

	mutex_exit(pse);
}
/*
 * Release the page's "shared/exclusive" lock and wake up anyone
 * who might be waiting for it.
 */
void
page_unlock(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);
	selock_t old;

	mutex_enter(pse);

	old = pp->p_selock;
	if ((old & ~SE_EWANTED) == SE_READER) {
		pp->p_selock = old & ~SE_READER;
		if (CV_HAS_WAITERS(&pp->p_cv))
			cv_broadcast(&pp->p_cv);
	} else if ((old & ~SE_EWANTED) == SE_DELETED) {
		panic("page_unlock: page %p is deleted", pp);
	} else if (old < 0) {
		THREAD_KPRI_RELEASE();
		pp->p_selock &= SE_EWANTED;
		if (CV_HAS_WAITERS(&pp->p_cv))
			cv_broadcast(&pp->p_cv);
	} else if ((old & ~SE_EWANTED) > SE_READER) {
		pp->p_selock = old - SE_READER;
	} else {
		panic("page_unlock: page %p is not locked", pp);
	}

	if (pp->p_selock == 0 && PP_PR_REQ(pp)) {
		/*
		 * Try to retire the page.  If it retires, great.
		 * If not, oh well, we'll get it in the next unlock
		 * request, and repeat the cycle.  Regardless,
		 * page_tryretire() will drop the page lock.
		 */
		if ((pp->p_toxic & PR_BUSY) == 0) {
			THREAD_KPRI_REQUEST();
			pp->p_selock = SE_WRITER;
			page_settoxic(pp, PR_BUSY);
			mutex_exit(pse);
			page_tryretire(pp);
		} else {
			pp->p_selock = SE_WRITER;
			page_clrtoxic(pp, PR_BUSY);
			pp->p_selock = 0;
			mutex_exit(pse);
		}
	} else {
		mutex_exit(pse);
	}
}

/*
 * Try to upgrade the lock on the page from a "shared" to an
 * "exclusive" lock.  Since this upgrade operation is done while
 * holding the mutex protecting this page, no one else can acquire this page's
 * lock and change the page.  Thus, it is safe to drop the "shared"
 * lock and attempt to acquire the "exclusive" lock.
 *
 * Returns 1 on success, 0 on failure.
 */
int
page_tryupgrade(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);

	mutex_enter(pse);
	if (!(pp->p_selock & SE_EWANTED)) {
		/* no threads want exclusive access, try upgrade */
		if (pp->p_selock == SE_READER) {
			THREAD_KPRI_REQUEST();
			/* convert to exclusive lock */
			pp->p_selock = SE_WRITER;
			mutex_exit(pse);
			return (1);
		}
	}
	mutex_exit(pse);
	return (0);
}

/*
 * Downgrade the "exclusive" lock on the page to a "shared" lock
 * while holding the mutex protecting this page's p_selock field.
 */
void
page_downgrade(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);
	int excl_waiting;

	ASSERT((pp->p_selock & ~SE_EWANTED) != SE_DELETED);
	ASSERT(PAGE_EXCL(pp));

	mutex_enter(pse);
	excl_waiting = pp->p_selock & SE_EWANTED;
	THREAD_KPRI_RELEASE();
	pp->p_selock = SE_READER | excl_waiting;
	if (CV_HAS_WAITERS(&pp->p_cv))
		cv_broadcast(&pp->p_cv);
	mutex_exit(pse);
}
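/*
 * Illustrative sketch only (not from the original file): a caller holding
 * a shared lock that discovers it needs exclusive access typically tries
 * the upgrade and, on failure, drops the shared lock and re-acquires the
 * page exclusively, re-validating the page afterwards.  P_NO_RECLAIM is
 * assumed here to be the reclaim_t value meaning "do not reclaim":
 *
 *	if (!page_tryupgrade(pp)) {
 *		page_unlock(pp);
 *		if (!page_lock(pp, SE_EXCL, NULL, P_NO_RECLAIM)) {
 *			... lost the race; look the page up again ...
 *		}
 *		... identity/state may have changed while unlocked ...
 *	}
 *	... page is now held SE_EXCL ...
 */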
void
page_lock_delete(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);

	ASSERT(PAGE_EXCL(pp));
	ASSERT(pp->p_vnode == NULL);
	ASSERT(pp->p_offset == (u_offset_t)-1);
	ASSERT(!PP_ISFREE(pp));

	mutex_enter(pse);
	THREAD_KPRI_RELEASE();
	pp->p_selock = SE_DELETED;
	if (CV_HAS_WAITERS(&pp->p_cv))
		cv_broadcast(&pp->p_cv);
	mutex_exit(pse);
}

/*
 * Implement the io lock for pages
 */
void
page_iolock_init(page_t *pp)
{
	pp->p_iolock_state = 0;
	cv_init(&pp->p_io_cv, NULL, CV_DEFAULT, NULL);
}

/*
 * Acquire the i/o lock on a page.
 */
void
page_io_lock(page_t *pp)
{
	kmutex_t *pio;

	pio = PAGE_IO_MUTEX(pp);
	mutex_enter(pio);
	while (pp->p_iolock_state & PAGE_IO_INUSE) {
		cv_wait(&(pp->p_io_cv), pio);
	}
	pp->p_iolock_state |= PAGE_IO_INUSE;
	mutex_exit(pio);
}

/*
 * Release the i/o lock on a page.
 */
void
page_io_unlock(page_t *pp)
{
	kmutex_t *pio;

	pio = PAGE_IO_MUTEX(pp);
	mutex_enter(pio);
	cv_signal(&pp->p_io_cv);
	pp->p_iolock_state &= ~PAGE_IO_INUSE;
	mutex_exit(pio);
}

/*
 * Try to acquire the i/o lock on a page without blocking.
 * Returns 1 on success, 0 on failure.
 */
int
page_io_trylock(page_t *pp)
{
	kmutex_t *pio;

	if (pp->p_iolock_state & PAGE_IO_INUSE)
		return (0);

	pio = PAGE_IO_MUTEX(pp);
	mutex_enter(pio);

	if (pp->p_iolock_state & PAGE_IO_INUSE) {
		mutex_exit(pio);
		return (0);
	}
	pp->p_iolock_state |= PAGE_IO_INUSE;
	mutex_exit(pio);

	return (1);
}

/*
 * Assert that the i/o lock on a page is held.
 * Returns 1 on success, 0 on failure.
 */
int
page_iolock_assert(page_t *pp)
{
	return (pp->p_iolock_state & PAGE_IO_INUSE);
}
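/*
 * Illustrative sketch only (not from the original file): the i/o lock
 * brackets the actual I/O on a page while the SE lock protects its
 * identity, so a hypothetical I/O path might look roughly like:
 *
 *	page_io_lock(pp);		(serialize i/o on this page)
 *	err = do_the_io(pp);		(hypothetical i/o routine)
 *	page_io_unlock(pp);		(wake a waiter, if any)
 *
 * Routines that require the lock to already be held by their caller can
 * check it with ASSERT(page_iolock_assert(pp)).
 */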
/*
 * Wrapper exported to kernel routines that are built
 * platform-independent (the macro is platform-dependent;
 * the size of vph_mutex[] is based on NCPU).
 *
 * Note that you can do stress testing on this by setting the
 * variable page_vnode_mutex_stress to something other than
 * zero in a DEBUG kernel in a debugger after loading the kernel.
 * Setting it after the kernel is running may not work correctly.
 */
#ifdef DEBUG
static int page_vnode_mutex_stress = 0;
#endif

kmutex_t *
page_vnode_mutex(vnode_t *vp)
{
	if (vp == &kvp)
		return (&vph_mutex[VPH_TABLE_SIZE + 0]);
#ifdef DEBUG
	if (page_vnode_mutex_stress != 0)
		return (&vph_mutex[0]);
#endif

	return (&vph_mutex[VP_HASH_FUNC(vp)]);
}

kmutex_t *
page_se_mutex(page_t *pp)
{
	return (PAGE_SE_MUTEX(pp));
}

#ifdef VM_STATS
uint_t pszclck_stat[4];
#endif
/*
 * Find, take and return a mutex held by hat_page_demote().
 * Called by page_demote_vp_pages() before hat_page_demote() call and by
 * routines that want to block hat_page_demote() but can't do it
 * via locking all constituent pages.
 *
 * Return NULL if p_szc is 0.
 *
 * It should only be used for pages that can be demoted by hat_page_demote()
 * i.e. non swapfs file system pages.  The logic here is lifted from
 * sfmmu_mlspl_enter() except there's no need to worry about p_szc increase
 * since the page is locked and not free.
 *
 * Hash of the root page is used to find the lock.
 * To find the root in the presence of hat_page_demote() changing the location
 * of the root, this routine relies on the fact that hat_page_demote() changes
 * the root last.
 *
 * If NULL is returned, pp's p_szc is guaranteed to be 0.  If non-NULL is
 * returned, pp's p_szc may be any value.
 */
kmutex_t *
page_szc_lock(page_t *pp)
{
	kmutex_t	*mtx;
	page_t		*rootpp;
	uint_t		szc;
	uint_t		rszc;
	uint_t		pszc = pp->p_szc;

	ASSERT(pp != NULL);
	ASSERT(PAGE_LOCKED(pp));
	ASSERT(!PP_ISFREE(pp));
	ASSERT(pp->p_vnode != NULL);
	ASSERT(!IS_SWAPFSVP(pp->p_vnode));
	ASSERT(pp->p_vnode != &kvp);

again:
	if (pszc == 0) {
		VM_STAT_ADD(pszclck_stat[0]);
		return (NULL);
	}

	/* The lock lives in the root page */

	rootpp = PP_GROUPLEADER(pp, pszc);
	mtx = PAGE_SZC_MUTEX(rootpp);
	mutex_enter(mtx);

	/*
	 * Since p_szc can only decrease if pp == rootpp, rootpp will always
	 * be the same, i.e. we have the right root regardless of
	 * rootpp->p_szc.
	 * If the location of pp's root didn't change after we took
	 * the lock, we have the right root.  Return the mutex hashed off it.
	 */
	if (pp == rootpp || (rszc = rootpp->p_szc) == pszc) {
		VM_STAT_ADD(pszclck_stat[1]);
		return (mtx);
	}

	/*
	 * Root location changed because the page got demoted.
	 * Locate the new root.
	 */
	if (rszc < pszc) {
		szc = pp->p_szc;
		ASSERT(szc < pszc);
		mutex_exit(mtx);
		pszc = szc;
		VM_STAT_ADD(pszclck_stat[2]);
		goto again;
	}

	VM_STAT_ADD(pszclck_stat[3]);
	/*
	 * The current hat_page_demote() is not done yet.
	 * Wait for it to finish.
	 */
	mutex_exit(mtx);
	rootpp = PP_GROUPLEADER(rootpp, rszc);
	mtx = PAGE_SZC_MUTEX(rootpp);
	mutex_enter(mtx);
	mutex_exit(mtx);
	ASSERT(rootpp->p_szc < rszc);
	goto again;
}

int
page_szc_lock_assert(page_t *pp)
{
	page_t *rootpp = PP_PAGEROOT(pp);
	kmutex_t *mtx = PAGE_SZC_MUTEX(rootpp);

	return (MUTEX_HELD(mtx));
}
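/*
 * Illustrative sketch only (not from the original file): a caller that
 * must keep hat_page_demote() from changing p_szc while it examines a
 * locked, non-free page typically brackets the inspection like this:
 *
 *	kmutex_t *mtx = page_szc_lock(pp);	(NULL if p_szc == 0)
 *	... examine pp->p_szc and the constituent pages ...
 *	if (mtx != NULL)
 *		mutex_exit(mtx);
 */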