/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

/*
 * VM - page locking primitives
 */
#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/vtrace.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/vnode.h>
#include <sys/bitmap.h>
#include <sys/lockstat.h>
#include <sys/condvar_impl.h>
#include <vm/page.h>
#include <vm/seg_enum.h>
#include <vm/vm_dep.h>

/*
 * This global mutex is for logical page locking.
 * The following fields in the page structure are protected
 * by this lock:
 *
 *	p_lckcnt
 *	p_cowcnt
 */
kmutex_t page_llock;

/*
 * This is a global lock for the logical page free list.  The
 * logical free list, in this implementation, is maintained as two
 * separate physical lists - the cache list and the free list.
 */
kmutex_t page_freelock;

/*
 * The hash table, page_hash[], the p_selock fields, and the
 * list of pages associated with vnodes are protected by arrays of mutexes.
 *
 * Unless the hashes are changed radically, the table sizes must be
 * a power of two.  Also, we typically need more mutexes for the
 * vnodes since these locks are occasionally held for long periods.
 * And since there seem to be two special vnodes (kvp and swapvp),
 * we make room for private mutexes for them.
 *
 * The pse_mutex[] array holds the mutexes to protect the p_selock
 * fields of all page_t structures.
 *
 * PAGE_SE_MUTEX(pp) returns the address of the appropriate mutex
 * when given a pointer to a page_t.
 *
 * PSE_TABLE_SIZE must be a power of two.  One could argue that we
 * should go to the trouble of setting it up at run time and base it
 * on memory size rather than the number of compile time CPUs.
 *
 * XX64	We should be using physmem size to calculate PSE_TABLE_SIZE,
 *	PSE_SHIFT, PIO_SHIFT.
 *
 *	These might break in 64 bit world.
 */
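/*
 * A minimal sketch of the pointer-to-mutex hashing idea described above.
 * The names here are hypothetical and are not part of this file; the
 * real macro is PAGE_SE_MUTEX() below.  Because the table size is a
 * power of two, a bucket can be selected with a mask rather than a
 * modulo, and the pointer is shifted first so that adjacent page_t
 * structures do not all hash to the same mutex.
 */
#ifdef notdef
#define	EXAMPLE_TABLE_SIZE	128	/* must be a power of two */

static pad_mutex_t example_mutex[EXAMPLE_TABLE_SIZE];

static kmutex_t *
example_ptr_to_mutex(void *ptr)
{
	uintptr_t hash = (uintptr_t)ptr >> 7;	/* discard aligned low bits */

	return (&example_mutex[hash & (EXAMPLE_TABLE_SIZE - 1)].pad_mutex);
}
#endif	/* notdef */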
#define	PSE_SHIFT	7		/* log2(PSE_TABLE_SIZE) */

#define	PSE_TABLE_SIZE	128		/* number of mutexes to have */

#define	PIO_SHIFT	PSE_SHIFT	/* next power of 2 bigger than page_t */
#define	PIO_TABLE_SIZE	PSE_TABLE_SIZE	/* number of io mutexes to have */

pad_mutex_t	ph_mutex[PH_TABLE_SIZE];
pad_mutex_t	pse_mutex[PSE_TABLE_SIZE];
kmutex_t	pio_mutex[PIO_TABLE_SIZE];

#define	PAGE_SE_MUTEX(pp) \
	    &pse_mutex[((((uintptr_t)(pp) >> PSE_SHIFT) ^ \
		((uintptr_t)(pp) >> (PSE_SHIFT << 1))) & \
		(PSE_TABLE_SIZE - 1))].pad_mutex

#define	PAGE_IO_MUTEX(pp) \
	    &pio_mutex[(((uintptr_t)pp) >> PIO_SHIFT) & (PIO_TABLE_SIZE - 1)]

#define	PSZC_MTX_TABLE_SIZE	128
#define	PSZC_MTX_TABLE_SHIFT	7

static pad_mutex_t	pszc_mutex[PSZC_MTX_TABLE_SIZE];

#define	PAGE_SZC_MUTEX(_pp) \
	    &pszc_mutex[((((uintptr_t)(_pp) >> PSZC_MTX_TABLE_SHIFT) ^ \
		((uintptr_t)(_pp) >> (PSZC_MTX_TABLE_SHIFT << 1)) ^ \
		((uintptr_t)(_pp) >> (3 * PSZC_MTX_TABLE_SHIFT))) & \
		(PSZC_MTX_TABLE_SIZE - 1))].pad_mutex

/*
 * The vph_mutex[] array holds the mutexes to protect the vnode chains,
 * (i.e., the list of pages anchored by v_pages and connected via p_vpprev
 * and p_vpnext).
 *
 * The page_vnode_mutex(vp) function returns the address of the appropriate
 * mutex from this array given a pointer to a vnode.  It is complicated
 * by the fact that the kernel's vnode and the swapfs vnode are referenced
 * frequently enough to warrant their own mutexes.
 *
 * The VP_HASH_FUNC returns the index into the vph_mutex array given
 * an address of a vnode.
 */

/*
 * XX64	VPH_TABLE_SIZE and VP_HASH_FUNC might break in 64 bit world.
 *	Need to review again.
 */
#define	VPH_TABLE_SIZE	(2 << VP_SHIFT)

#define	VP_HASH_FUNC(vp) \
	((((uintptr_t)(vp) >> 6) + \
	    ((uintptr_t)(vp) >> 8) + \
	    ((uintptr_t)(vp) >> 10) + \
	    ((uintptr_t)(vp) >> 12)) \
	    & (VPH_TABLE_SIZE - 1))

extern struct vnode kvp;

kmutex_t vph_mutex[VPH_TABLE_SIZE + 2];

/*
 * Initialize the locks used by the Virtual Memory Management system.
 */
void
page_lock_init()
{
}

/*
 * At present we only use page ownership to aid debugging, so it's
 * OK if the owner field isn't exact.  In the 32-bit world two thread ids
 * can map to the same owner because we just 'or' in 0x80000000 and
 * then clear the second highest bit, so that (for example) 0x2faced00
 * and 0xafaced00 both map to 0xafaced00.
 * In the 64-bit world, p_selock may not be large enough to hold a full
 * thread pointer.  If we ever need precise ownership (e.g. if we implement
 * priority inheritance for page locks) then p_selock should become a
 * uintptr_t and SE_WRITER should be -((uintptr_t)curthread >> 2).
 */
#define	SE_WRITER	(((selock_t)(ulong_t)curthread | INT_MIN) & ~SE_EWANTED)
#define	SE_READER	1

/*
 * A page that is deleted must be marked as such using the
 * page_lock_delete() function.  The page must be exclusively locked.
 * The SE_DELETED marker is put in p_selock when this function is called.
 * SE_DELETED must be distinct from any SE_WRITER value.
 */
#define	SE_DELETED	(1 | INT_MIN)
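/*
 * A minimal sketch (the helper name is hypothetical, not part of this
 * file) of how the p_selock encoding above can be read: zero means
 * unlocked, a positive value is the count of shared (reader) holders, a
 * negative value is an exclusive (writer) owner, and SE_DELETED marks a
 * deleted page.  SE_EWANTED is masked off first since it can be set in
 * any state.
 */
#ifdef notdef
static const char *
example_selock_state(selock_t sel)
{
	selock_t state = sel & ~SE_EWANTED;

	if (state == 0)
		return ("unlocked");
	if (state == SE_DELETED)
		return ("deleted");
	if (state < 0)
		return ("exclusively locked (writer)");
	return ("shared locked (p_selock is the reader count)");
}
#endif	/* notdef */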
#ifdef	VM_STATS
uint_t	vph_kvp_count;
uint_t	vph_swapfsvp_count;
uint_t	vph_other;
#endif	/* VM_STATS */

#ifdef	VM_STATS
uint_t	page_lock_count;
uint_t	page_lock_miss;
uint_t	page_lock_miss_lock;
uint_t	page_lock_reclaim;
uint_t	page_lock_bad_reclaim;
uint_t	page_lock_same_page;
uint_t	page_lock_upgrade;
uint_t	page_lock_retired;
uint_t	page_lock_upgrade_failed;
uint_t	page_lock_deleted;

uint_t	page_trylock_locked;
uint_t	page_trylock_failed;
uint_t	page_trylock_missed;

uint_t	page_try_reclaim_upgrade;
#endif	/* VM_STATS */

/*
 * Acquire the "shared/exclusive" lock on a page.
 *
 * Returns 1 on success and locks the page appropriately.
 *	   0 on failure and does not lock the page.
 *
 * If `lock' is non-NULL, it will be dropped and reacquired in the
 * failure case.  This routine can block, and if it does
 * it will always return a failure since the page identity [vp, off]
 * or state may have changed.
 */

int
page_lock(page_t *pp, se_t se, kmutex_t *lock, reclaim_t reclaim)
{
	return (page_lock_es(pp, se, lock, reclaim, 0));
}
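/*
 * A minimal usage sketch (the function and its arguments are hypothetical,
 * not part of this file): a caller that looked a page up under some hash
 * or list mutex passes that mutex to page_lock().  If page_lock() blocks,
 * it drops and reacquires the mutex and returns failure, so the caller
 * must revalidate the page identity (typically by repeating the lookup)
 * rather than assuming [vp, off] is unchanged.
 */
#ifdef notdef
static int
example_lock_shared(page_t *pp, kmutex_t *hash_mutex)
{
	ASSERT(MUTEX_HELD(hash_mutex));

	if (!page_lock(pp, SE_SHARED, hash_mutex, P_NO_RECLAIM)) {
		/*
		 * We may have slept; `hash_mutex' was dropped and retaken,
		 * so the caller must start its lookup over.
		 */
		return (0);
	}
	/* ... use the page, then ... */
	page_unlock(pp);
	return (1);
}
#endif	/* notdef */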
/*
 * With the addition of reader-writer lock semantics to page_lock_es,
 * callers wanting an exclusive (writer) lock may prevent shared-lock
 * (reader) starvation by setting the es parameter to SE_EXCL_WANTED.
 * In this case, when an exclusive lock cannot be acquired, p_selock's
 * SE_EWANTED bit is set.  Shared-lock (reader) requests are also denied
 * if the page is slated for retirement.
 *
 * The se and es parameters determine if the lock should be granted
 * based on the following decision table:
 *
 * Lock wanted	es flags	p_selock/SE_EWANTED	Action
 * -----------	--------------	-------------------	---------
 * SE_EXCL	any [1][2]	unlocked/any		grant lock, clear SE_EWANTED
 * SE_EXCL	SE_EWANTED	any lock/any		deny, set SE_EWANTED
 * SE_EXCL	none		any lock/any		deny
 * SE_SHARED	n/a [2]		shared/0		grant
 * SE_SHARED	n/a [2]		unlocked/0		grant
 * SE_SHARED	n/a		shared/1		deny
 * SE_SHARED	n/a		unlocked/1		deny
 * SE_SHARED	n/a		excl/any		deny
 *
 * Notes:
 * [1] The code grants an exclusive lock to the caller and clears the bit
 *   SE_EWANTED whenever p_selock is unlocked, regardless of the SE_EWANTED
 *   bit's value.  This was deemed acceptable as we are not concerned about
 *   exclusive-lock starvation.  If this ever becomes an issue, a priority or
 *   fifo mechanism should also be implemented.  Meantime, the thread that
 *   set SE_EWANTED should be prepared to catch this condition and reset it.
 *
 * [2] Retired pages may not be locked at any time, regardless of the
 *   disposition of se, unless the es parameter has SE_RETIRED flag set.
 *
 * Notes on values of "es":
 *
 *   es & 1: page_lookup_create will attempt page relocation
 *   es & SE_EXCL_WANTED: caller wants SE_EWANTED set (eg. delete
 *	memory thread); this prevents reader-starvation of waiting
 *	writer thread(s) by giving priority to writers over readers.
 *   es & SE_RETIRED: caller wants to lock pages even if they are
 *	retired.  Default is to deny the lock if the page is retired.
 *
 * And yes, we know, the semantics of this function are too complicated.
 * It's on the list to be cleaned up.
 */
int
page_lock_es(page_t *pp, se_t se, kmutex_t *lock, reclaim_t reclaim, int es)
{
	int		retval;
	kmutex_t	*pse = PAGE_SE_MUTEX(pp);
	int		upgraded;
	int		reclaim_it;

	ASSERT(lock != NULL ? MUTEX_HELD(lock) : 1);

	VM_STAT_ADD(page_lock_count);

	upgraded = 0;
	reclaim_it = 0;

	mutex_enter(pse);

	ASSERT(((es & SE_EXCL_WANTED) == 0) ||
	    ((es & SE_EXCL_WANTED) && (se == SE_EXCL)));

	if (PP_RETIRED(pp) && !(es & SE_RETIRED)) {
		mutex_exit(pse);
		VM_STAT_ADD(page_lock_retired);
		return (0);
	}

	if (se == SE_SHARED && es == 1 && pp->p_selock == 0) {
		se = SE_EXCL;
	}

	if ((reclaim == P_RECLAIM) && (PP_ISFREE(pp))) {

		reclaim_it = 1;
		if (se == SE_SHARED) {
			/*
			 * This is an interesting situation.
			 *
			 * Remember that p_free can only change if
			 * p_selock < 0.
			 * p_free does not depend on our holding `pse'.
			 * And, since we hold `pse', p_selock can not change.
			 * So, if p_free changes on us, the page is already
			 * exclusively held, and we would fail to get p_selock
			 * regardless.
			 *
			 * We want to avoid getting the share
			 * lock on a free page that needs to be reclaimed.
			 * It is possible that some other thread has the share
			 * lock and has left the free page on the cache list.
			 * pvn_vplist_dirty() does this for brief periods.
			 * If the se_share is currently SE_EXCL, we will fail
			 * to acquire p_selock anyway.  Blocking is the
			 * right thing to do.
			 * If we need to reclaim this page, we must get
			 * exclusive access to it, force the upgrade now.
			 * Again, we will fail to acquire p_selock if the
			 * page is not free and block.
			 */
			upgraded = 1;
			se = SE_EXCL;
			VM_STAT_ADD(page_lock_upgrade);
		}
	}

	if (se == SE_EXCL) {
		if (!(es & SE_EXCL_WANTED) && (pp->p_selock & SE_EWANTED)) {
			/*
			 * if the caller wants a writer lock (but did not
			 * specify exclusive access), and there is a pending
			 * writer that wants exclusive access, return failure
			 */
			retval = 0;
		} else if ((pp->p_selock & ~SE_EWANTED) == 0) {
			/* no reader/writer lock held */
			THREAD_KPRI_REQUEST();
			/* this clears our setting of the SE_EWANTED bit */
			pp->p_selock = SE_WRITER;
			retval = 1;
		} else {
			/* page is locked */
			if (es & SE_EXCL_WANTED) {
				/* set the SE_EWANTED bit */
				pp->p_selock |= SE_EWANTED;
			}
			retval = 0;
		}
	} else {
		retval = 0;
		if (pp->p_selock >= 0) {
			if ((pp->p_selock & SE_EWANTED) == 0) {
				pp->p_selock += SE_READER;
				retval = 1;
			}
		}
	}

	if (retval == 0) {
		if ((pp->p_selock & ~SE_EWANTED) == SE_DELETED) {
			VM_STAT_ADD(page_lock_deleted);
			mutex_exit(pse);
			return (retval);
		}

#ifdef VM_STATS
		VM_STAT_ADD(page_lock_miss);
		if (upgraded) {
			VM_STAT_ADD(page_lock_upgrade_failed);
		}
#endif
		if (lock) {
			VM_STAT_ADD(page_lock_miss_lock);
			mutex_exit(lock);
		}

		/*
		 * Now, wait for the page to be unlocked and
		 * release the lock protecting p_cv and p_selock.
		 */
		cv_wait(&pp->p_cv, pse);
		mutex_exit(pse);

		/*
		 * The page identity may have changed while we were
		 * blocked.  If we are willing to depend on "pp"
		 * still pointing to a valid page structure (i.e.,
		 * assuming page structures are not dynamically allocated
		 * or freed), we could try to lock the page if its
		 * identity hasn't changed.
		 *
		 * This needs to be measured; since we come back from
		 * cv_wait holding pse (the expensive part of this
		 * operation), we might as well try the cheap part.
		 * Though we would also have to confirm that dropping
		 * `lock' did not cause any grief to the callers.
		 */
		if (lock) {
			mutex_enter(lock);
		}
	} else {
		/*
		 * We have the page lock.
		 * If we needed to reclaim the page, and the page
		 * needed reclaiming (ie, it was free), then we
		 * have the page exclusively locked.  We may need
		 * to downgrade the page.
		 */
		ASSERT((upgraded) ?
		    ((PP_ISFREE(pp)) && PAGE_EXCL(pp)) : 1);
		mutex_exit(pse);

		/*
		 * We now hold this page's lock, either shared or
		 * exclusive.  This will prevent its identity from changing.
		 * The page, however, may or may not be free.  If the caller
		 * requested, and it is free, go reclaim it from the
		 * free list.  If the page can't be reclaimed, return failure
		 * so that the caller can start all over again.
		 *
		 * NOTE: page_reclaim() releases the page lock (p_selock)
		 *	if it can't be reclaimed.
		 */
		if (reclaim_it) {
			if (!page_reclaim(pp, lock)) {
				VM_STAT_ADD(page_lock_bad_reclaim);
				retval = 0;
			} else {
				VM_STAT_ADD(page_lock_reclaim);
				if (upgraded) {
					page_downgrade(pp);
				}
			}
		}
	}
	return (retval);
}

/*
 * Clear the SE_EWANTED bit from p_selock.  This function allows
 * callers of page_lock_es and page_try_reclaim_lock to clear
 * their setting of this bit if they decide they no longer wish
 * to gain exclusive access to the page.  Currently only
 * delete_memory_thread uses this when the delete memory
 * operation is cancelled.
 */
void
page_lock_clr_exclwanted(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);

	mutex_enter(pse);
	pp->p_selock &= ~SE_EWANTED;
	if (CV_HAS_WAITERS(&pp->p_cv))
		cv_broadcast(&pp->p_cv);
	mutex_exit(pse);
}
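/*
 * A minimal sketch (hypothetical caller, not part of this file) of the
 * SE_EXCL_WANTED protocol described above: a thread that must not be
 * starved by readers retries the exclusive lock with SE_EXCL_WANTED set,
 * and if it gives up without ever obtaining the lock it must clear the
 * SE_EWANTED bit it may have left behind via page_lock_clr_exclwanted().
 */
#ifdef notdef
static int
example_excl_lock_no_starve(page_t *pp, int max_tries)
{
	int	i;

	for (i = 0; i < max_tries; i++) {
		/*
		 * page_lock_es() blocks until the page is unlocked if it
		 * cannot grant the lock immediately, then returns failure,
		 * so this loop simply retries after each wakeup.
		 */
		if (page_lock_es(pp, SE_EXCL, NULL, P_NO_RECLAIM,
		    SE_EXCL_WANTED))
			return (1);	/* got the exclusive lock */
	}

	/* giving up; do not leave SE_EWANTED set behind */
	page_lock_clr_exclwanted(pp);
	return (0);
}
#endif	/* notdef */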
/*
 * Read the comments inside of page_lock_es() carefully.
 *
 * SE_EXCL callers specifying es == SE_EXCL_WANTED will cause the
 * SE_EWANTED bit of p_selock to be set when the lock cannot be obtained.
 * This is used by threads subject to reader-starvation (eg. memory delete).
 *
 * When a thread using SE_EXCL_WANTED does not obtain the SE_EXCL lock,
 * it is expected that it will retry at a later time.  Threads that will
 * not retry the lock *must* call page_lock_clr_exclwanted to clear the
 * SE_EWANTED bit.  (When a thread using SE_EXCL_WANTED obtains the lock,
 * the bit is cleared.)
 */
int
page_try_reclaim_lock(page_t *pp, se_t se, int es)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);
	selock_t old;

	mutex_enter(pse);

	old = pp->p_selock;

	ASSERT(((es & SE_EXCL_WANTED) == 0) ||
	    ((es & SE_EXCL_WANTED) && (se == SE_EXCL)));

	if (PP_RETIRED(pp) && !(es & SE_RETIRED)) {
		mutex_exit(pse);
		VM_STAT_ADD(page_trylock_failed);
		return (0);
	}

	if (se == SE_SHARED && es == 1 && old == 0) {
		se = SE_EXCL;
	}

	if (se == SE_SHARED) {
		if (!PP_ISFREE(pp)) {
			if (old >= 0) {
				/*
				 * Readers are not allowed when excl wanted
				 */
				if ((old & SE_EWANTED) == 0) {
					pp->p_selock = old + SE_READER;
					mutex_exit(pse);
					return (1);
				}
			}
			mutex_exit(pse);
			return (0);
		}
		/*
		 * The page is free, so we really want SE_EXCL (below)
		 */
		VM_STAT_ADD(page_try_reclaim_upgrade);
	}

	/*
	 * The caller wants a writer lock.  We try for it only if
	 * SE_EWANTED is not set, or if the caller specified
	 * SE_EXCL_WANTED.
	 */
	if (!(old & SE_EWANTED) || (es & SE_EXCL_WANTED)) {
		if ((old & ~SE_EWANTED) == 0) {
			/* no reader/writer lock held */
			THREAD_KPRI_REQUEST();
			/* this clears out our setting of the SE_EWANTED bit */
			pp->p_selock = SE_WRITER;
			mutex_exit(pse);
			return (1);
		}
	}
	if (es & SE_EXCL_WANTED) {
		/* page is locked, set the SE_EWANTED bit */
		pp->p_selock |= SE_EWANTED;
	}
	mutex_exit(pse);
	return (0);
}

/*
 * Acquire a page's "shared/exclusive" lock, but never block.
 * Returns 1 on success, 0 on failure.
 */
int
page_trylock(page_t *pp, se_t se)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);

	mutex_enter(pse);
	if (pp->p_selock & SE_EWANTED || PP_RETIRED(pp) ||
	    (se == SE_SHARED && PP_PR_NOSHARE(pp))) {
		/*
		 * Fail if another thread has announced that it wants
		 * exclusive access (SE_EWANTED), if the page has been
		 * retired, or if a shared lock is requested on a page
		 * that is slated for retirement and may not be shared.
		 */
		mutex_exit(pse);
		VM_STAT_ADD(page_trylock_failed);
		return (0);
	}

	if (se == SE_EXCL) {
		if (pp->p_selock == 0) {
			THREAD_KPRI_REQUEST();
			pp->p_selock = SE_WRITER;
			mutex_exit(pse);
			return (1);
		}
	} else {
		if (pp->p_selock >= 0) {
			pp->p_selock += SE_READER;
			mutex_exit(pse);
			return (1);
		}
	}
	mutex_exit(pse);
	return (0);
}
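/*
 * A minimal sketch (hypothetical caller, not part of this file) of the
 * non-blocking pattern page_trylock() supports: opportunistically try
 * for the lock, and simply skip the page rather than sleeping when the
 * lock is unavailable, the page is retired, or a writer is waiting.
 */
#ifdef notdef
static void
example_try_shared(page_t *pp)
{
	if (!page_trylock(pp, SE_SHARED)) {
		/* lock not available right now; caller moves on */
		return;
	}
	/* ... inspect the page while its identity cannot change ... */
	page_unlock(pp);
}
#endif	/* notdef */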
/*
 * Variant of page_unlock() specifically for the page freelist
 * code.  The mere existence of this code is a vile hack that
 * exists because of the backwards locking order of the page
 * freelist manager; please don't call it.
 */
void
page_unlock_noretire(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);
	selock_t old;

	mutex_enter(pse);

	old = pp->p_selock;
	if ((old & ~SE_EWANTED) == SE_READER) {
		pp->p_selock = old & ~SE_READER;
		if (CV_HAS_WAITERS(&pp->p_cv))
			cv_broadcast(&pp->p_cv);
	} else if ((old & ~SE_EWANTED) == SE_DELETED) {
		panic("page_unlock_noretire: page %p is deleted", pp);
	} else if (old < 0) {
		THREAD_KPRI_RELEASE();
		pp->p_selock &= SE_EWANTED;
		if (CV_HAS_WAITERS(&pp->p_cv))
			cv_broadcast(&pp->p_cv);
	} else if ((old & ~SE_EWANTED) > SE_READER) {
		pp->p_selock = old - SE_READER;
	} else {
		panic("page_unlock_noretire: page %p is not locked", pp);
	}

	mutex_exit(pse);
}

/*
 * Release the page's "shared/exclusive" lock and wake up anyone
 * who might be waiting for it.
 */
void
page_unlock(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);
	selock_t old;

	mutex_enter(pse);

	old = pp->p_selock;
	if ((old & ~SE_EWANTED) == SE_READER) {
		pp->p_selock = old & ~SE_READER;
		if (CV_HAS_WAITERS(&pp->p_cv))
			cv_broadcast(&pp->p_cv);
	} else if ((old & ~SE_EWANTED) == SE_DELETED) {
		panic("page_unlock: page %p is deleted", pp);
	} else if (old < 0) {
		THREAD_KPRI_RELEASE();
		pp->p_selock &= SE_EWANTED;
		if (CV_HAS_WAITERS(&pp->p_cv))
			cv_broadcast(&pp->p_cv);
	} else if ((old & ~SE_EWANTED) > SE_READER) {
		pp->p_selock = old - SE_READER;
	} else {
		panic("page_unlock: page %p is not locked", pp);
	}

	if (pp->p_selock == 0 && PP_PR_REQ(pp)) {
		/*
		 * Try to retire the page.  If it retires, great.
		 * If not, oh well, we'll get it in the next unlock
		 * request, and repeat the cycle.  Regardless,
		 * page_tryretire() will drop the page lock.
		 */
		if ((pp->p_toxic & PR_BUSY) == 0) {
			THREAD_KPRI_REQUEST();
			pp->p_selock = SE_WRITER;
			page_settoxic(pp, PR_BUSY);
			mutex_exit(pse);
			page_tryretire(pp);
		} else {
			pp->p_selock = SE_WRITER;
			page_clrtoxic(pp, PR_BUSY);
			pp->p_selock = 0;
			mutex_exit(pse);
		}
	} else {
		mutex_exit(pse);
	}
}

/*
 * Try to upgrade the lock on the page from a "shared" to an
 * "exclusive" lock.  Since this upgrade operation is done while
 * holding the mutex protecting this page, no one else can acquire this page's
 * lock and change the page.  Thus, it is safe to drop the "shared"
 * lock and attempt to acquire the "exclusive" lock.
 *
 * Returns 1 on success, 0 on failure.
 */
int
page_tryupgrade(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);

	mutex_enter(pse);
	if (!(pp->p_selock & SE_EWANTED)) {
		/* no threads want exclusive access, try upgrade */
		if (pp->p_selock == SE_READER) {
			THREAD_KPRI_REQUEST();
			/* convert to exclusive lock */
			pp->p_selock = SE_WRITER;
			mutex_exit(pse);
			return (1);
		}
	}
	mutex_exit(pse);
	return (0);
}
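/*
 * A minimal sketch (hypothetical caller, not part of this file) of the
 * upgrade path: page_tryupgrade() only succeeds when the caller is the
 * sole reader and no writer is waiting, so on failure the usual fallback
 * is to drop the shared lock and reacquire the lock exclusively,
 * revalidating the page afterwards.  page_downgrade() goes the other way
 * once exclusive access is no longer needed.
 */
#ifdef notdef
static int
example_upgrade(page_t *pp)
{
	ASSERT(PAGE_SHARED(pp));

	if (page_tryupgrade(pp))
		return (1);

	/* fall back: drop the shared lock and take it exclusively */
	page_unlock(pp);
	if (!page_lock(pp, SE_EXCL, NULL, P_NO_RECLAIM))
		return (0);

	/* the page was briefly unlocked; caller must recheck its state */
	return (1);
}
#endif	/* notdef */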
/*
 * Downgrade the "exclusive" lock on the page to a "shared" lock
 * while holding the mutex protecting this page's p_selock field.
 */
void
page_downgrade(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);
	int excl_waiting;

	ASSERT((pp->p_selock & ~SE_EWANTED) != SE_DELETED);
	ASSERT(PAGE_EXCL(pp));

	mutex_enter(pse);
	excl_waiting = pp->p_selock & SE_EWANTED;
	THREAD_KPRI_RELEASE();
	pp->p_selock = SE_READER | excl_waiting;
	if (CV_HAS_WAITERS(&pp->p_cv))
		cv_broadcast(&pp->p_cv);
	mutex_exit(pse);
}

void
page_lock_delete(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);

	ASSERT(PAGE_EXCL(pp));
	ASSERT(pp->p_vnode == NULL);
	ASSERT(pp->p_offset == (u_offset_t)-1);
	ASSERT(!PP_ISFREE(pp));

	mutex_enter(pse);
	THREAD_KPRI_RELEASE();
	pp->p_selock = SE_DELETED;
	if (CV_HAS_WAITERS(&pp->p_cv))
		cv_broadcast(&pp->p_cv);
	mutex_exit(pse);
}

/*
 * Implement the io lock for pages
 */
void
page_iolock_init(page_t *pp)
{
	pp->p_iolock_state = 0;
	cv_init(&pp->p_io_cv, NULL, CV_DEFAULT, NULL);
}

/*
 * Acquire the i/o lock on a page.
 */
void
page_io_lock(page_t *pp)
{
	kmutex_t *pio;

	pio = PAGE_IO_MUTEX(pp);
	mutex_enter(pio);
	while (pp->p_iolock_state & PAGE_IO_INUSE) {
		cv_wait(&(pp->p_io_cv), pio);
	}
	pp->p_iolock_state |= PAGE_IO_INUSE;
	mutex_exit(pio);
}

/*
 * Release the i/o lock on a page.
 */
void
page_io_unlock(page_t *pp)
{
	kmutex_t *pio;

	pio = PAGE_IO_MUTEX(pp);
	mutex_enter(pio);
	cv_signal(&pp->p_io_cv);
	pp->p_iolock_state &= ~PAGE_IO_INUSE;
	mutex_exit(pio);
}

/*
 * Try to acquire the i/o lock on a page without blocking.
 * Returns 1 on success, 0 on failure.
 */
int
page_io_trylock(page_t *pp)
{
	kmutex_t *pio;

	if (pp->p_iolock_state & PAGE_IO_INUSE)
		return (0);

	pio = PAGE_IO_MUTEX(pp);
	mutex_enter(pio);

	if (pp->p_iolock_state & PAGE_IO_INUSE) {
		mutex_exit(pio);
		return (0);
	}
	pp->p_iolock_state |= PAGE_IO_INUSE;
	mutex_exit(pio);

	return (1);
}

/*
 * Assert that the i/o lock on a page is held.
 * Returns 1 on success, 0 on failure.
 */
int
page_iolock_assert(page_t *pp)
{
	return (pp->p_iolock_state & PAGE_IO_INUSE);
}

/*
 * Wrapper exported to kernel routines that are built
 * platform-independent (the macro is platform-dependent;
 * the size of vph_mutex[] is based on NCPU).
 *
 * Note that you can do stress testing on this by setting the
 * variable page_vnode_mutex_stress to something other than
 * zero in a DEBUG kernel in a debugger after loading the kernel.
 * Setting it after the kernel is running may not work correctly.
 */
#ifdef	DEBUG
static int page_vnode_mutex_stress = 0;
#endif

kmutex_t *
page_vnode_mutex(vnode_t *vp)
{
	if (vp == &kvp)
		return (&vph_mutex[VPH_TABLE_SIZE + 0]);
#ifdef	DEBUG
	if (page_vnode_mutex_stress != 0)
		return (&vph_mutex[0]);
#endif

	return (&vph_mutex[VP_HASH_FUNC(vp)]);
}

kmutex_t *
page_se_mutex(page_t *pp)
{
	return (PAGE_SE_MUTEX(pp));
}
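/*
 * A minimal sketch (hypothetical function, not part of this file) of what
 * page_vnode_mutex() is for: the vnode's page chain (v_pages, linked via
 * p_vpnext/p_vpprev) may only be walked or modified while the vnode's
 * hashed mutex is held.  This sketch assumes the usual circular layout of
 * the v_pages chain.
 */
#ifdef notdef
static pgcnt_t
example_count_vnode_pages(vnode_t *vp)
{
	kmutex_t	*vphm = page_vnode_mutex(vp);
	page_t		*pp;
	pgcnt_t		npages = 0;

	mutex_enter(vphm);
	if ((pp = vp->v_pages) != NULL) {
		do {
			npages++;
		} while ((pp = pp->p_vpnext) != vp->v_pages);
	}
	mutex_exit(vphm);

	return (npages);
}
#endif	/* notdef */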
#ifdef VM_STATS
uint_t pszclck_stat[4];
#endif

/*
 * Find, take and return a mutex held by hat_page_demote().
 * Called by page_demote_vp_pages() before hat_page_demote() call and by
 * routines that want to block hat_page_demote() but can't do it
 * via locking all constituent pages.
 *
 * Return NULL if p_szc is 0.
 *
 * It should only be used for pages that can be demoted by hat_page_demote()
 * i.e., non-swapfs file system pages.  The logic here is lifted from
 * sfmmu_mlspl_enter() except there's no need to worry about p_szc increase
 * since the page is locked and not free.
 *
 * Hash of the root page is used to find the lock.
 * To find the root in the presence of hat_page_demote() changing the location
 * of the root, this routine relies on the fact that hat_page_demote() changes
 * root last.
 *
 * If NULL is returned pp's p_szc is guaranteed to be 0.  If non-NULL is
 * returned pp's p_szc may be any value.
 */
kmutex_t *
page_szc_lock(page_t *pp)
{
	kmutex_t	*mtx;
	page_t		*rootpp;
	uint_t		szc;
	uint_t		rszc;
	uint_t		pszc = pp->p_szc;

	ASSERT(pp != NULL);
	ASSERT(PAGE_LOCKED(pp));
	ASSERT(!PP_ISFREE(pp));
	ASSERT(pp->p_vnode != NULL);
	ASSERT(!IS_SWAPFSVP(pp->p_vnode));
	ASSERT(pp->p_vnode != &kvp);

again:
	if (pszc == 0) {
		VM_STAT_ADD(pszclck_stat[0]);
		return (NULL);
	}

	/* The lock lives in the root page */

	rootpp = PP_GROUPLEADER(pp, pszc);
	mtx = PAGE_SZC_MUTEX(rootpp);
	mutex_enter(mtx);

	/*
	 * Since p_szc can only decrease if pp == rootpp, rootpp will
	 * always be the same, i.e., we have the right root regardless
	 * of rootpp->p_szc.
	 * If the location of pp's root didn't change after we took
	 * the lock, we have the right root; return the mutex hashed off it.
	 */
	if (pp == rootpp || (rszc = rootpp->p_szc) == pszc) {
		VM_STAT_ADD(pszclck_stat[1]);
		return (mtx);
	}

	/*
	 * Root location changed because the page got demoted.
	 * Locate the new root.
	 */
	if (rszc < pszc) {
		szc = pp->p_szc;
		ASSERT(szc < pszc);
		mutex_exit(mtx);
		pszc = szc;
		VM_STAT_ADD(pszclck_stat[2]);
		goto again;
	}

	VM_STAT_ADD(pszclck_stat[3]);
	/*
	 * The current hat_page_demote is not done yet.
	 * Wait for it to finish.
	 */
	mutex_exit(mtx);
	rootpp = PP_GROUPLEADER(rootpp, rszc);
	mtx = PAGE_SZC_MUTEX(rootpp);
	mutex_enter(mtx);
	mutex_exit(mtx);
	ASSERT(rootpp->p_szc < rszc);
	goto again;
}

int
page_szc_lock_assert(page_t *pp)
{
	page_t *rootpp = PP_PAGEROOT(pp);
	kmutex_t *mtx = PAGE_SZC_MUTEX(rootpp);

	return (MUTEX_HELD(mtx));
}
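/*
 * A minimal sketch (hypothetical caller, not part of this file) of how
 * page_szc_lock() is meant to be used: a caller holding a locked,
 * non-free file system page takes the size-code mutex to hold off
 * hat_page_demote() while it examines p_szc.  A NULL return means p_szc
 * is already 0 and there is nothing to hold off.
 */
#ifdef notdef
static uint_t
example_read_held_szc(page_t *pp)
{
	kmutex_t	*mtx;
	uint_t		szc;

	ASSERT(PAGE_LOCKED(pp));

	mtx = page_szc_lock(pp);
	szc = pp->p_szc;	/* hat_page_demote() is blocked while held */
	if (mtx != NULL)
		mutex_exit(mtx);

	return (szc);
}
#endif	/* notdef */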