/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

/*
 * VM - page locking primitives
 */
#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/vtrace.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/vnode.h>
#include <sys/bitmap.h>
#include <sys/lockstat.h>
#include <sys/condvar_impl.h>
#include <vm/page.h>
#include <vm/seg_enum.h>
#include <vm/vm_dep.h>

/*
 * This global mutex is for logical page locking.
 * The following fields in the page structure are protected
 * by this lock:
 *
 *        p_lckcnt
 *        p_cowcnt
 */
kmutex_t page_llock;

/*
 * This is a global lock for the logical page free list.  The
 * logical free list, in this implementation, is maintained as two
 * separate physical lists - the cache list and the free list.
 */
kmutex_t page_freelock;

/*
 * The hash table, page_hash[], the p_selock fields, and the
 * list of pages associated with vnodes are protected by arrays of mutexes.
 *
 * Unless the hashes are changed radically, the table sizes must be
 * a power of two.  Also, we typically need more mutexes for the
 * vnodes since these locks are occasionally held for long periods.
 * And since there seem to be two special vnodes (kvp and swapvp),
 * we make room for private mutexes for them.
 *
 * The pse_mutex[] array holds the mutexes to protect the p_selock
 * fields of all page_t structures.
 *
 * PAGE_SE_MUTEX(pp) returns the address of the appropriate mutex
 * when given a pointer to a page_t.
 *
 * PSE_TABLE_SIZE must be a power of two.  One could argue that we
 * should go to the trouble of setting it up at run time and base it
 * on memory size rather than the number of compile time CPUs.
 *
 * XX64        We should be using physmem size to calculate PSE_TABLE_SIZE,
 *        PSE_SHIFT, PIO_SHIFT.
 *
 *        These might break in 64 bit world.
 */
#define PSE_SHIFT        7                /* log2(PSE_TABLE_SIZE) */

#define PSE_TABLE_SIZE   128              /* number of mutexes to have */

#define PIO_SHIFT        PSE_SHIFT        /* next power of 2 bigger than page_t */
#define PIO_TABLE_SIZE   PSE_TABLE_SIZE   /* number of io mutexes to have */

pad_mutex_t ph_mutex[PH_TABLE_SIZE];
pad_mutex_t pse_mutex[PSE_TABLE_SIZE];
kmutex_t pio_mutex[PIO_TABLE_SIZE];

#define PAGE_SE_MUTEX(pp) \
        &pse_mutex[((((uintptr_t)(pp) >> PSE_SHIFT) ^ \
            ((uintptr_t)(pp) >> (PSE_SHIFT << 1))) & \
            (PSE_TABLE_SIZE - 1))].pad_mutex

#define PAGE_IO_MUTEX(pp) \
        &pio_mutex[(((uintptr_t)pp) >> PIO_SHIFT) & (PIO_TABLE_SIZE - 1)]

#define PSZC_MTX_TABLE_SIZE     128
#define PSZC_MTX_TABLE_SHIFT    7

static pad_mutex_t pszc_mutex[PSZC_MTX_TABLE_SIZE];

#define PAGE_SZC_MUTEX(_pp) \
        &pszc_mutex[((((uintptr_t)(_pp) >> PSZC_MTX_TABLE_SHIFT) ^ \
            ((uintptr_t)(_pp) >> (PSZC_MTX_TABLE_SHIFT << 1)) ^ \
            ((uintptr_t)(_pp) >> (3 * PSZC_MTX_TABLE_SHIFT))) & \
            (PSZC_MTX_TABLE_SIZE - 1))].pad_mutex

/*
 * The vph_mutex[] array holds the mutexes to protect the vnode chains,
 * (i.e., the list of pages anchored by v_pages and connected via p_vpprev
 * and p_vpnext).
 *
 * The page_vnode_mutex(vp) function returns the address of the appropriate
 * mutex from this array given a pointer to a vnode.  It is complicated
 * by the fact that the kernel's vnode and the swapfs vnode are referenced
 * frequently enough to warrant their own mutexes.
 *
 * The VP_HASH_FUNC returns the index into the vph_mutex array given
 * an address of a vnode.
 */

/*
 * XX64        VPH_TABLE_SIZE and VP_HASH_FUNC might break in 64 bit world.
 *        Need to review again.
 */
#define VPH_TABLE_SIZE  (2 << VP_SHIFT)

#define VP_HASH_FUNC(vp) \
        ((((uintptr_t)(vp) >> 6) + \
            ((uintptr_t)(vp) >> 8) + \
            ((uintptr_t)(vp) >> 10) + \
            ((uintptr_t)(vp) >> 12)) \
            & (VPH_TABLE_SIZE - 1))

extern struct vnode kvp;

/*
 * Two slots after VPH_TABLE_SIZE are reserved in vph_mutex for kernel vnodes.
 * The lock for kvp is VPH_TABLE_SIZE + 0, and the lock for zvp is
 * VPH_TABLE_SIZE + 1.
 */

kmutex_t vph_mutex[VPH_TABLE_SIZE + 2];

/*
 * Initialize the locks used by the Virtual Memory Management system.
 */
void
page_lock_init()
{
}

/*
 * At present we only use page ownership to aid debugging, so it's
 * OK if the owner field isn't exact.  In the 32-bit world two thread ids
 * can map to the same owner because we just 'or' in 0x80000000 and
 * then clear the second highest bit, so that (for example) 0x2faced00
 * and 0xafaced00 both map to 0xafaced00.
 * In the 64-bit world, p_selock may not be large enough to hold a full
 * thread pointer.  If we ever need precise ownership (e.g. if we implement
 * priority inheritance for page locks) then p_selock should become a
 * uintptr_t and SE_WRITER should be -((uintptr_t)curthread >> 2).
 */
#define SE_WRITER   (((selock_t)(ulong_t)curthread | INT_MIN) & ~SE_EWANTED)
#define SE_READER   1

/*
 * A page that is deleted must be marked as such using the
 * page_lock_delete() function.  The page must be exclusively locked.
 * The SE_DELETED marker is put in p_selock when this function is called.
 * SE_DELETED must be distinct from any SE_WRITER value.
 */
#define SE_DELETED  (1 | INT_MIN)

#ifdef VM_STATS
uint_t vph_kvp_count;
uint_t vph_swapfsvp_count;
uint_t vph_other;
#endif /* VM_STATS */

#ifdef VM_STATS
uint_t page_lock_count;
uint_t page_lock_miss;
uint_t page_lock_miss_lock;
uint_t page_lock_reclaim;
uint_t page_lock_bad_reclaim;
uint_t page_lock_same_page;
uint_t page_lock_upgrade;
uint_t page_lock_retired;
uint_t page_lock_upgrade_failed;
uint_t page_lock_deleted;

uint_t page_trylock_locked;
uint_t page_trylock_failed;
uint_t page_trylock_missed;

uint_t page_try_reclaim_upgrade;
#endif /* VM_STATS */

/*
 * Acquire the "shared/exclusive" lock on a page.
 *
 * Returns 1 on success and locks the page appropriately.
 * 0 on failure and does not lock the page.
 *
 * If `lock' is non-NULL, it will be dropped and reacquired in the
 * failure case.  This routine can block, and if it does
 * it will always return a failure since the page identity [vp, off]
 * or state may have changed.
 */

int
page_lock(page_t *pp, se_t se, kmutex_t *lock, reclaim_t reclaim)
{
        return (page_lock_es(pp, se, lock, reclaim, 0));
}
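
/*
 * Illustrative sketch (not part of the original source): a typical lookup
 * path holds the vnode page-list mutex (here a hypothetical `vphm') while
 * finding the page, passes it as the `lock' argument, and simply restarts
 * on failure, since page_lock() dropped and reacquired the mutex and the
 * page identity may have changed while it blocked:
 *
 *	top:
 *		vphm = page_vnode_mutex(vp);
 *		mutex_enter(vphm);
 *		pp = <find the [vp, off] page on vp's v_pages list>;
 *		if (pp != NULL && !page_lock(pp, SE_SHARED, vphm, P_RECLAIM)) {
 *			mutex_exit(vphm);
 *			goto top;
 *		}
 *		mutex_exit(vphm);
 *		<pp, if found, is now held SE_SHARED>
 */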

/*
 * With the addition of reader-writer lock semantics to page_lock_es,
 * callers wanting an exclusive (writer) lock may prevent shared-lock
 * (reader) starvation by setting the es parameter to SE_EXCL_WANTED.
 * In this case, when an exclusive lock cannot be acquired, p_selock's
 * SE_EWANTED bit is set.  Shared-lock (reader) requests are also denied
 * if the page is slated for retirement.
 *
 * The se and es parameters determine if the lock should be granted
 * based on the following decision table:
 *
 * Lock wanted   es flags      p_selock/SE_EWANTED  Action
 * -----------   -----------   -------------------  ----------------------------
 * SE_EXCL       any [1][2]    unlocked/any         grant lock, clear SE_EWANTED
 * SE_EXCL       SE_EWANTED    any lock/any         deny, set SE_EWANTED
 * SE_EXCL       none          any lock/any         deny
 * SE_SHARED     n/a [2]       shared/0             grant
 * SE_SHARED     n/a [2]       unlocked/0           grant
 * SE_SHARED     n/a           shared/1             deny
 * SE_SHARED     n/a           unlocked/1           deny
 * SE_SHARED     n/a           excl/any             deny
 *
 * Notes:
 * [1] The code grants an exclusive lock to the caller and clears the bit
 *   SE_EWANTED whenever p_selock is unlocked, regardless of the SE_EWANTED
 *   bit's value.  This was deemed acceptable as we are not concerned about
 *   exclusive-lock starvation.  If this ever becomes an issue, a priority or
 *   FIFO mechanism should also be implemented.  In the meantime, the thread
 *   that set SE_EWANTED should be prepared to catch this condition and reset
 *   the bit.
 *
 * [2] Retired pages may not be locked at any time, regardless of the
 *   disposition of se, unless the es parameter has the SE_RETIRED flag set.
 *
 * Notes on values of "es":
 *
 * es & 1: page_lookup_create will attempt page relocation
 * es & SE_EXCL_WANTED: caller wants SE_EWANTED set (e.g. the delete
 * memory thread); this prevents reader-starvation of waiting
 * writer thread(s) by giving priority to writers over readers.
 * es & SE_RETIRED: caller wants to lock pages even if they are
 * retired.  Default is to deny the lock if the page is retired.
 *
 * And yes, we know, the semantics of this function are too complicated.
 * It's on the list to be cleaned up.
 */
int
page_lock_es(page_t *pp, se_t se, kmutex_t *lock, reclaim_t reclaim, int es)
{
        int retval;
        kmutex_t *pse = PAGE_SE_MUTEX(pp);
        int upgraded;
        int reclaim_it;

        ASSERT(lock != NULL ? MUTEX_HELD(lock) : 1);

        VM_STAT_ADD(page_lock_count);

        upgraded = 0;
        reclaim_it = 0;

        mutex_enter(pse);

        ASSERT(((es & SE_EXCL_WANTED) == 0) ||
            ((es & SE_EXCL_WANTED) && (se == SE_EXCL)));

        if (PP_RETIRED(pp) && !(es & SE_RETIRED)) {
                mutex_exit(pse);
                VM_STAT_ADD(page_lock_retired);
                return (0);
        }

        if (se == SE_SHARED && es == 1 && pp->p_selock == 0) {
                se = SE_EXCL;
        }

        if ((reclaim == P_RECLAIM) && (PP_ISFREE(pp))) {

                reclaim_it = 1;
                if (se == SE_SHARED) {
                        /*
                         * This is an interesting situation.
                         *
                         * Remember that p_free can only change if
                         * p_selock < 0.
                         * p_free does not depend on our holding `pse'.
                         * And, since we hold `pse', p_selock can not change.
                         * So, if p_free changes on us, the page is already
                         * exclusively held, and we would fail to get p_selock
                         * regardless.
                         *
                         * We want to avoid getting the share
                         * lock on a free page that needs to be reclaimed.
                         * It is possible that some other thread has the share
                         * lock and has left the free page on the cache list.
                         * pvn_vplist_dirty() does this for brief periods.
                         * If the se_share is currently SE_EXCL, we will fail
                         * to acquire p_selock anyway.  Blocking is the
                         * right thing to do.
                         * If we need to reclaim this page, we must get
                         * exclusive access to it, force the upgrade now.
                         * Again, we will fail to acquire p_selock if the
                         * page is not free and block.
                         */
                        upgraded = 1;
                        se = SE_EXCL;
                        VM_STAT_ADD(page_lock_upgrade);
                }
        }

        if (se == SE_EXCL) {
                if (!(es & SE_EXCL_WANTED) && (pp->p_selock & SE_EWANTED)) {
                        /*
                         * if the caller wants a writer lock (but did not
                         * specify exclusive access), and there is a pending
                         * writer that wants exclusive access, return failure
                         */
                        retval = 0;
                } else if ((pp->p_selock & ~SE_EWANTED) == 0) {
                        /* no reader/writer lock held */
                        THREAD_KPRI_REQUEST();
                        /* this clears our setting of the SE_EWANTED bit */
                        pp->p_selock = SE_WRITER;
                        retval = 1;
                } else {
                        /* page is locked */
                        if (es & SE_EXCL_WANTED) {
                                /* set the SE_EWANTED bit */
                                pp->p_selock |= SE_EWANTED;
                        }
                        retval = 0;
                }
        } else {
                retval = 0;
                if (pp->p_selock >= 0) {
                        if ((pp->p_selock & SE_EWANTED) == 0) {
                                pp->p_selock += SE_READER;
                                retval = 1;
                        }
                }
        }

        if (retval == 0) {
                if ((pp->p_selock & ~SE_EWANTED) == SE_DELETED) {
                        VM_STAT_ADD(page_lock_deleted);
                        mutex_exit(pse);
                        return (retval);
                }

#ifdef VM_STATS
                VM_STAT_ADD(page_lock_miss);
                if (upgraded) {
                        VM_STAT_ADD(page_lock_upgrade_failed);
                }
#endif
                if (lock) {
                        VM_STAT_ADD(page_lock_miss_lock);
                        mutex_exit(lock);
                }

                /*
                 * Now, wait for the page to be unlocked and
                 * release the lock protecting p_cv and p_selock.
                 */
                cv_wait(&pp->p_cv, pse);
                mutex_exit(pse);

                /*
                 * The page identity may have changed while we were
                 * blocked.  If we are willing to depend on "pp"
                 * still pointing to a valid page structure (i.e.,
                 * assuming page structures are not dynamically allocated
                 * or freed), we could try to lock the page if its
                 * identity hasn't changed.
                 *
                 * This needs to be measured; since we come back from
                 * cv_wait holding pse (the expensive part of this
                 * operation), we might as well try the cheap part.
                 * Though we would also have to confirm that dropping
                 * `lock' did not cause any grief to the callers.
                 */
                if (lock) {
                        mutex_enter(lock);
                }
        } else {
                /*
                 * We have the page lock.
                 * If we needed to reclaim the page, and the page
                 * needed reclaiming (i.e., it was free), then we
                 * have the page exclusively locked.  We may need
                 * to downgrade the page.
                 */
                ASSERT((upgraded) ?
                    ((PP_ISFREE(pp)) && PAGE_EXCL(pp)) : 1);
                mutex_exit(pse);

                /*
                 * We now hold this page's lock, either shared or
                 * exclusive.  This will prevent its identity from changing.
                 * The page, however, may or may not be free.  If the caller
                 * requested, and it is free, go reclaim it from the
                 * free list.  If the page can't be reclaimed, return failure
                 * so that the caller can start all over again.
                 *
                 * NOTE: page_reclaim() releases the page lock (p_selock)
                 * if it can't be reclaimed.
                 */
                if (reclaim_it) {
                        if (!page_reclaim(pp, lock)) {
                                VM_STAT_ADD(page_lock_bad_reclaim);
                                retval = 0;
                        } else {
                                VM_STAT_ADD(page_lock_reclaim);
                                if (upgraded) {
                                        page_downgrade(pp);
                                }
                        }
                }
        }
        return (retval);
}

/*
 * Clear the SE_EWANTED bit from p_selock.  This function allows
 * callers of page_lock_es and page_try_reclaim_lock to clear
 * their setting of this bit if they decide they no longer wish
 * to gain exclusive access to the page.  Currently only
 * delete_memory_thread uses this when the delete memory
 * operation is cancelled.
 */
void
page_lock_clr_exclwanted(page_t *pp)
{
        kmutex_t *pse = PAGE_SE_MUTEX(pp);

        mutex_enter(pse);
        pp->p_selock &= ~SE_EWANTED;
        if (CV_HAS_WAITERS(&pp->p_cv))
                cv_broadcast(&pp->p_cv);
        mutex_exit(pse);
}
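
/*
 * Illustrative sketch (not part of the original source): a caller that
 * must eventually win the exclusive lock (e.g. a delete-memory style
 * thread) can pass SE_EXCL_WANTED so that readers are held off, but it
 * must then either keep retrying or clear the bit via
 * page_lock_clr_exclwanted() when it gives up (`giving_up' below is a
 * hypothetical stand-in condition):
 *
 *	if (page_try_reclaim_lock(pp, SE_EXCL, SE_EXCL_WANTED)) {
 *		<operate on the exclusively locked page>
 *		page_unlock(pp);
 *	} else if (giving_up) {
 *		page_lock_clr_exclwanted(pp);
 *	} else {
 *		<retry later; SE_EWANTED remains set on pp>
 *	}
 */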

/*
 * Read the comments inside of page_lock_es() carefully.
 *
 * SE_EXCL callers specifying es == SE_EXCL_WANTED will cause the
 * SE_EWANTED bit of p_selock to be set when the lock cannot be obtained.
 * This is used by threads subject to reader-starvation (e.g. memory delete).
 *
 * When a thread using SE_EXCL_WANTED does not obtain the SE_EXCL lock,
 * it is expected that it will retry at a later time.  Threads that will
 * not retry the lock *must* call page_lock_clr_exclwanted to clear the
 * SE_EWANTED bit.  (When a thread using SE_EXCL_WANTED obtains the lock,
 * the bit is cleared.)
 */
int
page_try_reclaim_lock(page_t *pp, se_t se, int es)
{
        kmutex_t *pse = PAGE_SE_MUTEX(pp);
        selock_t old;

        mutex_enter(pse);

        old = pp->p_selock;

        ASSERT(((es & SE_EXCL_WANTED) == 0) ||
            ((es & SE_EXCL_WANTED) && (se == SE_EXCL)));

        if (PP_RETIRED(pp) && !(es & SE_RETIRED)) {
                mutex_exit(pse);
                VM_STAT_ADD(page_trylock_failed);
                return (0);
        }

        if (se == SE_SHARED && es == 1 && old == 0) {
                se = SE_EXCL;
        }

        if (se == SE_SHARED) {
                if (!PP_ISFREE(pp)) {
                        if (old >= 0) {
                                /*
                                 * Readers are not allowed when excl wanted
                                 */
                                if ((old & SE_EWANTED) == 0) {
                                        pp->p_selock = old + SE_READER;
                                        mutex_exit(pse);
                                        return (1);
                                }
                        }
                        mutex_exit(pse);
                        return (0);
                }
                /*
                 * The page is free, so we really want SE_EXCL (below)
                 */
                VM_STAT_ADD(page_try_reclaim_upgrade);
        }

        /*
         * The caller wants a writer lock.  We try for it only if
         * SE_EWANTED is not set, or if the caller specified
         * SE_EXCL_WANTED.
         */
        if (!(old & SE_EWANTED) || (es & SE_EXCL_WANTED)) {
                if ((old & ~SE_EWANTED) == 0) {
                        /* no reader/writer lock held */
                        THREAD_KPRI_REQUEST();
                        /* this clears out our setting of the SE_EWANTED bit */
                        pp->p_selock = SE_WRITER;
                        mutex_exit(pse);
                        return (1);
                }
        }
        if (es & SE_EXCL_WANTED) {
                /* page is locked, set the SE_EWANTED bit */
                pp->p_selock |= SE_EWANTED;
        }
        mutex_exit(pse);
        return (0);
}

/*
 * Acquire a page's "shared/exclusive" lock, but never block.
 * Returns 1 on success, 0 on failure.
 */
int
page_trylock(page_t *pp, se_t se)
{
        kmutex_t *pse = PAGE_SE_MUTEX(pp);

        mutex_enter(pse);
        if (pp->p_selock & SE_EWANTED || PP_RETIRED(pp) ||
            (se == SE_SHARED && PP_PR_NOSHARE(pp))) {
                /*
                 * Fail if another thread wants exclusive access, if the
                 * page is retired, or if a shared lock is requested on a
                 * page that is slated for retirement.
                 */
                mutex_exit(pse);
                VM_STAT_ADD(page_trylock_failed);
                return (0);
        }

        if (se == SE_EXCL) {
                if (pp->p_selock == 0) {
                        THREAD_KPRI_REQUEST();
                        pp->p_selock = SE_WRITER;
                        mutex_exit(pse);
                        return (1);
                }
        } else {
                if (pp->p_selock >= 0) {
                        pp->p_selock += SE_READER;
                        mutex_exit(pse);
                        return (1);
                }
        }
        mutex_exit(pse);
        return (0);
}

/*
 * Variant of page_unlock() specifically for the page freelist
 * code.  The mere existence of this code is a vile hack that
 * exists because of the backwards locking order of the page
 * freelist manager; please don't call it.
 */
void
page_unlock_nocapture(page_t *pp)
{
        kmutex_t *pse = PAGE_SE_MUTEX(pp);
        selock_t old;

        mutex_enter(pse);

        old = pp->p_selock;
        if ((old & ~SE_EWANTED) == SE_READER) {
                pp->p_selock = old & ~SE_READER;
                if (CV_HAS_WAITERS(&pp->p_cv))
                        cv_broadcast(&pp->p_cv);
        } else if ((old & ~SE_EWANTED) == SE_DELETED) {
                panic("page_unlock_nocapture: page %p is deleted", pp);
        } else if (old < 0) {
                THREAD_KPRI_RELEASE();
                pp->p_selock &= SE_EWANTED;
                if (CV_HAS_WAITERS(&pp->p_cv))
                        cv_broadcast(&pp->p_cv);
        } else if ((old & ~SE_EWANTED) > SE_READER) {
                pp->p_selock = old - SE_READER;
        } else {
                panic("page_unlock_nocapture: page %p is not locked", pp);
        }

        mutex_exit(pse);
}

/*
 * Release the page's "shared/exclusive" lock and wake up anyone
 * who might be waiting for it.
 */
void
page_unlock(page_t *pp)
{
        kmutex_t *pse = PAGE_SE_MUTEX(pp);
        selock_t old;

        mutex_enter(pse);

        old = pp->p_selock;
        if ((old & ~SE_EWANTED) == SE_READER) {
                pp->p_selock = old & ~SE_READER;
                if (CV_HAS_WAITERS(&pp->p_cv))
                        cv_broadcast(&pp->p_cv);
        } else if ((old & ~SE_EWANTED) == SE_DELETED) {
                panic("page_unlock: page %p is deleted", pp);
        } else if (old < 0) {
                THREAD_KPRI_RELEASE();
                pp->p_selock &= SE_EWANTED;
                if (CV_HAS_WAITERS(&pp->p_cv))
                        cv_broadcast(&pp->p_cv);
        } else if ((old & ~SE_EWANTED) > SE_READER) {
                pp->p_selock = old - SE_READER;
        } else {
                panic("page_unlock: page %p is not locked", pp);
        }

        if (pp->p_selock == 0) {
                /*
                 * If the T_CAPTURING bit is set, that means that we should
                 * not try and capture the page again as we could recurse
                 * which could lead to a stack overflow panic or spending a
                 * relatively long time in the kernel making no progress.
                 */
                if ((pp->p_toxic & PR_CAPTURE) &&
                    !(curthread->t_flag & T_CAPTURING) &&
                    !PP_RETIRED(pp)) {
                        THREAD_KPRI_REQUEST();
                        pp->p_selock = SE_WRITER;
                        mutex_exit(pse);
                        page_unlock_capture(pp);
                } else {
                        mutex_exit(pse);
                }
        } else {
                mutex_exit(pse);
        }
}

/*
 * Try to upgrade the lock on the page from a "shared" to an
 * "exclusive" lock.  Since this upgrade operation is done while
 * holding the mutex protecting this page, no one else can acquire this page's
 * lock and change the page.  Thus, it is safe to drop the "shared"
 * lock and attempt to acquire the "exclusive" lock.
 *
 * Returns 1 on success, 0 on failure.
 */
int
page_tryupgrade(page_t *pp)
{
        kmutex_t *pse = PAGE_SE_MUTEX(pp);

        mutex_enter(pse);
        if (!(pp->p_selock & SE_EWANTED)) {
                /* no threads want exclusive access, try upgrade */
                if (pp->p_selock == SE_READER) {
                        THREAD_KPRI_REQUEST();
                        /* convert to exclusive lock */
                        pp->p_selock = SE_WRITER;
                        mutex_exit(pse);
                        return (1);
                }
        }
        mutex_exit(pse);
        return (0);
}

/*
 * Downgrade the "exclusive" lock on the page to a "shared" lock
 * while holding the mutex protecting this page's p_selock field.
 */
void
page_downgrade(page_t *pp)
{
        kmutex_t *pse = PAGE_SE_MUTEX(pp);
        int excl_waiting;

        ASSERT((pp->p_selock & ~SE_EWANTED) != SE_DELETED);
        ASSERT(PAGE_EXCL(pp));

        mutex_enter(pse);
        excl_waiting = pp->p_selock & SE_EWANTED;
        THREAD_KPRI_RELEASE();
        pp->p_selock = SE_READER | excl_waiting;
        if (CV_HAS_WAITERS(&pp->p_cv))
                cv_broadcast(&pp->p_cv);
        mutex_exit(pse);
}

void
page_lock_delete(page_t *pp)
{
        kmutex_t *pse = PAGE_SE_MUTEX(pp);

        ASSERT(PAGE_EXCL(pp));
        ASSERT(pp->p_vnode == NULL);
        ASSERT(pp->p_offset == (u_offset_t)-1);
        ASSERT(!PP_ISFREE(pp));

        mutex_enter(pse);
        THREAD_KPRI_RELEASE();
        pp->p_selock = SE_DELETED;
        if (CV_HAS_WAITERS(&pp->p_cv))
                cv_broadcast(&pp->p_cv);
        mutex_exit(pse);
}

int
page_deleted(page_t *pp)
{
        return (pp->p_selock == SE_DELETED);
}

/*
 * Implement the io lock for pages
 */
void
page_iolock_init(page_t *pp)
{
        pp->p_iolock_state = 0;
        cv_init(&pp->p_io_cv, NULL, CV_DEFAULT, NULL);
}

/*
 * Acquire the i/o lock on a page.
 */
void
page_io_lock(page_t *pp)
{
        kmutex_t *pio;

        pio = PAGE_IO_MUTEX(pp);
        mutex_enter(pio);
        while (pp->p_iolock_state & PAGE_IO_INUSE) {
                cv_wait(&(pp->p_io_cv), pio);
        }
        pp->p_iolock_state |= PAGE_IO_INUSE;
        mutex_exit(pio);
}

/*
 * Release the i/o lock on a page.
 */
void
page_io_unlock(page_t *pp)
{
        kmutex_t *pio;

        pio = PAGE_IO_MUTEX(pp);
        mutex_enter(pio);
        cv_broadcast(&pp->p_io_cv);
        pp->p_iolock_state &= ~PAGE_IO_INUSE;
        mutex_exit(pio);
}

/*
 * Try to acquire the i/o lock on a page without blocking.
 * Returns 1 on success, 0 on failure.
 */
int
page_io_trylock(page_t *pp)
{
        kmutex_t *pio;

        if (pp->p_iolock_state & PAGE_IO_INUSE)
                return (0);

        pio = PAGE_IO_MUTEX(pp);
        mutex_enter(pio);

        if (pp->p_iolock_state & PAGE_IO_INUSE) {
                mutex_exit(pio);
                return (0);
        }
        pp->p_iolock_state |= PAGE_IO_INUSE;
        mutex_exit(pio);

        return (1);
}

/*
 * Wait until the i/o lock is not held.
 */
void
page_io_wait(page_t *pp)
{
        kmutex_t *pio;

        pio = PAGE_IO_MUTEX(pp);
        mutex_enter(pio);
        while (pp->p_iolock_state & PAGE_IO_INUSE) {
                cv_wait(&(pp->p_io_cv), pio);
        }
        mutex_exit(pio);
}

/*
 * Returns non-zero if the page's i/o lock is currently held, 0 otherwise.
 */
int
page_io_locked(page_t *pp)
{
        return (pp->p_iolock_state & PAGE_IO_INUSE);
}

/*
 * Assert that the i/o lock on a page is held.
 * Returns 1 on success, 0 on failure.
 */
int
page_iolock_assert(page_t *pp)
{
        return (page_io_locked(pp));
}
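
/*
 * Illustrative sketch (not part of the original source): the i/o lock is
 * held around the physical i/o on a page so that only one i/o is in
 * flight for it at a time:
 *
 *	page_io_lock(pp);		<blocks while an i/o is in progress>
 *	<issue and complete the i/o for pp>
 *	page_io_unlock(pp);		<wakes threads in page_io_wait()>
 *
 * A caller that must not block can use page_io_trylock(pp) instead and
 * back off if it returns 0.
 */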

/*
 * Wrapper exported to kernel routines that are built
 * platform-independent (the macro is platform-dependent;
 * the size of vph_mutex[] is based on NCPU).
 *
 * Note that you can do stress testing on this by setting the
 * variable page_vnode_mutex_stress to something other than
 * zero in a DEBUG kernel in a debugger after loading the kernel.
 * Setting it after the kernel is running may not work correctly.
 */
#ifdef DEBUG
static int page_vnode_mutex_stress = 0;
#endif

kmutex_t *
page_vnode_mutex(vnode_t *vp)
{
        if (vp == &kvp)
                return (&vph_mutex[VPH_TABLE_SIZE + 0]);

        if (vp == &zvp)
                return (&vph_mutex[VPH_TABLE_SIZE + 1]);
#ifdef DEBUG
        if (page_vnode_mutex_stress != 0)
                return (&vph_mutex[0]);
#endif

        return (&vph_mutex[VP_HASH_FUNC(vp)]);
}

kmutex_t *
page_se_mutex(page_t *pp)
{
        return (PAGE_SE_MUTEX(pp));
}

#ifdef VM_STATS
uint_t pszclck_stat[4];
#endif
/*
 * Find, take and return a mutex held by hat_page_demote().
 * Called by page_demote_vp_pages() before hat_page_demote() call and by
 * routines that want to block hat_page_demote() but can't do it
 * via locking all constituent pages.
 *
 * Return NULL if p_szc is 0.
 *
 * It should only be used for pages that can be demoted by hat_page_demote(),
 * i.e. non swapfs file system pages.  The logic here is lifted from
 * sfmmu_mlspl_enter() except there's no need to worry about p_szc increase
 * since the page is locked and not free.
 *
 * Hash of the root page is used to find the lock.
 * To find the root in the presence of hat_page_demote() changing the location
 * of the root, this routine relies on the fact that hat_page_demote() changes
 * the root last.
 *
 * If NULL is returned pp's p_szc is guaranteed to be 0.  If non-NULL is
 * returned pp's p_szc may be any value.
 */
kmutex_t *
page_szc_lock(page_t *pp)
{
        kmutex_t *mtx;
        page_t *rootpp;
        uint_t szc;
        uint_t rszc;
        uint_t pszc = pp->p_szc;

        ASSERT(pp != NULL);
        ASSERT(PAGE_LOCKED(pp));
        ASSERT(!PP_ISFREE(pp));
        ASSERT(pp->p_vnode != NULL);
        ASSERT(!IS_SWAPFSVP(pp->p_vnode));
        ASSERT(!PP_ISKAS(pp));

again:
        if (pszc == 0) {
                VM_STAT_ADD(pszclck_stat[0]);
                return (NULL);
        }

        /* The lock lives in the root page */

        rootpp = PP_GROUPLEADER(pp, pszc);
        mtx = PAGE_SZC_MUTEX(rootpp);
        mutex_enter(mtx);

        /*
         * Since p_szc can only decrease if pp == rootpp, rootpp will
         * always be the same, i.e. we have the right root regardless
         * of rootpp->p_szc.
         * If the location of pp's root didn't change after we took
         * the lock, we have the right root.  Return the mutex hashed off it.
         */
        if (pp == rootpp || (rszc = rootpp->p_szc) == pszc) {
                VM_STAT_ADD(pszclck_stat[1]);
                return (mtx);
        }

        /*
         * Root location changed because the page got demoted.
         * Locate the new root.
         */
        if (rszc < pszc) {
                szc = pp->p_szc;
                ASSERT(szc < pszc);
                mutex_exit(mtx);
                pszc = szc;
                VM_STAT_ADD(pszclck_stat[2]);
                goto again;
        }

        VM_STAT_ADD(pszclck_stat[3]);
        /*
         * The current hat_page_demote() is not done yet.
         * Wait for it to finish.
         */
        mutex_exit(mtx);
        rootpp = PP_GROUPLEADER(rootpp, rszc);
        mtx = PAGE_SZC_MUTEX(rootpp);
        mutex_enter(mtx);
        mutex_exit(mtx);
        ASSERT(rootpp->p_szc < rszc);
        goto again;
}

int
page_szc_lock_assert(page_t *pp)
{
        page_t *rootpp = PP_PAGEROOT(pp);
        kmutex_t *mtx = PAGE_SZC_MUTEX(rootpp);

        return (MUTEX_HELD(mtx));
}
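
/*
 * Illustrative sketch (not part of the original source): a routine holding
 * a locked, non-free file system page that wants to keep hat_page_demote()
 * from changing p_szc while it examines the large page might do roughly:
 *
 *	kmutex_t *szcmtx;
 *
 *	szcmtx = page_szc_lock(pp);
 *	<examine pp->p_szc and the constituent pages; a NULL return
 *	 guarantees p_szc is, and stays, 0>
 *	if (szcmtx != NULL)
 *		mutex_exit(szcmtx);
 */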