/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * VM - page locking primitives
 */
#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/vtrace.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/vnode.h>
#include <sys/bitmap.h>
#include <sys/lockstat.h>
#include <sys/condvar_impl.h>
#include <vm/page.h>
#include <vm/seg_enum.h>
#include <vm/vm_dep.h>

/*
 * This global mutex is for logical page locking.
 * The following fields in the page structure are protected
 * by this lock:
 *
 *	p_lckcnt
 *	p_cowcnt
 */
kmutex_t page_llock;

/*
 * This is a global lock for the logical page free list.  The
 * logical free list, in this implementation, is maintained as two
 * separate physical lists - the cache list and the free list.
 */
kmutex_t page_freelock;

/*
 * The hash table, page_hash[], the p_selock fields, and the
 * list of pages associated with vnodes are protected by arrays of mutexes.
 *
 * Unless the hashes are changed radically, the table sizes must be
 * a power of two.  Also, we typically need more mutexes for the
 * vnodes since these locks are occasionally held for long periods.
 * And since there seem to be two special vnodes (kvp and swapvp),
 * we make room for private mutexes for them.
 *
 * The pse_mutex[] array holds the mutexes to protect the p_selock
 * fields of all page_t structures.
 *
 * PAGE_SE_MUTEX(pp) returns the address of the appropriate mutex
 * when given a pointer to a page_t.
 *
 * PSE_TABLE_SIZE must be a power of two.  One could argue that we
 * should go to the trouble of setting it up at run time and base it
 * on memory size rather than the number of compile time CPUs.
 *
 * XX64	We should be using physmem size to calculate PSE_TABLE_SIZE,
 *	PSE_SHIFT, PIO_SHIFT.
 *
 *	These might break in 64 bit world.
 */
#define	PSE_SHIFT	7		/* log2(PSE_TABLE_SIZE) */

#define	PSE_TABLE_SIZE	128		/* number of mutexes to have */

#define	PIO_SHIFT	PSE_SHIFT	/* next power of 2 bigger than page_t */
#define	PIO_TABLE_SIZE	PSE_TABLE_SIZE	/* number of io mutexes to have */

pad_mutex_t	ph_mutex[PH_TABLE_SIZE];
pad_mutex_t	pse_mutex[PSE_TABLE_SIZE];
kmutex_t	pio_mutex[PIO_TABLE_SIZE];

#define	PAGE_SE_MUTEX(pp) \
	&pse_mutex[((((uintptr_t)(pp) >> PSE_SHIFT) ^ \
	    ((uintptr_t)(pp) >> (PSE_SHIFT << 1))) & \
	    (PSE_TABLE_SIZE - 1))].pad_mutex

#define	PAGE_IO_MUTEX(pp) \
	&pio_mutex[(((uintptr_t)pp) >> PIO_SHIFT) & (PIO_TABLE_SIZE - 1)]

#define	PSZC_MTX_TABLE_SIZE	128
#define	PSZC_MTX_TABLE_SHIFT	7

static pad_mutex_t	pszc_mutex[PSZC_MTX_TABLE_SIZE];

#define	PAGE_SZC_MUTEX(_pp) \
	&pszc_mutex[((((uintptr_t)(_pp) >> PSZC_MTX_TABLE_SHIFT) ^ \
	    ((uintptr_t)(_pp) >> (PSZC_MTX_TABLE_SHIFT << 1)) ^ \
	    ((uintptr_t)(_pp) >> (3 * PSZC_MTX_TABLE_SHIFT))) & \
	    (PSZC_MTX_TABLE_SIZE - 1))].pad_mutex

/*
 * The vph_mutex[] array holds the mutexes to protect the vnode chains,
 * (i.e., the list of pages anchored by v_pages and connected via p_vpprev
 * and p_vpnext).
 *
 * The page_vnode_mutex(vp) function returns the address of the appropriate
 * mutex from this array given a pointer to a vnode.  It is complicated
 * by the fact that the kernel's vnode and the swapfs vnode are referenced
 * frequently enough to warrant their own mutexes.
 *
 * The VP_HASH_FUNC returns the index into the vph_mutex array given
 * an address of a vnode.
 */

/*
 * XX64	VPH_TABLE_SIZE and VP_HASH_FUNC might break in 64 bit world.
 *	Need to review again.
 */
#define	VPH_TABLE_SIZE	(2 << VP_SHIFT)

#define	VP_HASH_FUNC(vp) \
	((((uintptr_t)(vp) >> 6) + \
	    ((uintptr_t)(vp) >> 8) + \
	    ((uintptr_t)(vp) >> 10) + \
	    ((uintptr_t)(vp) >> 12)) \
	    & (VPH_TABLE_SIZE - 1))

extern	struct vnode	kvp;

kmutex_t	vph_mutex[VPH_TABLE_SIZE + 2];

/*
 * Initialize the locks used by the Virtual Memory Management system.
 */
void
page_lock_init()
{
}

/*
 * At present we only use page ownership to aid debugging, so it's
 * OK if the owner field isn't exact.  In the 32-bit world two thread ids
 * can map to the same owner because we just 'or' in 0x80000000 and
 * then clear the second highest bit, so that (for example) 0x2faced00
 * and 0xafaced00 both map to 0xafaced00.
 * In the 64-bit world, p_selock may not be large enough to hold a full
 * thread pointer.  If we ever need precise ownership (e.g. if we implement
 * priority inheritance for page locks) then p_selock should become a
 * uintptr_t and SE_WRITER should be -((uintptr_t)curthread >> 2).
 */
#define	SE_WRITER	(((selock_t)(ulong_t)curthread | INT_MIN) & ~SE_EWANTED)
#define	SE_READER	1

/*
 * A page that is deleted must be marked as such using the
 * page_lock_delete() function. The page must be exclusively locked.
 * The SE_DELETED marker is put in p_selock when this function is called.
 * SE_DELETED must be distinct from any SE_WRITER value.
 */
#define	SE_DELETED	(1 | INT_MIN)
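
/*
 * Editorial note (not part of the original source): a sketch of the
 * p_selock states implied by the definitions above and by the lock
 * routines below; the exact constant values are defined outside this file.
 *
 *	0				unlocked
 *	n * SE_READER (n > 0)		held shared by n readers
 *	SE_WRITER (negative)		held exclusively, low bits ~ owner
 *	SE_DELETED			page has been deleted
 *
 * The SE_EWANTED bit may additionally be set in any of these states to
 * record that a waiting thread wants the exclusive lock (see the
 * page_lock_es() comments below).
 */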

#ifdef VM_STATS
uint_t	vph_kvp_count;
uint_t	vph_swapfsvp_count;
uint_t	vph_other;
#endif /* VM_STATS */

#ifdef VM_STATS
uint_t	page_lock_count;
uint_t	page_lock_miss;
uint_t	page_lock_miss_lock;
uint_t	page_lock_reclaim;
uint_t	page_lock_bad_reclaim;
uint_t	page_lock_same_page;
uint_t	page_lock_upgrade;
uint_t	page_lock_upgrade_failed;
uint_t	page_lock_deleted;

uint_t	page_trylock_locked;
uint_t	page_trylock_missed;

uint_t	page_try_reclaim_upgrade;
#endif /* VM_STATS */


/*
 * Acquire the "shared/exclusive" lock on a page.
 *
 * Returns 1 on success and locks the page appropriately.
 *	   0 on failure and does not lock the page.
 *
 * If `lock' is non-NULL, it will be dropped and reacquired in the
 * failure case.  This routine can block, and if it does
 * it will always return a failure since the page identity [vp, off]
 * or state may have changed.
 */

int
page_lock(page_t *pp, se_t se, kmutex_t *lock, reclaim_t reclaim)
{
	return (page_lock_es(pp, se, lock, reclaim, 0));
}

/*
 * With the addition of reader-writer lock semantics to page_lock_es,
 * callers wanting an exclusive (writer) lock may prevent shared-lock
 * (reader) starvation by setting the es parameter to SE_EXCL_WANTED.
 * In this case, when an exclusive lock cannot be acquired, p_selock's
 * SE_EWANTED bit is set.
 * This bit, along with the se and es parameters, is used to decide
 * if the requested lock should be granted:
 *
 * Lock wanted	SE_EXCL_WANTED	p_selock/SE_EWANTED	Action
 * ----------	--------------	-------------------	---------
 * SE_EXCL	no		dont-care/1		deny lock
 * SE_EXCL	any(see note)	unlocked/any		grant lock, clear SE_EWANTED
 * SE_EXCL	yes		any lock/any		deny, set SE_EWANTED
 * SE_EXCL	no		any lock/any		deny
 * SE_SHARED	not applicable	shared/0		grant
 * SE_SHARED	not applicable	unlocked/0		grant
 * SE_SHARED	not applicable	shared/1		deny
 * SE_SHARED	not applicable	unlocked/1		deny
 * SE_SHARED	not applicable	excl/any		deny
 *
 * Note: the code grants an exclusive lock to the caller and clears
 * SE_EWANTED whenever p_selock is unlocked, regardless of the SE_EWANTED
 * bit's value.  This was deemed acceptable as we are not concerned about
 * exclusive-lock starvation.  If this ever becomes an issue, a priority or
 * fifo mechanism should also be implemented.
 */
int
page_lock_es(page_t *pp, se_t se, kmutex_t *lock, reclaim_t reclaim, int es)
{
	int		retval;
	kmutex_t	*pse = PAGE_SE_MUTEX(pp);
	int		upgraded;
	int		reclaim_it;

	ASSERT(lock != NULL ? MUTEX_HELD(lock) : 1);

	VM_STAT_ADD(page_lock_count);

	upgraded = 0;
	reclaim_it = 0;

	mutex_enter(pse);

	/*
	 * Current uses of 'es':
	 * es == 1		page_lookup_create will attempt page relocation
	 * es == SE_EXCL_WANTED	caller wants SE_EWANTED set (eg. delete
	 * memory thread); this prevents reader-starvation of waiting
	 * writer thread(s).
	 */


	ASSERT(((es & SE_EXCL_WANTED) == 0) ||
	    ((es == SE_EXCL_WANTED) && (se == SE_EXCL)));

	if (se == SE_SHARED && es == 1 && pp->p_selock == 0) {
		se = SE_EXCL;
	}

	if ((reclaim == P_RECLAIM) && (PP_ISFREE(pp))) {

		reclaim_it = 1;
		if (se == SE_SHARED) {
			/*
			 * This is an interesting situation.
			 *
			 * Remember that p_free can only change if
			 * p_selock < 0.
			 * p_free does not depend on our holding `pse'.
			 * And, since we hold `pse', p_selock can not change.
			 * So, if p_free changes on us, the page is already
			 * exclusively held, and we would fail to get p_selock
			 * regardless.
			 *
			 * We want to avoid getting the share
			 * lock on a free page that needs to be reclaimed.
			 * It is possible that some other thread has the share
			 * lock and has left the free page on the cache list.
			 * pvn_vplist_dirty() does this for brief periods.
			 * If the se_share is currently SE_EXCL, we will fail
			 * to acquire p_selock anyway.  Blocking is the
			 * right thing to do.
			 * If we need to reclaim this page, we must get
			 * exclusive access to it, force the upgrade now.
			 * Again, we will fail to acquire p_selock if the
			 * page is not free and block.
			 */
			upgraded = 1;
			se = SE_EXCL;
			VM_STAT_ADD(page_lock_upgrade);
		}
	}

	if (se == SE_EXCL) {
		if ((es != SE_EXCL_WANTED) && (pp->p_selock & SE_EWANTED)) {
			/*
			 * if the caller wants a writer lock (but did not
			 * specify exclusive access), and there is a pending
			 * writer that wants exclusive access, return failure
			 */
			retval = 0;
		} else if ((pp->p_selock & ~SE_EWANTED) == 0) {
			/* no reader/writer lock held */
			THREAD_KPRI_REQUEST();
			/* this clears our setting of the SE_EWANTED bit */
			pp->p_selock = SE_WRITER;
			retval = 1;
		} else {
			/* page is locked */
			if (es == SE_EXCL_WANTED) {
				/* set the SE_EWANTED bit */
				pp->p_selock |= SE_EWANTED;
			}
			retval = 0;
		}
	} else {
		retval = 0;
		if (pp->p_selock >= 0) {
			/* readers are not allowed when excl wanted */
			if (!(pp->p_selock & SE_EWANTED)) {
				pp->p_selock += SE_READER;
				retval = 1;
			}
		}
	}

	if (retval == 0) {
		if ((pp->p_selock & ~SE_EWANTED) == SE_DELETED) {
			VM_STAT_ADD(page_lock_deleted);
			mutex_exit(pse);
			return (retval);
		}

#ifdef VM_STATS
		VM_STAT_ADD(page_lock_miss);
		if (upgraded) {
			VM_STAT_ADD(page_lock_upgrade_failed);
		}
#endif
		if (lock) {
			VM_STAT_ADD(page_lock_miss_lock);
			mutex_exit(lock);
		}

		/*
		 * Now, wait for the page to be unlocked and
		 * release the lock protecting p_cv and p_selock.
		 */
		cv_wait(&pp->p_cv, pse);
		mutex_exit(pse);

		/*
		 * The page identity may have changed while we were
		 * blocked.  If we are willing to depend on "pp"
		 * still pointing to a valid page structure (i.e.,
		 * assuming page structures are not dynamically allocated
		 * or freed), we could try to lock the page if its
		 * identity hasn't changed.
		 *
		 * This needs to be measured, since we come back from
		 * cv_wait holding pse (the expensive part of this
		 * operation) we might as well try the cheap part.
		 * Though we would also have to confirm that dropping
		 * `lock' did not cause any grief to the callers.
		 */
		if (lock) {
			mutex_enter(lock);
		}
	} else {
		/*
		 * We have the page lock.
		 * If we needed to reclaim the page, and the page
		 * needed reclaiming (ie, it was free), then we
		 * have the page exclusively locked.  We may need
		 * to downgrade the page.
		 */
		ASSERT((upgraded) ?
		    ((PP_ISFREE(pp)) && PAGE_EXCL(pp)) : 1);
		mutex_exit(pse);

		/*
		 * We now hold this page's lock, either shared or
		 * exclusive.  This will prevent its identity from changing.
		 * The page, however, may or may not be free.  If the caller
		 * requested, and it is free, go reclaim it from the
		 * free list.  If the page can't be reclaimed, return failure
		 * so that the caller can start all over again.
		 *
		 * NOTE: page_reclaim() releases the page lock (p_selock)
		 * if it can't be reclaimed.
		 */
		if (reclaim_it) {
			if (!page_reclaim(pp, lock)) {
				VM_STAT_ADD(page_lock_bad_reclaim);
				retval = 0;
			} else {
				VM_STAT_ADD(page_lock_reclaim);
				if (upgraded) {
					page_downgrade(pp);
				}
			}
		}
	}
	return (retval);
}
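
/*
 * Editorial usage sketch (not part of the original source).  The comments
 * above describe the common lookup-then-lock pattern; everything other
 * than the page_lock() family below is a placeholder for the caller's
 * own lookup code:
 *
 *	retry:
 *		mutex_enter(phm);		caller's hash-chain mutex
 *		pp = <look up page for [vp, off]>;
 *		if (pp != NULL &&
 *		    !page_lock(pp, SE_SHARED, phm, P_RECLAIM)) {
 *			mutex_exit(phm);
 *			goto retry;		page_lock() blocked and dropped
 *						`phm'; identity may have changed
 *		}
 *		mutex_exit(phm);
 */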

/*
 * Clear the SE_EWANTED bit from p_selock.  This function allows
 * callers of page_lock_es and page_try_reclaim_lock to clear
 * their setting of this bit if they decide they no longer wish
 * to gain exclusive access to the page.  Currently only
 * delete_memory_thread uses this when the delete memory
 * operation is cancelled.
 */
void
page_lock_clr_exclwanted(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);

	mutex_enter(pse);
	pp->p_selock &= ~SE_EWANTED;
	if (CV_HAS_WAITERS(&pp->p_cv))
		cv_broadcast(&pp->p_cv);
	mutex_exit(pse);
}

/*
 * Read the comments inside of page_lock_es() carefully.
 *
 * SE_EXCL callers specifying es == SE_EXCL_WANTED will cause the
 * SE_EWANTED bit of p_selock to be set when the lock cannot be obtained.
 * This is used by threads subject to reader-starvation (eg. memory delete).
 *
 * When a thread using SE_EXCL_WANTED does not obtain the SE_EXCL lock,
 * it is expected that it will retry at a later time.  Threads that will
 * not retry the lock *must* call page_lock_clr_exclwanted to clear the
 * SE_EWANTED bit. (When a thread using SE_EXCL_WANTED obtains the lock,
 * the bit is cleared.)
 */
int
page_try_reclaim_lock(page_t *pp, se_t se, int es)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);
	selock_t old;

	mutex_enter(pse);

	old = pp->p_selock;

	ASSERT(((es & SE_EXCL_WANTED) == 0) ||
	    ((es == SE_EXCL_WANTED) && (se == SE_EXCL)));

	if (se == SE_SHARED && es == 1 && old == 0) {
		se = SE_EXCL;
	}

	if (se == SE_SHARED) {
		if (!PP_ISFREE(pp)) {
			if (old >= 0) {
				/* readers are not allowed when excl wanted */
				if (!(old & SE_EWANTED)) {
					pp->p_selock = old + SE_READER;
					mutex_exit(pse);
					return (1);
				}
			}
			mutex_exit(pse);
			return (0);
		}
		/*
		 * The page is free, so we really want SE_EXCL (below)
		 */
		VM_STAT_ADD(page_try_reclaim_upgrade);
	}

	/*
	 * The caller wants a writer lock.  We try for it only if
	 * SE_EWANTED is not set, or if the caller specified
	 * SE_EXCL_WANTED.
	 */
	if (!(old & SE_EWANTED) || (es == SE_EXCL_WANTED)) {
		if ((old & ~SE_EWANTED) == 0) {
			/* no reader/writer lock held */
			THREAD_KPRI_REQUEST();
			/* this clears out our setting of the SE_EWANTED bit */
			pp->p_selock = SE_WRITER;
			mutex_exit(pse);
			return (1);
		}
	}
	if (es == SE_EXCL_WANTED) {
		/* page is locked, set the SE_EWANTED bit */
		pp->p_selock |= SE_EWANTED;
	}
	mutex_exit(pse);
	return (0);
}
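
/*
 * Editorial usage sketch (not part of the original source) of the
 * SE_EXCL_WANTED contract described above; the give-up condition is a
 * placeholder for caller-specific logic:
 *
 *	while (!page_try_reclaim_lock(pp, SE_EXCL, SE_EXCL_WANTED)) {
 *		if (<caller decides to give up>) {
 *			page_lock_clr_exclwanted(pp);	must clear SE_EWANTED
 *			return;
 *		}
 *		delay(1);			back off before retrying
 *	}
 *	<pp is now held SE_EXCL; the grant cleared SE_EWANTED>
 */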

/*
 * Acquire a page's "shared/exclusive" lock, but never block.
 * Returns 1 on success, 0 on failure.
 */
int
page_trylock(page_t *pp, se_t se)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);

	mutex_enter(pse);
	if (pp->p_selock & SE_EWANTED) {
		/* fail if a thread wants exclusive access */
		mutex_exit(pse);
		return (0);
	}

	if (se == SE_EXCL) {
		if (pp->p_selock == 0) {
			THREAD_KPRI_REQUEST();
			pp->p_selock = SE_WRITER;
			mutex_exit(pse);
			return (1);
		}
	} else {
		if (pp->p_selock >= 0) {
			pp->p_selock += SE_READER;
			mutex_exit(pse);
			return (1);
		}
	}
	mutex_exit(pse);
	return (0);
}

/*
 * Release the page's "shared/exclusive" lock and wake up anyone
 * who might be waiting for it.
 */
void
page_unlock(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);
	selock_t old;

	mutex_enter(pse);
	old = pp->p_selock;
	if ((old & ~SE_EWANTED) == SE_READER) {
		pp->p_selock = old & ~SE_READER;
		if (CV_HAS_WAITERS(&pp->p_cv))
			cv_broadcast(&pp->p_cv);
	} else if ((old & ~SE_EWANTED) == SE_DELETED) {
		panic("page_unlock: page %p is deleted", pp);
	} else if (old < 0) {
		THREAD_KPRI_RELEASE();
		pp->p_selock &= SE_EWANTED;
		if (CV_HAS_WAITERS(&pp->p_cv))
			cv_broadcast(&pp->p_cv);
	} else if ((old & ~SE_EWANTED) > SE_READER) {
		pp->p_selock = old - SE_READER;
	} else {
		panic("page_unlock: page %p is not locked", pp);
	}
	mutex_exit(pse);
}

/*
 * Try to upgrade the lock on the page from a "shared" to an
 * "exclusive" lock.  Since this upgrade operation is done while
 * holding the mutex protecting this page, no one else can acquire this page's
 * lock and change the page.  Thus, it is safe to drop the "shared"
 * lock and attempt to acquire the "exclusive" lock.
 *
 * Returns 1 on success, 0 on failure.
 */
int
page_tryupgrade(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);

	mutex_enter(pse);
	if (!(pp->p_selock & SE_EWANTED)) {
		/* no threads want exclusive access, try upgrade */
		if (pp->p_selock == SE_READER) {
			THREAD_KPRI_REQUEST();
			/* convert to exclusive lock */
			pp->p_selock = SE_WRITER;
			mutex_exit(pse);
			return (1);
		}
	}
	mutex_exit(pse);
	return (0);
}

/*
 * Downgrade the "exclusive" lock on the page to a "shared" lock
 * while holding the mutex protecting this page's p_selock field.
 */
void
page_downgrade(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);
	int excl_waiting;

	ASSERT((pp->p_selock & ~SE_EWANTED) != SE_DELETED);
	ASSERT(PAGE_EXCL(pp));

	mutex_enter(pse);
	excl_waiting = pp->p_selock & SE_EWANTED;
	THREAD_KPRI_RELEASE();
	pp->p_selock = SE_READER | excl_waiting;
	if (CV_HAS_WAITERS(&pp->p_cv))
		cv_broadcast(&pp->p_cv);
	mutex_exit(pse);
}

void
page_lock_delete(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);

	ASSERT(PAGE_EXCL(pp));
	ASSERT(pp->p_vnode == NULL);
	ASSERT(pp->p_offset == (u_offset_t)-1);
	ASSERT(!PP_ISFREE(pp));

	mutex_enter(pse);
	THREAD_KPRI_RELEASE();
	pp->p_selock = SE_DELETED;
	if (CV_HAS_WAITERS(&pp->p_cv))
		cv_broadcast(&pp->p_cv);
	mutex_exit(pse);
}

/*
 * Implement the io lock for pages
 */
void
page_iolock_init(page_t *pp)
{
	pp->p_iolock_state = 0;
	cv_init(&pp->p_io_cv, NULL, CV_DEFAULT, NULL);
}
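
/*
 * Editorial usage sketch (not part of the original source): the i/o lock
 * brackets a transfer on a page whose identity is typically already
 * protected by a held p_selock, e.g.:
 *
 *	<pp already held via page_lock()>
 *	page_io_lock(pp);
 *	<start and wait for the i/o on pp>
 *	page_io_unlock(pp);
 *	page_unlock(pp);
 */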

/*
 * Acquire the i/o lock on a page.
 */
void
page_io_lock(page_t *pp)
{
	kmutex_t *pio;

	pio = PAGE_IO_MUTEX(pp);
	mutex_enter(pio);
	while (pp->p_iolock_state & PAGE_IO_INUSE) {
		cv_wait(&(pp->p_io_cv), pio);
	}
	pp->p_iolock_state |= PAGE_IO_INUSE;
	mutex_exit(pio);
}

/*
 * Release the i/o lock on a page.
 */
void
page_io_unlock(page_t *pp)
{
	kmutex_t *pio;

	pio = PAGE_IO_MUTEX(pp);
	mutex_enter(pio);
	cv_signal(&pp->p_io_cv);
	pp->p_iolock_state &= ~PAGE_IO_INUSE;
	mutex_exit(pio);
}

/*
 * Try to acquire the i/o lock on a page without blocking.
 * Returns 1 on success, 0 on failure.
 */
int
page_io_trylock(page_t *pp)
{
	kmutex_t *pio;

	if (pp->p_iolock_state & PAGE_IO_INUSE)
		return (0);

	pio = PAGE_IO_MUTEX(pp);
	mutex_enter(pio);

	if (pp->p_iolock_state & PAGE_IO_INUSE) {
		mutex_exit(pio);
		return (0);
	}
	pp->p_iolock_state |= PAGE_IO_INUSE;
	mutex_exit(pio);

	return (1);
}

/*
 * Assert that the i/o lock on a page is held.
 * Returns 1 on success, 0 on failure.
 */
int
page_iolock_assert(page_t *pp)
{
	return (pp->p_iolock_state & PAGE_IO_INUSE);
}

/*
 * Wrapper exported to kernel routines that are built
 * platform-independent (the macro is platform-dependent;
 * the size of vph_mutex[] is based on NCPU).
 *
 * Note that you can do stress testing on this by setting the
 * variable page_vnode_mutex_stress to something other than
 * zero in a DEBUG kernel in a debugger after loading the kernel.
 * Setting it after the kernel is running may not work correctly.
 */
#ifdef	DEBUG
static int page_vnode_mutex_stress = 0;
#endif

kmutex_t *
page_vnode_mutex(vnode_t *vp)
{
	if (vp == &kvp)
		return (&vph_mutex[VPH_TABLE_SIZE + 0]);
#ifdef	DEBUG
	if (page_vnode_mutex_stress != 0)
		return (&vph_mutex[0]);
#endif

	return (&vph_mutex[VP_HASH_FUNC(vp)]);
}

kmutex_t *
page_se_mutex(page_t *pp)
{
	return (PAGE_SE_MUTEX(pp));
}

#ifdef VM_STATS
uint_t pszclck_stat[4];
#endif
/*
 * Find, take and return a mutex held by hat_page_demote().
 * Called by page_demote_vp_pages() before hat_page_demote() call and by
 * routines that want to block hat_page_demote() but can't do it
 * via locking all constituent pages.
 *
 * Return NULL if p_szc is 0.
 *
 * It should only be used for pages that can be demoted by hat_page_demote()
 * i.e. non swapfs file system pages.  The logic here is lifted from
 * sfmmu_mlspl_enter() except there's no need to worry about p_szc increase
 * since the page is locked and not free.
 *
 * Hash of the root page is used to find the lock.
 * To find the root in the presence of hat_page_demote() changing the location
 * of the root, this routine relies on the fact that hat_page_demote() changes
 * the root last.
 *
 * If NULL is returned pp's p_szc is guaranteed to be 0.  If non-NULL is
 * returned pp's p_szc may be any value.
 */
kmutex_t *
page_szc_lock(page_t *pp)
{
	kmutex_t	*mtx;
	page_t		*rootpp;
	uint_t		szc;
	uint_t		rszc;
	uint_t		pszc = pp->p_szc;

	ASSERT(pp != NULL);
	ASSERT(PAGE_LOCKED(pp));
	ASSERT(!PP_ISFREE(pp));
	ASSERT(pp->p_vnode != NULL);
	ASSERT(!IS_SWAPFSVP(pp->p_vnode));
	ASSERT(pp->p_vnode != &kvp);

again:
	if (pszc == 0) {
		VM_STAT_ADD(pszclck_stat[0]);
		return (NULL);
	}

	/* The lock lives in the root page */

	rootpp = PP_GROUPLEADER(pp, pszc);
	mtx = PAGE_SZC_MUTEX(rootpp);
	mutex_enter(mtx);

	/*
	 * Since p_szc can only decrease, if pp == rootpp then rootpp will
	 * always be the same, i.e. we have the right root regardless of
	 * rootpp->p_szc.
	 * If the location of pp's root didn't change after we took the
	 * lock, we have the right root.  Return the mutex hashed off it.
	 */
	if (pp == rootpp || (rszc = rootpp->p_szc) == pszc) {
		VM_STAT_ADD(pszclck_stat[1]);
		return (mtx);
	}

	/*
	 * Root location changed because the page got demoted.
	 * Locate the new root.
	 */
	if (rszc < pszc) {
		szc = pp->p_szc;
		ASSERT(szc < pszc);
		mutex_exit(mtx);
		pszc = szc;
		VM_STAT_ADD(pszclck_stat[2]);
		goto again;
	}

	VM_STAT_ADD(pszclck_stat[3]);
	/*
	 * The current hat_page_demote() is not done yet.
	 * Wait for it to finish.
	 */
	mutex_exit(mtx);
	rootpp = PP_GROUPLEADER(rootpp, rszc);
	mtx = PAGE_SZC_MUTEX(rootpp);
	mutex_enter(mtx);
	mutex_exit(mtx);
	ASSERT(rootpp->p_szc < rszc);
	goto again;
}

int
page_szc_lock_assert(page_t *pp)
{
	page_t *rootpp = PP_PAGEROOT(pp);
	kmutex_t *mtx = PAGE_SZC_MUTEX(rootpp);

	return (MUTEX_HELD(mtx));
}
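
/*
 * Editorial usage sketch (not part of the original source).  A caller of
 * page_szc_lock() holds the returned mutex, if any, across the work that
 * must exclude hat_page_demote(), then drops it:
 *
 *	mtx = page_szc_lock(pp);
 *	<do the work that hat_page_demote() must not race with>
 *	if (mtx != NULL)
 *		mutex_exit(mtx);
 */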