/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Big Theory Statement for turnstiles.
 *
 * Turnstiles provide blocking and wakeup support, including priority
 * inheritance, for synchronization primitives (e.g. mutexes and rwlocks).
 * Typical usage is as follows:
 *
 * To block on lock 'lp' for read access in foo_enter():
 *
 *	ts = turnstile_lookup(lp);
 *	[ If the lock is still held, set the waiters bit
 *	turnstile_block(ts, TS_READER_Q, lp, &foo_sobj_ops);
 *
 * To wake threads waiting for write access to lock 'lp' in foo_exit():
 *
 *	ts = turnstile_lookup(lp);
 *	[ Either drop the lock (change owner to NULL) or perform a direct
 *	[ handoff (change owner to one of the threads we're about to wake).
 *	[ If we're going to wake the last waiter, clear the waiters bit.
 *	turnstile_wakeup(ts, TS_WRITER_Q, nwaiters, new_owner or NULL);
 *
 * turnstile_lookup() returns holding the turnstile hash chain lock for lp.
 * Both turnstile_block() and turnstile_wakeup() drop the turnstile lock.
 * To abort a turnstile operation, the client must call turnstile_exit().
 *
 * Requirements of the client:
 *
 * (1)	The lock's waiters indicator may be manipulated *only* while
 *	holding the turnstile hash chain lock (i.e. under turnstile_lookup()).
 *
 * (2)	Once the lock is marked as having waiters, the owner may be
 *	changed *only* while holding the turnstile hash chain lock.
 *
 * (3)	The caller must never block on an unheld lock.
 *
 * Consequences of these assumptions include the following:
 *
 *	(a) It is impossible for a lock to be unheld but have waiters.
 *
 *	(b) The priority inheritance code can safely assume that an active
 *	    turnstile's ts_inheritor never changes until the inheritor calls
 *	    turnstile_pi_waive().
 *
 * These assumptions simplify the implementation of both turnstiles and
 * their clients.
 *
 * Background on priority inheritance:
 *
 * Priority inheritance allows a thread to "will" its dispatch priority
 * to all the threads blocking it, directly or indirectly.  This prevents
 * situations called priority inversions in which a high-priority thread
 * needs a lock held by a low-priority thread, which cannot run because
 * of medium-priority threads.  Without PI, the medium-priority threads
 * can starve out the high-priority thread indefinitely.  With PI, the
 * low-priority thread becomes high-priority until it releases whatever
 * synchronization object the real high-priority thread is waiting for.
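 *
 * For example (an illustrative scenario, not specific to this file):
 * suppose thread L at priority 10 holds a mutex, thread H at priority 60
 * blocks on that mutex, and a compute-bound thread C at priority 40 is
 * runnable.  Without PI, C runs while L (and therefore H) waits, so H is
 * effectively demoted below C.  With PI, turnstile_block() wills H's
 * priority to L; L runs at 60 until it releases the mutex, at which point
 * the inherited priority is waived in turnstile_wakeup().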
 *
 * How turnstiles work:
 *
 * All active turnstiles reside in a global hash table, turnstile_table[].
 * The address of a synchronization object determines its hash index.
 * Each hash chain is protected by its own dispatcher lock, acquired
 * by turnstile_lookup().  This lock protects the hash chain linkage, the
 * contents of all turnstiles on the hash chain, and the waiters bits of
 * every synchronization object in the system that hashes to the same chain.
 * Giving the lock such broad scope simplifies the interactions between
 * the turnstile code and its clients considerably.  The blocking path
 * is rare enough that this has no impact on scalability.  (If it ever
 * does, it's almost surely a second-order effect -- the real problem
 * is that some synchronization object is *very* heavily contended.)
 *
 * Each thread has an attached turnstile in case it needs to block.
 * A thread cannot block on more than one lock at a time, so one
 * turnstile per thread is the most we ever need.  The first thread
 * to block on a lock donates its attached turnstile and adds it to
 * the appropriate hash chain in turnstile_table[].  This becomes the
 * "active turnstile" for the lock.  Each subsequent thread that blocks
 * on the same lock discovers that the lock already has an active
 * turnstile, so it stashes its own turnstile on the active turnstile's
 * freelist.  As threads wake up, the process is reversed.
 *
 * turnstile_block() puts the current thread to sleep on the active
 * turnstile for the desired lock, walks the blocking chain to apply
 * priority inheritance to everyone in its way, and yields the CPU.
 *
 * turnstile_wakeup() waives any priority the owner may have inherited
 * and wakes the specified number of waiting threads.  If the caller is
 * doing direct handoff of ownership (rather than just dropping the lock),
 * the new owner automatically inherits priority from any existing waiters.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/debug.h>
#include <sys/cpuvar.h>
#include <sys/turnstile.h>
#include <sys/t_lock.h>
#include <sys/disp.h>
#include <sys/sobject.h>
#include <sys/cmn_err.h>
#include <sys/sysmacros.h>
#include <sys/lockstat.h>
#include <sys/lwp_upimutex_impl.h>
#include <sys/schedctl.h>
#include <sys/cpu.h>
#include <sys/sdt.h>
#include <sys/cpupart.h>

extern upib_t upimutextab[UPIMUTEX_TABSIZE];

#define	IS_UPI(sobj)	\
	((uintptr_t)(sobj) - (uintptr_t)upimutextab < sizeof (upimutextab))

/*
 * The turnstile hash table is partitioned into two halves: the lower half
 * is used for upimutextab[] locks, the upper half for everything else.
 * The reason for the distinction is that SOBJ_USER_PI locks present a
 * unique problem: the upimutextab[] lock passed to turnstile_block()
 * cannot be dropped until the calling thread has blocked on its
 * SOBJ_USER_PI lock and willed its priority down the blocking chain.
 * At that point, the caller's t_lockp will be one of the turnstile locks.
 * If mutex_exit() discovers that the upimutextab[] lock has waiters, it
 * must wake them, which forces a lock ordering on us: the turnstile lock
 * for the upimutextab[] lock will be acquired in mutex_vector_exit(),
 * which will eventually call into turnstile_pi_waive(), which will then
 * acquire the caller's thread lock, which in this case is the turnstile
 * lock for the SOBJ_USER_PI lock.  In general, when two turnstile locks
 * must be held at the same time, the lock order must be the address order.
 * Therefore, to prevent deadlock in turnstile_pi_waive(), we must ensure
 * that upimutextab[] locks *always* hash to lower addresses than any
 * other locks.  You think this is cheesy?  Let's see you do better.
 */
#define	TURNSTILE_HASH_SIZE	128		/* must be power of 2 */
#define	TURNSTILE_HASH_MASK	(TURNSTILE_HASH_SIZE - 1)
#define	TURNSTILE_SOBJ_HASH(sobj)	\
	((((ulong_t)sobj >> 2) + ((ulong_t)sobj >> 9)) & TURNSTILE_HASH_MASK)
#define	TURNSTILE_SOBJ_BUCKET(sobj)	\
	((IS_UPI(sobj) ? 0 : TURNSTILE_HASH_SIZE) + TURNSTILE_SOBJ_HASH(sobj))
#define	TURNSTILE_CHAIN(sobj)	turnstile_table[TURNSTILE_SOBJ_BUCKET(sobj)]

typedef struct turnstile_chain {
	turnstile_t	*tc_first;	/* first turnstile on hash chain */
	disp_lock_t	tc_lock;	/* lock for this hash chain */
} turnstile_chain_t;

turnstile_chain_t	turnstile_table[2 * TURNSTILE_HASH_SIZE];

static lock_t	turnstile_loser_lock;

/*
 * Make 'inheritor' inherit priority from this turnstile.
 */
static void
turnstile_pi_inherit(turnstile_t *ts, kthread_t *inheritor, pri_t epri)
{
	ASSERT(THREAD_LOCK_HELD(inheritor));
	ASSERT(DISP_LOCK_HELD(&TURNSTILE_CHAIN(ts->ts_sobj).tc_lock));

	if (epri <= inheritor->t_pri)
		return;

	if (ts->ts_inheritor == NULL) {
		ts->ts_inheritor = inheritor;
		ts->ts_epri = epri;
		disp_lock_enter_high(&inheritor->t_pi_lock);
		ts->ts_prioinv = inheritor->t_prioinv;
		inheritor->t_prioinv = ts;
		disp_lock_exit_high(&inheritor->t_pi_lock);
	} else {
		/*
		 * 'inheritor' is already inheriting from this turnstile,
		 * so just adjust its priority.
		 */
		ASSERT(ts->ts_inheritor == inheritor);
		if (ts->ts_epri < epri)
			ts->ts_epri = epri;
	}

	if (epri > DISP_PRIO(inheritor))
		thread_change_epri(inheritor, epri);
}

/*
 * If turnstile is non-NULL, remove it from inheritor's t_prioinv list.
 * Compute the new inherited priority, and return it.
 */
static pri_t
turnstile_pi_tsdelete(turnstile_t *ts, kthread_t *inheritor)
{
	turnstile_t **tspp, *tsp;
	pri_t new_epri = 0;

	disp_lock_enter_high(&inheritor->t_pi_lock);
	tspp = &inheritor->t_prioinv;
	while ((tsp = *tspp) != NULL) {
		if (tsp == ts)
			*tspp = tsp->ts_prioinv;
		else
			new_epri = MAX(new_epri, tsp->ts_epri);
		tspp = &tsp->ts_prioinv;
	}
	disp_lock_exit_high(&inheritor->t_pi_lock);
	return (new_epri);
}

/*
 * Remove the turnstile from the inheritor's t_prioinv list, compute the
 * new priority, and change the inheritor's effective priority if
 * necessary.  Keep in synch with turnstile_pi_recalc().
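 * (Informally, assuming the usual definition of DISP_PRIO() as the
 * maximum of a thread's assigned and inherited priorities: on return the
 * inheritor's dispatch priority is MAX(its own t_pri, the ts_epri of
 * every turnstile remaining on its t_prioinv list).)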
 */
static void
turnstile_pi_waive(turnstile_t *ts)
{
	kthread_t *inheritor = ts->ts_inheritor;
	pri_t new_epri;

	ASSERT(inheritor == curthread);

	thread_lock_high(inheritor);
	new_epri = turnstile_pi_tsdelete(ts, inheritor);
	if (new_epri != DISP_PRIO(inheritor))
		thread_change_epri(inheritor, new_epri);
	ts->ts_inheritor = NULL;
	if (DISP_MUST_SURRENDER(inheritor))
		cpu_surrender(inheritor);
	thread_unlock_high(inheritor);
}

/*
 * Compute the caller's new inherited priority, and change its effective
 * priority if necessary.  Necessary only for SOBJ_USER_PI, because of
 * its interruptibility characteristic.
 */
void
turnstile_pi_recalc(void)
{
	kthread_t *inheritor = curthread;
	pri_t new_epri;

	thread_lock(inheritor);
	new_epri = turnstile_pi_tsdelete(NULL, inheritor);
	if (new_epri != DISP_PRIO(inheritor))
		thread_change_epri(inheritor, new_epri);
	if (DISP_MUST_SURRENDER(inheritor))
		cpu_surrender(inheritor);
	thread_unlock(inheritor);
}

/*
 * Grab the lock protecting the hash chain for sobj
 * and return the active turnstile for sobj, if any.
 */
turnstile_t *
turnstile_lookup(void *sobj)
{
	turnstile_t *ts;
	turnstile_chain_t *tc = &TURNSTILE_CHAIN(sobj);

	disp_lock_enter(&tc->tc_lock);

	for (ts = tc->tc_first; ts != NULL; ts = ts->ts_next)
		if (ts->ts_sobj == sobj)
			break;

	return (ts);
}

/*
 * Drop the lock protecting the hash chain for sobj.
 */
void
turnstile_exit(void *sobj)
{
	disp_lock_exit(&TURNSTILE_CHAIN(sobj).tc_lock);
}

/*
 * When we apply priority inheritance, we must grab the owner's thread lock
 * while already holding the waiter's thread lock.  If both thread locks are
 * turnstile locks, this can lead to deadlock: while we hold L1 and try to
 * grab L2, some unrelated thread may be applying priority inheritance to
 * some other blocking chain, holding L2 and trying to grab L1.  The most
 * obvious solution -- do a lock_try() for the owner lock -- isn't quite
 * sufficient because it can cause livelock: each thread may hold one lock,
 * try to grab the other, fail, bail out, and try again, looping forever.
 * To prevent livelock we must define a winner, i.e. define an arbitrary
 * lock ordering on the turnstile locks.  For simplicity we declare that
 * virtual address order defines lock order, i.e. if L1 < L2, then the
 * correct lock ordering is L1, L2.  Thus the thread that holds L1 and
 * wants L2 should spin until L2 is available, but the thread that holds
 * L2 and can't get L1 on the first try must drop L2 and return failure.
 * Moreover, the losing thread must not reacquire L2 until the winning
 * thread has had a chance to grab it; to ensure this, the losing thread
 * must grab L1 after dropping L2, thus spinning until the winner is done.
 * Complicating matters further, note that the owner's thread lock pointer
 * can change (i.e. be pointed at a different lock) while we're trying to
 * grab it.  If that happens, we must unwind our state and try again.
 *
 * On success, returns 1 with both locks held.
 * On failure, returns 0 with neither lock held.
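 *
 * To make the protocol concrete (a walk-through with hypothetical threads
 * T1 and T2, and turnstile locks L1 < L2 by address): T1 holds L1 and
 * calls turnstile_interlock() for L2, while T2 holds L2 and calls it for
 * L1.  T1 is in address order, so it spins until L2 becomes free and then
 * retries its lock_try().  T2 is out of order, so it drops L2 (letting
 * T1's lock_try() of L2 succeed), then acquires and releases the
 * lower-address lock L1 -- which it cannot do until the winner has
 * finished and dropped L1 -- and finally returns 0 so its caller can
 * restart the chain walk.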
 */
static int
turnstile_interlock(lock_t *wlp, lock_t *volatile *olpp)
{
	ASSERT(LOCK_HELD(wlp));

	for (;;) {
		volatile lock_t *olp = *olpp;

		/*
		 * If the locks are identical, there's nothing to do.
		 */
		if (olp == wlp)
			return (1);
		if (lock_try((lock_t *)olp)) {
			/*
			 * If 'olp' is still the right lock, return success.
			 * Otherwise, drop 'olp' and try the dance again.
			 */
			if (olp == *olpp)
				return (1);
			lock_clear((lock_t *)olp);
		} else {
			hrtime_t spin_time = 0;
			/*
			 * If we're grabbing the locks out of order, we lose.
			 * Drop the waiter's lock, and then grab and release
			 * the owner's lock to ensure that we won't retry
			 * until the winner is done (as described above).
			 */
			if (olp >= (lock_t *)turnstile_table && olp < wlp) {
				lock_clear(wlp);
				lock_set((lock_t *)olp);
				lock_clear((lock_t *)olp);
				return (0);
			}
			/*
			 * We're grabbing the locks in the right order,
			 * so spin until the owner's lock either becomes
			 * available or spontaneously changes.
			 */
			spin_time =
			    LOCKSTAT_START_TIME(LS_TURNSTILE_INTERLOCK_SPIN);
			while (olp == *olpp && LOCK_HELD(olp)) {
				if (panicstr)
					return (1);
				SMT_PAUSE();
			}
			LOCKSTAT_RECORD_TIME(LS_TURNSTILE_INTERLOCK_SPIN,
			    olp, spin_time);
		}
	}
}

/*
 * Block the current thread on a synchronization object.
 *
 * Turnstiles implement both kernel and user-level priority inheritance.
 * To avoid missed wakeups in the user-level case, lwp_upimutex_lock() calls
 * turnstile_block() holding the appropriate lock in the upimutextab (see
 * the block comment in lwp_upimutex_lock() for details).  The held lock is
 * passed to turnstile_block() as the "mp" parameter, and will be dropped
 * after priority has been willed, but before the thread actually sleeps
 * (this locking behavior leads to some subtle ordering issues; see the
 * block comment on turnstile hashing for details).  This _must_ be the only
 * lock held when calling turnstile_block() with a SOBJ_USER_PI sobj; holding
 * other locks can result in panics due to cycles in the blocking chain.
 *
 * turnstile_block() always succeeds for kernel synchronization objects.
 * For SOBJ_USER_PI locks the possible errors are EINTR for signals, and
 * EDEADLK for cycles in the blocking chain.  A return code of zero indicates
 * *either* that the lock is now held, or that this is a spurious wake-up, or
 * that the lock can never be held due to an ENOTRECOVERABLE error.
 * It is up to lwp_upimutex_lock() to sort this all out.
 */

int
turnstile_block(turnstile_t *ts, int qnum, void *sobj, sobj_ops_t *sobj_ops,
    kmutex_t *mp, lwp_timer_t *lwptp)
{
	kthread_t *owner;
	kthread_t *t = curthread;
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	turnstile_chain_t *tc = &TURNSTILE_CHAIN(sobj);
	int error = 0;
	int loser = 0;

	ASSERT(DISP_LOCK_HELD(&tc->tc_lock));
	ASSERT(mp == NULL || IS_UPI(mp));
	ASSERT((SOBJ_TYPE(sobj_ops) == SOBJ_USER_PI) ^ (mp == NULL));

	thread_lock_high(t);

	if (ts == NULL) {
		/*
		 * This is the first thread to block on this sobj.
		 * Take its attached turnstile and add it to the hash chain.
		 */
		ts = t->t_ts;
		ts->ts_sobj = sobj;
		ts->ts_next = tc->tc_first;
		tc->tc_first = ts;
		ASSERT(ts->ts_waiters == 0);
	} else {
		/*
		 * Another thread has already donated its turnstile
		 * to block on this sobj, so ours isn't needed.
		 * Stash it on the active turnstile's freelist.
		 */
		turnstile_t *myts = t->t_ts;
		myts->ts_free = ts->ts_free;
		ts->ts_free = myts;
		t->t_ts = ts;
		ASSERT(ts->ts_sobj == sobj);
		ASSERT(ts->ts_waiters > 0);
	}

	/*
	 * Put the thread to sleep.
	 */
	ASSERT(t != CPU->cpu_idle_thread);
	ASSERT(CPU_ON_INTR(CPU) == 0);
	ASSERT(t->t_wchan0 == NULL && t->t_wchan == NULL);
	ASSERT(t->t_state == TS_ONPROC);

	if (SOBJ_TYPE(sobj_ops) == SOBJ_USER_PI) {
		curthread->t_flag |= T_WAKEABLE;
	}
	CL_SLEEP(t);		/* assign kernel priority */
	THREAD_SLEEP(t, &tc->tc_lock);
	t->t_wchan = sobj;
	t->t_sobj_ops = sobj_ops;
	DTRACE_SCHED(sleep);

	if (lwp != NULL) {
		lwp->lwp_ru.nvcsw++;
		(void) new_mstate(t, LMS_SLEEP);
		if (SOBJ_TYPE(sobj_ops) == SOBJ_USER_PI) {
			lwp->lwp_asleep = 1;
			lwp->lwp_sysabort = 0;
			/*
			 * make wchan0 non-zero to conform to the rule that
			 * threads blocking for user-level objects have a
			 * non-zero wchan0: this prevents spurious wake-ups
			 * by, for example, /proc.
			 */
			t->t_wchan0 = (caddr_t)1;
		}
	}
	ts->ts_waiters++;
	sleepq_insert(&ts->ts_sleepq[qnum], t);

	if (SOBJ_TYPE(sobj_ops) == SOBJ_MUTEX &&
	    SOBJ_OWNER(sobj_ops, sobj) == NULL)
		panic("turnstile_block(%p): unowned mutex", (void *)ts);

	/*
	 * Follow the blocking chain to its end, willing our priority to
	 * everyone who's in our way.
	 */
	while (t->t_sobj_ops != NULL &&
	    (owner = SOBJ_OWNER(t->t_sobj_ops, t->t_wchan)) != NULL) {
		if (owner == curthread) {
			if (SOBJ_TYPE(sobj_ops) != SOBJ_USER_PI) {
				panic("Deadlock: cycle in blocking chain");
			}
			/*
			 * If the cycle we've encountered ends in mp,
			 * then we know it isn't a 'real' cycle because
			 * we're going to drop mp before we go to sleep.
			 * Moreover, since we've come full circle we know
			 * that we must have willed priority to everyone
			 * in our way.  Therefore, we can break out now.
			 */
			if (t->t_wchan == (void *)mp)
				break;

			if (loser)
				lock_clear(&turnstile_loser_lock);
			/*
			 * For SOBJ_USER_PI, a cycle is an application
			 * deadlock which needs to be communicated
			 * back to the application.
			 */
			thread_unlock_nopreempt(t);
			mutex_exit(mp);
			setrun(curthread);
			swtch(); /* necessary to transition state */
			curthread->t_flag &= ~T_WAKEABLE;
			if (lwptp->lwpt_id != 0)
				(void) lwp_timer_dequeue(lwptp);
			setallwatch();
			lwp->lwp_asleep = 0;
			lwp->lwp_sysabort = 0;
			return (EDEADLK);
		}
		if (!turnstile_interlock(t->t_lockp, &owner->t_lockp)) {
			/*
			 * If we failed to grab the owner's thread lock,
			 * turnstile_interlock() will have dropped t's
			 * thread lock, so at this point we don't even know
			 * that 't' exists anymore.  The simplest solution
			 * is to restart the entire priority inheritance dance
			 * from the beginning of the blocking chain, since
			 * we *do* know that 'curthread' still exists.
			 * Application of priority inheritance is idempotent,
			 * so it's OK that we're doing it more than once.
			 * Note also that since we've dropped our thread lock,
			 * we may already have been woken up; if so, our
			 * t_sobj_ops will be NULL, the loop will terminate,
			 * and the call to swtch() will be a no-op.  Phew.
			 *
			 * There is one further complication: if two (or more)
			 * threads keep trying to grab the turnstile locks out
			 * of order and keep losing the race to another thread,
			 * these "dueling losers" can livelock the system.
			 * Therefore, once we get into this rare situation,
			 * we serialize all the losers.
			 */
			if (loser == 0) {
				loser = 1;
				lock_set(&turnstile_loser_lock);
			}
			t = curthread;
			thread_lock_high(t);
			continue;
		}

		/*
		 * We now have the owner's thread lock.  If we are traversing
		 * from non-SOBJ_USER_PI ops to SOBJ_USER_PI ops, then we know
		 * that we have caught the thread while in the TS_SLEEP state,
		 * but holding mp.  We know that this situation is transient
		 * (mp will be dropped before the holder actually sleeps on
		 * the SOBJ_USER_PI sobj), so we will spin waiting for mp to
		 * be dropped.  Then, as in the turnstile_interlock() failure
		 * case, we will restart the priority inheritance dance.
		 */
		if (SOBJ_TYPE(t->t_sobj_ops) != SOBJ_USER_PI &&
		    owner->t_sobj_ops != NULL &&
		    SOBJ_TYPE(owner->t_sobj_ops) == SOBJ_USER_PI) {
			kmutex_t *upi_lock = (kmutex_t *)t->t_wchan;

			ASSERT(IS_UPI(upi_lock));
			ASSERT(SOBJ_TYPE(t->t_sobj_ops) == SOBJ_MUTEX);

			if (t->t_lockp != owner->t_lockp)
				thread_unlock_high(owner);
			thread_unlock_high(t);
			if (loser)
				lock_clear(&turnstile_loser_lock);

			while (mutex_owner(upi_lock) == owner) {
				SMT_PAUSE();
				continue;
			}

			if (loser)
				lock_set(&turnstile_loser_lock);
			t = curthread;
			thread_lock_high(t);
			continue;
		}

		turnstile_pi_inherit(t->t_ts, owner, DISP_PRIO(t));
		if (t->t_lockp != owner->t_lockp)
			thread_unlock_high(t);
		t = owner;
	}

	if (loser)
		lock_clear(&turnstile_loser_lock);

	/*
	 * Note: 't' and 'curthread' were synonymous before the loop above,
	 * but now they may be different.  ('t' is now the last thread in
	 * the blocking chain.)
	 */
	if (SOBJ_TYPE(sobj_ops) == SOBJ_USER_PI) {
		ushort_t s = curthread->t_oldspl;
		int timedwait = 0;
		uint_t imm_timeout = 0;
		clock_t tim = -1;

		thread_unlock_high(t);
		if (lwptp->lwpt_id != 0) {
			/*
			 * We enqueued a timeout.  If it has already fired,
			 * lwptp->lwpt_imm_timeout has been set with cas,
			 * so fetch it with cas.
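			 * (atomic_cas_uint(&x, 0, 0) acts as an atomic
			 * read here: it stores 0 over an existing 0 and
			 * in either case returns the current value.)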
			 */
			timedwait = 1;
			imm_timeout =
			    atomic_cas_uint(&lwptp->lwpt_imm_timeout, 0, 0);
		}
		mutex_exit(mp);
		splx(s);

		if (ISSIG(curthread, JUSTLOOKING) ||
		    MUSTRETURN(p, curthread) || imm_timeout)
			setrun(curthread);
		swtch();
		curthread->t_flag &= ~T_WAKEABLE;
		if (timedwait)
			tim = lwp_timer_dequeue(lwptp);
		setallwatch();
		if (ISSIG(curthread, FORREAL) || lwp->lwp_sysabort ||
		    MUSTRETURN(p, curthread))
			error = EINTR;
		else if (imm_timeout || (timedwait && tim == -1))
			error = ETIME;
		lwp->lwp_sysabort = 0;
		lwp->lwp_asleep = 0;
	} else {
		thread_unlock_nopreempt(t);
		swtch();
	}

	return (error);
}

/*
 * Remove the thread from the specified turnstile sleep queue and retrieve
 * its free turnstile; if it is the last waiter, delete the turnstile
 * from the turnstile chain and, if there is an inheritor, delete the
 * turnstile from the inheritor's t_prioinv chain.
 */
static void
turnstile_dequeue(kthread_t *t)
{
	turnstile_t *ts = t->t_ts;
	turnstile_chain_t *tc = &TURNSTILE_CHAIN(ts->ts_sobj);
	turnstile_t *tsfree, **tspp;

	ASSERT(DISP_LOCK_HELD(&tc->tc_lock));
	ASSERT(t->t_lockp == &tc->tc_lock);

	if ((tsfree = ts->ts_free) != NULL) {
		ASSERT(ts->ts_waiters > 1);
		ASSERT(tsfree->ts_waiters == 0);
		t->t_ts = tsfree;
		ts->ts_free = tsfree->ts_free;
		tsfree->ts_free = NULL;
	} else {
		/*
		 * The active turnstile's freelist is empty, so this
		 * must be the last waiter.  Remove the turnstile
		 * from the hash chain and leave the now-inactive
		 * turnstile attached to the thread we're waking.
		 * Note that the ts_inheritor for the turnstile
		 * may be NULL.  If one exists, its t_prioinv
		 * chain has to be updated.
		 */
		ASSERT(ts->ts_waiters == 1);
		if (ts->ts_inheritor != NULL) {
			(void) turnstile_pi_tsdelete(ts, ts->ts_inheritor);
			/*
			 * If we ever do a "disinherit" or "unboost", we need
			 * to do it only if "t" is a thread at the head of the
			 * sleep queue.  Since the sleep queue is prioritized,
			 * the disinherit is necessary only if the interrupted
			 * thread is the highest priority thread.
			 * Otherwise, there is a higher priority thread blocked
			 * on the turnstile, whose inheritance cannot be
			 * disinherited.  However, disinheriting is explicitly
			 * not done here, since it would require holding the
			 * inheritor's thread lock (see turnstile_unsleep()).
			 */
			ts->ts_inheritor = NULL;
		}
		tspp = &tc->tc_first;
		while (*tspp != ts)
			tspp = &(*tspp)->ts_next;
		*tspp = ts->ts_next;
		ASSERT(t->t_ts == ts);
	}
	ts->ts_waiters--;
	sleepq_dequeue(t);
	t->t_sobj_ops = NULL;
	t->t_wchan = NULL;
	t->t_wchan0 = NULL;
	ASSERT(t->t_state == TS_SLEEP);
}

/*
 * Wake threads that are blocked in a turnstile.
 */
void
turnstile_wakeup(turnstile_t *ts, int qnum, int nthreads, kthread_t *owner)
{
	turnstile_chain_t *tc = &TURNSTILE_CHAIN(ts->ts_sobj);
	sleepq_t *sqp = &ts->ts_sleepq[qnum];

	ASSERT(DISP_LOCK_HELD(&tc->tc_lock));

	/*
	 * Waive any priority we may have inherited from this turnstile.
	 */
	if (ts->ts_inheritor != NULL) {
		turnstile_pi_waive(ts);
	}
	while (nthreads-- > 0) {
		kthread_t *t = sqp->sq_first;
		ASSERT(t->t_ts == ts);
		ASSERT(ts->ts_waiters > 1 || ts->ts_inheritor == NULL);
		DTRACE_SCHED1(wakeup, kthread_t *, t);
		turnstile_dequeue(t);
		CL_WAKEUP(t); /* previous thread lock, tc_lock, not dropped */
		/*
		 * If the caller did direct handoff of ownership,
		 * make the new owner inherit from this turnstile.
		 */
		if (t == owner) {
			kthread_t *wp = ts->ts_sleepq[TS_WRITER_Q].sq_first;
			kthread_t *rp = ts->ts_sleepq[TS_READER_Q].sq_first;
			pri_t wpri = wp ? DISP_PRIO(wp) : 0;
			pri_t rpri = rp ? DISP_PRIO(rp) : 0;
			turnstile_pi_inherit(ts, t, MAX(wpri, rpri));
			owner = NULL;
		}
		thread_unlock_high(t);		/* drop run queue lock */
	}
	if (owner != NULL)
		panic("turnstile_wakeup: owner %p not woken", (void *)owner);
	disp_lock_exit(&tc->tc_lock);
}

/*
 * Change the priority of a thread sleeping in a turnstile.
 */
void
turnstile_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip)
{
	sleepq_t *sqp = t->t_sleepq;

	sleepq_dequeue(t);
	*t_prip = pri;
	sleepq_insert(sqp, t);
}

/*
 * We don't allow spurious wakeups of threads blocked in turnstiles
 * for synch objects whose sobj_ops vector is initialized with the
 * following routine (e.g. kernel synchronization objects).
 * This is vital to the correctness of direct-handoff logic in some
 * synchronization primitives, and it also simplifies the PI logic.
 */
/* ARGSUSED */
void
turnstile_stay_asleep(kthread_t *t)
{
}

/*
 * Wake up a thread blocked in a turnstile.  Used to enable interruptibility
 * of threads blocked on a SOBJ_USER_PI sobj.
 *
 * The implications of this interface are:
 *
 * 1.	turnstile_block() may return with an EINTR.
 * 2.	When the owner of an sobj releases it, but no turnstile is found (i.e.
 *	no waiters), the (prior) owner must call turnstile_pi_recalc() to
 *	waive any priority inherited from interrupted waiters.
 *
 * When a waiter is interrupted, disinheriting its willed priority from the
 * inheritor would require holding the inheritor's thread lock, while also
 * holding the waiter's thread lock which is a turnstile lock.  If the
 * inheritor's thread lock is not free, and is also a turnstile lock that
 * is out of lock order, the waiter's thread lock would have to be dropped.
 * This leads to complications for the caller of turnstile_unsleep(), since
 * the caller holds the waiter's thread lock.  So, instead of disinheriting
 * on waiter interruption, the owner is required to follow rule 2 above.
 *
 * Avoiding disinherit on waiter interruption seems acceptable because
 * the owner runs at an unnecessarily high priority only while sobj is
 * held, which it would have done in any case had the waiter not been
 * interrupted.
 */
void
turnstile_unsleep(kthread_t *t)
{
	turnstile_dequeue(t);
	THREAD_TRANSITION(t);
	CL_SETRUN(t);
}
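
/*
 * For concreteness, here is a sketch of a hypothetical client of the
 * interfaces above, following the protocol in the Big Theory Statement.
 * foo_lock_t and the foo_* helpers are illustrative only -- they do not
 * exist in the kernel -- and the sketch elides the uncontended fast path
 * and any reader/writer distinction (it treats the lock as exclusive).
 *
 *	void
 *	foo_enter(foo_lock_t *lp)
 *	{
 *		turnstile_t *ts;
 *
 *		while (!foo_try_enter(lp)) {
 *			ts = turnstile_lookup(lp);
 *			if (foo_held_set_waiters(lp)) {
 *				(void) turnstile_block(ts, TS_WRITER_Q, lp,
 *				    &foo_sobj_ops, NULL, NULL);
 *			} else {
 *				turnstile_exit(lp);
 *			}
 *		}
 *	}
 *
 *	void
 *	foo_exit(foo_lock_t *lp)
 *	{
 *		turnstile_t *ts = turnstile_lookup(lp);
 *
 *		if (ts == NULL) {
 *			foo_clear_owner(lp);
 *			turnstile_exit(lp);
 *		} else {
 *			int nwaiters = foo_clear_owner_and_waiters(lp);
 *			turnstile_wakeup(ts, TS_WRITER_Q, nwaiters, NULL);
 *		}
 *	}
 *
 * Note that both turnstile_block() and turnstile_wakeup() drop the hash
 * chain lock acquired by turnstile_lookup(), so only the abort path calls
 * turnstile_exit(), and the waiters bit is manipulated only while that
 * lock is held, per requirements (1) and (2) above.
 */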