/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Big Theory Statement for turnstiles.
 *
 * Turnstiles provide blocking and wakeup support, including priority
 * inheritance, for synchronization primitives (e.g. mutexes and rwlocks).
 * Typical usage is as follows:
 *
 *	To block on lock 'lp' for read access in foo_enter():
 *
 *		ts = turnstile_lookup(lp);
 *		[ If the lock is still held, set the waiters bit
 *		turnstile_block(ts, TS_READER_Q, lp, &foo_sobj_ops);
 *
 *	To wake threads waiting for write access to lock 'lp' in foo_exit():
 *
 *		ts = turnstile_lookup(lp);
 *		[ Either drop the lock (change owner to NULL) or perform a direct
 *		[ handoff (change owner to one of the threads we're about to wake).
 *		[ If we're going to wake the last waiter, clear the waiters bit.
 *		turnstile_wakeup(ts, TS_WRITER_Q, nwaiters, new_owner or NULL);
 *
 * turnstile_lookup() returns holding the turnstile hash chain lock for lp.
 * Both turnstile_block() and turnstile_wakeup() drop the turnstile lock.
 * To abort a turnstile operation, the client must call turnstile_exit().
 *
 * Requirements of the client:
 *
 * (1)	The lock's waiters indicator may be manipulated *only* while
 *	holding the turnstile hash chain lock (i.e. under turnstile_lookup()).
 *
 * (2)	Once the lock is marked as having waiters, the owner may be
 *	changed *only* while holding the turnstile hash chain lock.
 *
 * (3)	The caller must never block on an unheld lock.
 *
 * Consequences of these assumptions include the following:
 *
 *	(a) It is impossible for a lock to be unheld but have waiters.
 *
 *	(b) The priority inheritance code can safely assume that an active
 *	    turnstile's ts_inheritor never changes until the inheritor calls
 *	    turnstile_pi_waive().
 *
 * These assumptions simplify the implementation of both turnstiles and
 * their clients.
 *
 * Background on priority inheritance:
 *
 * Priority inheritance allows a thread to "will" its dispatch priority
 * to all the threads blocking it, directly or indirectly.  This prevents
 * situations called priority inversions in which a high-priority thread
 * needs a lock held by a low-priority thread, which cannot run because
 * of medium-priority threads.  Without PI, the medium-priority threads
 * can starve out the high-priority thread indefinitely.  With PI, the
 * low-priority thread becomes high-priority until it releases whatever
 * synchronization object the real high-priority thread is waiting for.
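 *
 * To make this concrete, a small hypothetical example: thread L (pri 10)
 * holds mutex M; thread H (pri 60) blocks on M; without PI, CPU-bound
 * threads at pri 30 could keep L -- and therefore H -- off the CPU
 * indefinitely.  With PI, H's call to turnstile_block() wills priority
 * 60 down the blocking chain, so L runs at 60 until it drops M, at which
 * point turnstile_wakeup() waives the inherited priority (via
 * turnstile_pi_waive()) and H can acquire M.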
 *
 * How turnstiles work:
 *
 * All active turnstiles reside in a global hash table, turnstile_table[].
 * The address of a synchronization object determines its hash index.
 * Each hash chain is protected by its own dispatcher lock, acquired
 * by turnstile_lookup().  This lock protects the hash chain linkage, the
 * contents of all turnstiles on the hash chain, and the waiters bits of
 * every synchronization object in the system that hashes to the same chain.
 * Giving the lock such broad scope simplifies the interactions between
 * the turnstile code and its clients considerably.  The blocking path
 * is rare enough that this has no impact on scalability.  (If it ever
 * does, it's almost surely a second-order effect -- the real problem
 * is that some synchronization object is *very* heavily contended.)
 *
 * Each thread has an attached turnstile in case it needs to block.
 * A thread cannot block on more than one lock at a time, so one
 * turnstile per thread is the most we ever need.  The first thread
 * to block on a lock donates its attached turnstile and adds it to
 * the appropriate hash chain in turnstile_table[].  This becomes the
 * "active turnstile" for the lock.  Each subsequent thread that blocks
 * on the same lock discovers that the lock already has an active
 * turnstile, so it stashes its own turnstile on the active turnstile's
 * freelist.  As threads wake up, the process is reversed.
 *
 * turnstile_block() puts the current thread to sleep on the active
 * turnstile for the desired lock, walks the blocking chain to apply
 * priority inheritance to everyone in its way, and yields the CPU.
 *
 * turnstile_wakeup() waives any priority the owner may have inherited
 * and wakes the specified number of waiting threads.  If the caller is
 * doing direct handoff of ownership (rather than just dropping the lock),
 * the new owner automatically inherits priority from any existing waiters.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/debug.h>
#include <sys/cpuvar.h>
#include <sys/turnstile.h>
#include <sys/t_lock.h>
#include <sys/disp.h>
#include <sys/sobject.h>
#include <sys/cmn_err.h>
#include <sys/sysmacros.h>
#include <sys/lockstat.h>
#include <sys/lwp_upimutex_impl.h>
#include <sys/schedctl.h>
#include <sys/cpu.h>
#include <sys/sdt.h>
#include <sys/cpupart.h>

extern upib_t upimutextab[UPIMUTEX_TABSIZE];

#define	IS_UPI(sobj)	\
	((uintptr_t)(sobj) - (uintptr_t)upimutextab < sizeof (upimutextab))

/*
 * The turnstile hash table is partitioned into two halves: the lower half
 * is used for upimutextab[] locks, the upper half for everything else.
 * The reason for the distinction is that SOBJ_USER_PI locks present a
 * unique problem: the upimutextab[] lock passed to turnstile_block()
 * cannot be dropped until the calling thread has blocked on its
 * SOBJ_USER_PI lock and willed its priority down the blocking chain.
 * At that point, the caller's t_lockp will be one of the turnstile locks.
 * If mutex_exit() discovers that the upimutextab[] lock has waiters, it
 * must wake them, which forces a lock ordering on us: the turnstile lock
 * for the upimutextab[] lock will be acquired in mutex_vector_exit(),
 * which will eventually call into turnstile_pi_waive(), which will then
 * acquire the caller's thread lock, which in this case is the turnstile
 * lock for the SOBJ_USER_PI lock.  In general, when two turnstile locks
 * must be held at the same time, the lock order must be the address order.
 * Therefore, to prevent deadlock in turnstile_pi_waive(), we must ensure
 * that upimutextab[] locks *always* hash to lower addresses than any
 * other locks.  You think this is cheesy?  Let's see you do better.
 */
#define	TURNSTILE_HASH_SIZE	128		/* must be power of 2 */
#define	TURNSTILE_HASH_MASK	(TURNSTILE_HASH_SIZE - 1)
#define	TURNSTILE_SOBJ_HASH(sobj)	\
	((((ulong_t)sobj >> 2) + ((ulong_t)sobj >> 9)) & TURNSTILE_HASH_MASK)
#define	TURNSTILE_SOBJ_BUCKET(sobj)	\
	((IS_UPI(sobj) ? 0 : TURNSTILE_HASH_SIZE) + TURNSTILE_SOBJ_HASH(sobj))
#define	TURNSTILE_CHAIN(sobj)	turnstile_table[TURNSTILE_SOBJ_BUCKET(sobj)]

typedef struct turnstile_chain {
	turnstile_t	*tc_first;	/* first turnstile on hash chain */
	disp_lock_t	tc_lock;	/* lock for this hash chain */
} turnstile_chain_t;

turnstile_chain_t	turnstile_table[2 * TURNSTILE_HASH_SIZE];

static lock_t	turnstile_loser_lock;

/*
 * Make 'inheritor' inherit priority from this turnstile.
 */
static void
turnstile_pi_inherit(turnstile_t *ts, kthread_t *inheritor, pri_t epri)
{
	ASSERT(THREAD_LOCK_HELD(inheritor));
	ASSERT(DISP_LOCK_HELD(&TURNSTILE_CHAIN(ts->ts_sobj).tc_lock));

	if (epri <= inheritor->t_pri)
		return;

	if (ts->ts_inheritor == NULL) {
		ts->ts_inheritor = inheritor;
		ts->ts_epri = epri;
		disp_lock_enter_high(&inheritor->t_pi_lock);
		ts->ts_prioinv = inheritor->t_prioinv;
		inheritor->t_prioinv = ts;
		disp_lock_exit_high(&inheritor->t_pi_lock);
	} else {
		/*
		 * 'inheritor' is already inheriting from this turnstile,
		 * so just adjust its priority.
		 */
		ASSERT(ts->ts_inheritor == inheritor);
		if (ts->ts_epri < epri)
			ts->ts_epri = epri;
	}

	if (epri > DISP_PRIO(inheritor))
		thread_change_epri(inheritor, epri);
}

/*
 * If turnstile is non-NULL, remove it from inheritor's t_prioinv list.
 * Compute new inherited priority, and return it.
 */
static pri_t
turnstile_pi_tsdelete(turnstile_t *ts, kthread_t *inheritor)
{
	turnstile_t **tspp, *tsp;
	pri_t new_epri = 0;

	disp_lock_enter_high(&inheritor->t_pi_lock);
	tspp = &inheritor->t_prioinv;
	while ((tsp = *tspp) != NULL) {
		if (tsp == ts)
			*tspp = tsp->ts_prioinv;
		else
			new_epri = MAX(new_epri, tsp->ts_epri);
		tspp = &tsp->ts_prioinv;
	}
	disp_lock_exit_high(&inheritor->t_pi_lock);
	return (new_epri);
}

/*
 * Remove turnstile from inheritor's t_prioinv list, compute
 * new priority, and change the inheritor's effective priority if
 * necessary.  Keep in synch with turnstile_pi_recalc().
 */
static void
turnstile_pi_waive(turnstile_t *ts)
{
	kthread_t *inheritor = ts->ts_inheritor;
	pri_t new_epri;

	ASSERT(inheritor == curthread);

	thread_lock_high(inheritor);
	new_epri = turnstile_pi_tsdelete(ts, inheritor);
	if (new_epri != DISP_PRIO(inheritor))
		thread_change_epri(inheritor, new_epri);
	ts->ts_inheritor = NULL;
	if (DISP_MUST_SURRENDER(inheritor))
		cpu_surrender(inheritor);
	thread_unlock_high(inheritor);
}

/*
 * Compute caller's new inherited priority, and change its effective
 * priority if necessary.  Necessary only for SOBJ_USER_PI, because of
 * its interruptibility characteristic.
 */
void
turnstile_pi_recalc(void)
{
	kthread_t *inheritor = curthread;
	pri_t new_epri;

	thread_lock(inheritor);
	new_epri = turnstile_pi_tsdelete(NULL, inheritor);
	if (new_epri != DISP_PRIO(inheritor))
		thread_change_epri(inheritor, new_epri);
	if (DISP_MUST_SURRENDER(inheritor))
		cpu_surrender(inheritor);
	thread_unlock(inheritor);
}

/*
 * Grab the lock protecting the hash chain for sobj
 * and return the active turnstile for sobj, if any.
 */
turnstile_t *
turnstile_lookup(void *sobj)
{
	turnstile_t *ts;
	turnstile_chain_t *tc = &TURNSTILE_CHAIN(sobj);

	disp_lock_enter(&tc->tc_lock);

	for (ts = tc->tc_first; ts != NULL; ts = ts->ts_next)
		if (ts->ts_sobj == sobj)
			break;

	return (ts);
}

/*
 * Drop the lock protecting the hash chain for sobj.
 */
void
turnstile_exit(void *sobj)
{
	disp_lock_exit(&TURNSTILE_CHAIN(sobj).tc_lock);
}

/*
 * When we apply priority inheritance, we must grab the owner's thread lock
 * while already holding the waiter's thread lock.  If both thread locks are
 * turnstile locks, this can lead to deadlock: while we hold L1 and try to
 * grab L2, some unrelated thread may be applying priority inheritance to
 * some other blocking chain, holding L2 and trying to grab L1.  The most
 * obvious solution -- do a lock_try() for the owner lock -- isn't quite
 * sufficient because it can cause livelock: each thread may hold one lock,
 * try to grab the other, fail, bail out, and try again, looping forever.
 * To prevent livelock we must define a winner, i.e. define an arbitrary
 * lock ordering on the turnstile locks.  For simplicity we declare that
 * virtual address order defines lock order, i.e. if L1 < L2, then the
 * correct lock ordering is L1, L2.  Thus the thread that holds L1 and
 * wants L2 should spin until L2 is available, but the thread that holds
 * L2 and can't get L1 on the first try must drop L2 and return failure.
 * Moreover, the losing thread must not reacquire L2 until the winning
 * thread has had a chance to grab it; to ensure this, the losing thread
 * must grab L1 after dropping L2, thus spinning until the winner is done.
 * Complicating matters further, note that the owner's thread lock pointer
 * can change (i.e. be pointed at a different lock) while we're trying to
 * grab it.  If that happens, we must unwind our state and try again.
 *
 * On success, returns 1 with both locks held.
 * On failure, returns 0 with neither lock held.
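 *
 * In outline (W = the waiter's lock we already hold, O = the owner's
 * current thread lock pointer) -- a restatement of the code below, not
 * additional semantics:
 *
 *	O == W			nothing to grab; succeed
 *	lock_try(O) succeeds	succeed if O is still the owner's lock,
 *				otherwise drop O and retry
 *	O < W (out of order)	drop W, then lock/unlock O to wait out
 *				the winner; fail
 *	otherwise (in order)	spin until O is free or changes, then retry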
 */
static int
turnstile_interlock(lock_t *wlp, lock_t *volatile *olpp)
{
	ASSERT(LOCK_HELD(wlp));

	for (;;) {
		volatile lock_t *olp = *olpp;

		/*
		 * If the locks are identical, there's nothing to do.
		 */
		if (olp == wlp)
			return (1);
		if (lock_try((lock_t *)olp)) {
			/*
			 * If 'olp' is still the right lock, return success.
			 * Otherwise, drop 'olp' and try the dance again.
			 */
			if (olp == *olpp)
				return (1);
			lock_clear((lock_t *)olp);
		} else {
			hrtime_t spin_time = 0;
			/*
			 * If we're grabbing the locks out of order, we lose.
			 * Drop the waiter's lock, and then grab and release
			 * the owner's lock to ensure that we won't retry
			 * until the winner is done (as described above).
			 */
			if (olp >= (lock_t *)turnstile_table && olp < wlp) {
				lock_clear(wlp);
				lock_set((lock_t *)olp);
				lock_clear((lock_t *)olp);
				return (0);
			}
			/*
			 * We're grabbing the locks in the right order,
			 * so spin until the owner's lock either becomes
			 * available or spontaneously changes.
			 */
			spin_time =
			    LOCKSTAT_START_TIME(LS_TURNSTILE_INTERLOCK_SPIN);
			while (olp == *olpp && LOCK_HELD(olp)) {
				if (panicstr)
					return (1);
				SMT_PAUSE();
			}
			LOCKSTAT_RECORD_TIME(LS_TURNSTILE_INTERLOCK_SPIN,
			    olp, spin_time);
		}
	}
}

/*
 * Block the current thread on a synchronization object.
 *
 * Turnstiles implement both kernel and user-level priority inheritance.
 * To avoid missed wakeups in the user-level case, lwp_upimutex_lock() calls
 * turnstile_block() holding the appropriate lock in the upimutextab (see
 * the block comment in lwp_upimutex_lock() for details).  The held lock is
 * passed to turnstile_block() as the "mp" parameter, and will be dropped
 * after priority has been willed, but before the thread actually sleeps
 * (this locking behavior leads to some subtle ordering issues; see the
 * block comment on turnstile hashing for details).  This _must_ be the only
 * lock held when calling turnstile_block() with a SOBJ_USER_PI sobj; holding
 * other locks can result in panics due to cycles in the blocking chain.
 *
 * turnstile_block() always succeeds for kernel synchronization objects.
 * For SOBJ_USER_PI locks the possible errors are EINTR for signals, and
 * EDEADLK for cycles in the blocking chain.  A return code of zero indicates
 * *either* that the lock is now held, or that this is a spurious wake-up, or
 * that the lock can never be held due to an ENOTRECOVERABLE error.
 * It is up to lwp_upimutex_lock() to sort this all out.
 */

int
turnstile_block(turnstile_t *ts, int qnum, void *sobj, sobj_ops_t *sobj_ops,
    kmutex_t *mp, lwp_timer_t *lwptp)
{
	kthread_t *owner;
	kthread_t *t = curthread;
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	turnstile_chain_t *tc = &TURNSTILE_CHAIN(sobj);
	int error = 0;
	int loser = 0;

	ASSERT(DISP_LOCK_HELD(&tc->tc_lock));
	ASSERT(mp == NULL || IS_UPI(mp));
	ASSERT((SOBJ_TYPE(sobj_ops) == SOBJ_USER_PI) ^ (mp == NULL));

	thread_lock_high(t);

	if (ts == NULL) {
		/*
		 * This is the first thread to block on this sobj.
		 * Take its attached turnstile and add it to the hash chain.
		 */
		ts = t->t_ts;
		ts->ts_sobj = sobj;
		ts->ts_next = tc->tc_first;
		tc->tc_first = ts;
		ASSERT(ts->ts_waiters == 0);
	} else {
		/*
		 * Another thread has already donated its turnstile
		 * to block on this sobj, so ours isn't needed.
		 * Stash it on the active turnstile's freelist.
		 */
		turnstile_t *myts = t->t_ts;
		myts->ts_free = ts->ts_free;
		ts->ts_free = myts;
		t->t_ts = ts;
		ASSERT(ts->ts_sobj == sobj);
		ASSERT(ts->ts_waiters > 0);
	}

	/*
	 * Put the thread to sleep.
	 */
	ASSERT(t != CPU->cpu_idle_thread);
	ASSERT(CPU_ON_INTR(CPU) == 0);
	ASSERT(t->t_wchan0 == NULL && t->t_wchan == NULL);
	ASSERT(t->t_state == TS_ONPROC);

	if (SOBJ_TYPE(sobj_ops) == SOBJ_USER_PI) {
		curthread->t_flag |= T_WAKEABLE;
	}
	CL_SLEEP(t);		/* assign kernel priority */
	THREAD_SLEEP(t, &tc->tc_lock);
	t->t_wchan = sobj;
	t->t_sobj_ops = sobj_ops;
	DTRACE_SCHED(sleep);

	if (lwp != NULL) {
		lwp->lwp_ru.nvcsw++;
		(void) new_mstate(t, LMS_SLEEP);
		if (SOBJ_TYPE(sobj_ops) == SOBJ_USER_PI) {
			lwp->lwp_asleep = 1;
			lwp->lwp_sysabort = 0;
			/*
			 * make wchan0 non-zero to conform to the rule that
			 * threads blocking for user-level objects have a
			 * non-zero wchan0: this prevents spurious wake-ups
			 * by, for example, /proc.
			 */
			t->t_wchan0 = (caddr_t)1;
		}
	}
	ts->ts_waiters++;
	sleepq_insert(&ts->ts_sleepq[qnum], t);

	if (SOBJ_TYPE(sobj_ops) == SOBJ_MUTEX &&
	    SOBJ_OWNER(sobj_ops, sobj) == NULL)
		panic("turnstile_block(%p): unowned mutex", (void *)ts);

	/*
	 * Follow the blocking chain to its end, willing our priority to
	 * everyone who's in our way.
	 */
	while (t->t_sobj_ops != NULL &&
	    (owner = SOBJ_OWNER(t->t_sobj_ops, t->t_wchan)) != NULL) {
		if (owner == curthread) {
			if (SOBJ_TYPE(sobj_ops) != SOBJ_USER_PI) {
				panic("Deadlock: cycle in blocking chain");
			}
			/*
			 * If the cycle we've encountered ends in mp,
			 * then we know it isn't a 'real' cycle because
			 * we're going to drop mp before we go to sleep.
			 * Moreover, since we've come full circle we know
			 * that we must have willed priority to everyone
			 * in our way.  Therefore, we can break out now.
			 */
			if (t->t_wchan == (void *)mp)
				break;

			if (loser)
				lock_clear(&turnstile_loser_lock);
			/*
			 * For SOBJ_USER_PI, a cycle is an application
			 * deadlock which needs to be communicated
			 * back to the application.
			 */
			thread_unlock_nopreempt(t);
			if (lwptp->lwpt_id != 0) {
				/*
				 * We enqueued a timeout, we are
				 * holding curthread->t_delay_lock.
				 * Drop it and dequeue the timeout.
				 */
				mutex_exit(&curthread->t_delay_lock);
				(void) lwp_timer_dequeue(lwptp);
			}
			mutex_exit(mp);
			setrun(curthread);
			swtch(); /* necessary to transition state */
			curthread->t_flag &= ~T_WAKEABLE;
			setallwatch();
			lwp->lwp_asleep = 0;
			lwp->lwp_sysabort = 0;
			return (EDEADLK);
		}
		if (!turnstile_interlock(t->t_lockp, &owner->t_lockp)) {
			/*
			 * If we failed to grab the owner's thread lock,
			 * turnstile_interlock() will have dropped t's
			 * thread lock, so at this point we don't even know
			 * that 't' exists anymore.  The simplest solution
			 * is to restart the entire priority inheritance dance
			 * from the beginning of the blocking chain, since
			 * we *do* know that 'curthread' still exists.
			 * Application of priority inheritance is idempotent,
			 * so it's OK that we're doing it more than once.
			 * Note also that since we've dropped our thread lock,
			 * we may already have been woken up; if so, our
			 * t_sobj_ops will be NULL, the loop will terminate,
			 * and the call to swtch() will be a no-op.  Phew.
			 *
			 * There is one further complication: if two (or more)
			 * threads keep trying to grab the turnstile locks out
			 * of order and keep losing the race to another thread,
			 * these "dueling losers" can livelock the system.
			 * Therefore, once we get into this rare situation,
			 * we serialize all the losers.
			 */
			if (loser == 0) {
				loser = 1;
				lock_set(&turnstile_loser_lock);
			}
			t = curthread;
			thread_lock_high(t);
			continue;
		}

		/*
		 * We now have the owner's thread lock.  If we are traversing
		 * from non-SOBJ_USER_PI ops to SOBJ_USER_PI ops, then we know
		 * that we have caught the thread while in the TS_SLEEP state,
		 * but holding mp.  We know that this situation is transient
		 * (mp will be dropped before the holder actually sleeps on
		 * the SOBJ_USER_PI sobj), so we will spin waiting for mp to
		 * be dropped.  Then, as in the turnstile_interlock() failure
		 * case, we will restart the priority inheritance dance.
		 */
		if (SOBJ_TYPE(t->t_sobj_ops) != SOBJ_USER_PI &&
		    owner->t_sobj_ops != NULL &&
		    SOBJ_TYPE(owner->t_sobj_ops) == SOBJ_USER_PI) {
			kmutex_t *upi_lock = (kmutex_t *)t->t_wchan;

			ASSERT(IS_UPI(upi_lock));
			ASSERT(SOBJ_TYPE(t->t_sobj_ops) == SOBJ_MUTEX);

			if (t->t_lockp != owner->t_lockp)
				thread_unlock_high(owner);
			thread_unlock_high(t);
			if (loser)
				lock_clear(&turnstile_loser_lock);

			while (mutex_owner(upi_lock) == owner) {
				SMT_PAUSE();
				continue;
			}

			if (loser)
				lock_set(&turnstile_loser_lock);
			t = curthread;
			thread_lock_high(t);
			continue;
		}

		turnstile_pi_inherit(t->t_ts, owner, DISP_PRIO(t));
		if (t->t_lockp != owner->t_lockp)
			thread_unlock_high(t);
		t = owner;
	}

	if (loser)
		lock_clear(&turnstile_loser_lock);

	/*
	 * Note: 't' and 'curthread' were synonymous before the loop above,
	 * but now they may be different.  ('t' is now the last thread in
	 * the blocking chain.)
	 */
	if (SOBJ_TYPE(sobj_ops) == SOBJ_USER_PI) {
		ushort_t s = curthread->t_oldspl;
		int timedwait = 0;
		clock_t tim = -1;

		thread_unlock_high(t);
		if (lwptp->lwpt_id != 0) {
			/*
			 * We enqueued a timeout and we are
			 * holding curthread->t_delay_lock.
			 */
			mutex_exit(&curthread->t_delay_lock);
			timedwait = 1;
		}
		mutex_exit(mp);
		splx(s);

		if (ISSIG(curthread, JUSTLOOKING) ||
		    MUSTRETURN(p, curthread) || lwptp->lwpt_imm_timeout)
			setrun(curthread);
		swtch();
		curthread->t_flag &= ~T_WAKEABLE;
		if (timedwait)
			tim = lwp_timer_dequeue(lwptp);
		setallwatch();
		if (ISSIG(curthread, FORREAL) || lwp->lwp_sysabort ||
		    MUSTRETURN(p, curthread))
			error = EINTR;
		else if (lwptp->lwpt_imm_timeout || (timedwait && tim == -1))
			error = ETIME;
		lwp->lwp_sysabort = 0;
		lwp->lwp_asleep = 0;
	} else {
		thread_unlock_nopreempt(t);
		swtch();
	}

	return (error);
}

/*
 * Remove thread from specified turnstile sleep queue; retrieve its
 * free turnstile; if it is the last waiter, delete the turnstile
 * from the turnstile chain and if there is an inheritor, delete it
 * from the inheritor's t_prioinv chain.
 */
static void
turnstile_dequeue(kthread_t *t)
{
	turnstile_t *ts = t->t_ts;
	turnstile_chain_t *tc = &TURNSTILE_CHAIN(ts->ts_sobj);
	turnstile_t *tsfree, **tspp;

	ASSERT(DISP_LOCK_HELD(&tc->tc_lock));
	ASSERT(t->t_lockp == &tc->tc_lock);

	if ((tsfree = ts->ts_free) != NULL) {
		ASSERT(ts->ts_waiters > 1);
		ASSERT(tsfree->ts_waiters == 0);
		t->t_ts = tsfree;
		ts->ts_free = tsfree->ts_free;
		tsfree->ts_free = NULL;
	} else {
		/*
		 * The active turnstile's freelist is empty, so this
		 * must be the last waiter.  Remove the turnstile
		 * from the hash chain and leave the now-inactive
		 * turnstile attached to the thread we're waking.
		 * Note that the ts_inheritor for the turnstile
		 * may be NULL.  If one exists, its t_prioinv
		 * chain has to be updated.
		 */
		ASSERT(ts->ts_waiters == 1);
		if (ts->ts_inheritor != NULL) {
			(void) turnstile_pi_tsdelete(ts, ts->ts_inheritor);
			/*
			 * If we ever do a "disinherit" or "unboost", we need
			 * to do it only if "t" is a thread at the head of the
			 * sleep queue.  Since the sleep queue is prioritized,
			 * the disinherit is necessary only if the interrupted
			 * thread is the highest priority thread.
			 * Otherwise, there is a higher priority thread blocked
			 * on the turnstile, whose inheritance cannot be
			 * disinherited.  However, disinheriting is explicitly
			 * not done here, since it would require holding the
			 * inheritor's thread lock (see turnstile_unsleep()).
			 */
			ts->ts_inheritor = NULL;
		}
		tspp = &tc->tc_first;
		while (*tspp != ts)
			tspp = &(*tspp)->ts_next;
		*tspp = ts->ts_next;
		ASSERT(t->t_ts == ts);
	}
	ts->ts_waiters--;
	sleepq_dequeue(t);
	t->t_sobj_ops = NULL;
	t->t_wchan = NULL;
	t->t_wchan0 = NULL;
	ASSERT(t->t_state == TS_SLEEP);
}

/*
 * Wake threads that are blocked in a turnstile.
 */
void
turnstile_wakeup(turnstile_t *ts, int qnum, int nthreads, kthread_t *owner)
{
	turnstile_chain_t *tc = &TURNSTILE_CHAIN(ts->ts_sobj);
	sleepq_t *sqp = &ts->ts_sleepq[qnum];

	ASSERT(DISP_LOCK_HELD(&tc->tc_lock));

	/*
	 * Waive any priority we may have inherited from this turnstile.
	 */
	if (ts->ts_inheritor != NULL) {
		turnstile_pi_waive(ts);
	}
	while (nthreads-- > 0) {
		kthread_t *t = sqp->sq_first;
		ASSERT(t->t_ts == ts);
		ASSERT(ts->ts_waiters > 1 || ts->ts_inheritor == NULL);
		DTRACE_SCHED1(wakeup, kthread_t *, t);
		turnstile_dequeue(t);
		CL_WAKEUP(t); /* previous thread lock, tc_lock, not dropped */
		/*
		 * If the caller did direct handoff of ownership,
		 * make the new owner inherit from this turnstile.
		 */
		if (t == owner) {
			kthread_t *wp = ts->ts_sleepq[TS_WRITER_Q].sq_first;
			kthread_t *rp = ts->ts_sleepq[TS_READER_Q].sq_first;
			pri_t wpri = wp ? DISP_PRIO(wp) : 0;
			pri_t rpri = rp ? DISP_PRIO(rp) : 0;
			turnstile_pi_inherit(ts, t, MAX(wpri, rpri));
			owner = NULL;
		}
		thread_unlock_high(t);		/* drop run queue lock */
	}
	if (owner != NULL)
		panic("turnstile_wakeup: owner %p not woken", owner);
	disp_lock_exit(&tc->tc_lock);
}

/*
 * Change priority of a thread sleeping in a turnstile.
 */
void
turnstile_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip)
{
	sleepq_t *sqp = t->t_sleepq;

	sleepq_dequeue(t);
	*t_prip = pri;
	sleepq_insert(sqp, t);
}

/*
 * We don't allow spurious wakeups of threads blocked in turnstiles
 * for synch objects whose sobj_ops vector is initialized with the
 * following routine (e.g. kernel synchronization objects).
 * This is vital to the correctness of direct-handoff logic in some
 * synchronization primitives, and it also simplifies the PI logic.
 */
/* ARGSUSED */
void
turnstile_stay_asleep(kthread_t *t)
{
}

/*
 * Wake up a thread blocked in a turnstile.  Used to enable interruptibility
 * of threads blocked on a SOBJ_USER_PI sobj.
 *
 * The implications of this interface are:
 *
 * 1.	turnstile_block() may return with an EINTR.
 * 2.	When the owner of an sobj releases it, but no turnstile is found (i.e.
 *	no waiters), the (prior) owner must call turnstile_pi_recalc() to
 *	waive any priority inherited from interrupted waiters.
 *
 * When a waiter is interrupted, disinheriting its willed priority from the
 * inheritor would require holding the inheritor's thread lock, while also
 * holding the waiter's thread lock which is a turnstile lock.  If the
 * inheritor's thread lock is not free, and is also a turnstile lock that
 * is out of lock order, the waiter's thread lock would have to be dropped.
 * This leads to complications for the caller of turnstile_unsleep(), since
 * the caller holds the waiter's thread lock.  So, instead of disinheriting
 * on waiter interruption, the owner is required to follow rule 2 above.
 *
 * Avoiding disinherit on waiter interruption seems acceptable because
 * the owner runs at an unnecessarily high priority only while sobj is held,
 * which it would have done in any case, if the waiter had not been interrupted.
 */
void
turnstile_unsleep(kthread_t *t)
{
	turnstile_dequeue(t);
	THREAD_TRANSITION(t);
	CL_SETRUN(t);
}
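
/*
 * To illustrate rule 2 above, a minimal hypothetical sketch of a
 * SOBJ_USER_PI release path -- not code from this file, just the header
 * comment's foo_exit() pattern extended to cover the no-waiters case:
 *
 *	foo_pi_exit(lp)
 *	{
 *		ts = turnstile_lookup(lp);
 *		[ clear the owner field of lp
 *		if (ts == NULL) {
 *			turnstile_exit(lp);
 *			turnstile_pi_recalc();	[ waive any stale inheritance
 *		} else {
 *			[ if waking the last waiter, clear the waiters bit
 *			turnstile_wakeup(ts, TS_WRITER_Q, nwaiters, NULL);
 *		}
 *	}
 */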