1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/thread.h> 29 #include <sys/proc.h> 30 #include <sys/debug.h> 31 #include <sys/cmn_err.h> 32 #include <sys/systm.h> 33 #include <sys/sobject.h> 34 #include <sys/sleepq.h> 35 #include <sys/cpuvar.h> 36 #include <sys/condvar.h> 37 #include <sys/condvar_impl.h> 38 #include <sys/schedctl.h> 39 #include <sys/procfs.h> 40 #include <sys/sdt.h> 41 42 /* 43 * CV_MAX_WAITERS is the maximum number of waiters we track; once 44 * the number becomes higher than that, we look at the sleepq to 45 * see whether there are *really* any waiters. 46 */ 47 #define CV_MAX_WAITERS 1024 /* must be power of 2 */ 48 #define CV_WAITERS_MASK (CV_MAX_WAITERS - 1) 49 50 /* 51 * Threads don't "own" condition variables. 52 */ 53 /* ARGSUSED */ 54 static kthread_t * 55 cv_owner(void *cvp) 56 { 57 return (NULL); 58 } 59 60 /* 61 * Unsleep a thread that's blocked on a condition variable. 62 */ 63 static void 64 cv_unsleep(kthread_t *t) 65 { 66 condvar_impl_t *cvp = (condvar_impl_t *)t->t_wchan; 67 sleepq_head_t *sqh = SQHASH(cvp); 68 69 ASSERT(THREAD_LOCK_HELD(t)); 70 71 if (cvp == NULL) 72 panic("cv_unsleep: thread %p not on sleepq %p", t, sqh); 73 DTRACE_SCHED1(wakeup, kthread_t *, t); 74 sleepq_unsleep(t); 75 if (cvp->cv_waiters != CV_MAX_WAITERS) 76 cvp->cv_waiters--; 77 disp_lock_exit_high(&sqh->sq_lock); 78 CL_SETRUN(t); 79 } 80 81 /* 82 * Change the priority of a thread that's blocked on a condition variable. 83 */ 84 static void 85 cv_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip) 86 { 87 condvar_impl_t *cvp = (condvar_impl_t *)t->t_wchan; 88 sleepq_t *sqp = t->t_sleepq; 89 90 ASSERT(THREAD_LOCK_HELD(t)); 91 ASSERT(&SQHASH(cvp)->sq_queue == sqp); 92 93 if (cvp == NULL) 94 panic("cv_change_pri: %p not on sleep queue", t); 95 sleepq_dequeue(t); 96 *t_prip = pri; 97 sleepq_insert(sqp, t); 98 } 99 100 /* 101 * The sobj_ops vector exports a set of functions needed when a thread 102 * is asleep on a synchronization object of this type. 103 */ 104 static sobj_ops_t cv_sobj_ops = { 105 SOBJ_CV, cv_owner, cv_unsleep, cv_change_pri 106 }; 107 108 /* ARGSUSED */ 109 void 110 cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg) 111 { 112 ((condvar_impl_t *)cvp)->cv_waiters = 0; 113 } 114 115 /* 116 * cv_destroy is not currently needed, but is part of the DDI. 117 * This is in case cv_init ever needs to allocate something for a cv. 118 */ 119 /* ARGSUSED */ 120 void 121 cv_destroy(kcondvar_t *cvp) 122 { 123 ASSERT((((condvar_impl_t *)cvp)->cv_waiters & CV_WAITERS_MASK) == 0); 124 } 125 126 /* 127 * The cv_block() function blocks a thread on a condition variable 128 * by putting it in a hashed sleep queue associated with the 129 * synchronization object. 130 * 131 * Threads are taken off the hashed sleep queues via calls to 132 * cv_signal(), cv_broadcast(), or cv_unsleep(). 133 */ 134 static void 135 cv_block(condvar_impl_t *cvp) 136 { 137 kthread_t *t = curthread; 138 klwp_t *lwp = ttolwp(t); 139 sleepq_head_t *sqh; 140 141 ASSERT(THREAD_LOCK_HELD(t)); 142 ASSERT(t != CPU->cpu_idle_thread); 143 ASSERT(CPU_ON_INTR(CPU) == 0); 144 ASSERT(t->t_wchan0 == NULL && t->t_wchan == NULL); 145 ASSERT(t->t_state == TS_ONPROC); 146 147 t->t_schedflag &= ~TS_SIGNALLED; 148 CL_SLEEP(t); /* assign kernel priority */ 149 t->t_wchan = (caddr_t)cvp; 150 t->t_sobj_ops = &cv_sobj_ops; 151 DTRACE_SCHED(sleep); 152 153 /* 154 * The check for t_intr is to avoid doing the 155 * account for an interrupt thread on the still-pinned 156 * lwp's statistics. 157 */ 158 if (lwp != NULL && t->t_intr == NULL) { 159 lwp->lwp_ru.nvcsw++; 160 (void) new_mstate(t, LMS_SLEEP); 161 } 162 163 sqh = SQHASH(cvp); 164 disp_lock_enter_high(&sqh->sq_lock); 165 if (cvp->cv_waiters < CV_MAX_WAITERS) 166 cvp->cv_waiters++; 167 ASSERT(cvp->cv_waiters <= CV_MAX_WAITERS); 168 THREAD_SLEEP(t, &sqh->sq_lock); 169 sleepq_insert(&sqh->sq_queue, t); 170 /* 171 * THREAD_SLEEP() moves curthread->t_lockp to point to the 172 * lock sqh->sq_lock. This lock is later released by the caller 173 * when it calls thread_unlock() on curthread. 174 */ 175 } 176 177 #define cv_block_sig(t, cvp) \ 178 { (t)->t_flag |= T_WAKEABLE; cv_block(cvp); } 179 180 /* 181 * Block on the indicated condition variable and release the 182 * associated kmutex while blocked. 183 */ 184 void 185 cv_wait(kcondvar_t *cvp, kmutex_t *mp) 186 { 187 if (panicstr) 188 return; 189 190 ASSERT(curthread->t_schedflag & TS_DONT_SWAP); 191 thread_lock(curthread); /* lock the thread */ 192 cv_block((condvar_impl_t *)cvp); 193 thread_unlock_nopreempt(curthread); /* unlock the waiters field */ 194 mutex_exit(mp); 195 swtch(); 196 mutex_enter(mp); 197 } 198 199 /* 200 * Same as cv_wait except the thread will unblock at 'tim' 201 * (an absolute time) if it hasn't already unblocked. 202 * 203 * Returns the amount of time left from the original 'tim' value 204 * when it was unblocked. 205 */ 206 clock_t 207 cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t tim) 208 { 209 kthread_t *t = curthread; 210 timeout_id_t id; 211 clock_t timeleft; 212 int signalled; 213 214 if (panicstr) 215 return (-1); 216 217 timeleft = tim - lbolt; 218 if (timeleft <= 0) 219 return (-1); 220 id = realtime_timeout((void (*)(void *))setrun, t, timeleft); 221 thread_lock(t); /* lock the thread */ 222 cv_block((condvar_impl_t *)cvp); 223 thread_unlock_nopreempt(t); 224 mutex_exit(mp); 225 if ((tim - lbolt) <= 0) /* allow for wrap */ 226 setrun(t); 227 swtch(); 228 signalled = (t->t_schedflag & TS_SIGNALLED); 229 /* 230 * Get the time left. untimeout() returns -1 if the timeout has 231 * occured or the time remaining. If the time remaining is zero, 232 * the timeout has occured between when we were awoken and 233 * we called untimeout. We will treat this as if the timeout 234 * has occured and set timeleft to -1. 235 */ 236 timeleft = untimeout(id); 237 mutex_enter(mp); 238 if (timeleft <= 0) { 239 timeleft = -1; 240 if (signalled) /* avoid consuming the cv_signal() */ 241 cv_signal(cvp); 242 } 243 return (timeleft); 244 } 245 246 int 247 cv_wait_sig(kcondvar_t *cvp, kmutex_t *mp) 248 { 249 kthread_t *t = curthread; 250 proc_t *p = ttoproc(t); 251 klwp_t *lwp = ttolwp(t); 252 int rval = 1; 253 int signalled = 0; 254 255 if (panicstr) 256 return (rval); 257 258 /* 259 * The check for t_intr is to catch an interrupt thread 260 * that has not yet unpinned the thread underneath. 261 */ 262 if (lwp == NULL || t->t_intr) { 263 cv_wait(cvp, mp); 264 return (rval); 265 } 266 267 ASSERT(curthread->t_schedflag & TS_DONT_SWAP); 268 lwp->lwp_asleep = 1; 269 lwp->lwp_sysabort = 0; 270 thread_lock(t); 271 cv_block_sig(t, (condvar_impl_t *)cvp); 272 thread_unlock_nopreempt(t); 273 mutex_exit(mp); 274 if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t)) 275 setrun(t); 276 /* ASSERT(no locks are held) */ 277 swtch(); 278 signalled = (t->t_schedflag & TS_SIGNALLED); 279 t->t_flag &= ~T_WAKEABLE; 280 mutex_enter(mp); 281 if (ISSIG_PENDING(t, lwp, p)) { 282 mutex_exit(mp); 283 if (issig(FORREAL)) 284 rval = 0; 285 mutex_enter(mp); 286 } 287 if (lwp->lwp_sysabort || MUSTRETURN(p, t)) 288 rval = 0; 289 lwp->lwp_asleep = 0; 290 lwp->lwp_sysabort = 0; 291 if (rval == 0 && signalled) /* avoid consuming the cv_signal() */ 292 cv_signal(cvp); 293 return (rval); 294 } 295 296 /* 297 * Returns: 298 * Function result in order of presidence: 299 * 0 if a signal was received 300 * -1 if timeout occured 301 * >0 if awakened via cv_signal() or cv_broadcast(). 302 * (returns time remaining) 303 * 304 * cv_timedwait_sig() is now part of the DDI. 305 */ 306 clock_t 307 cv_timedwait_sig(kcondvar_t *cvp, kmutex_t *mp, clock_t tim) 308 { 309 kthread_t *t = curthread; 310 proc_t *p = ttoproc(t); 311 klwp_t *lwp = ttolwp(t); 312 timeout_id_t id; 313 clock_t rval = 1; 314 clock_t timeleft; 315 int signalled = 0; 316 317 if (panicstr) 318 return (rval); 319 320 /* 321 * If there is no lwp, then we don't need to wait for a signal. 322 * The check for t_intr is to catch an interrupt thread 323 * that has not yet unpinned the thread underneath. 324 */ 325 if (lwp == NULL || t->t_intr) 326 return (cv_timedwait(cvp, mp, tim)); 327 328 /* 329 * If tim is less than or equal to lbolt, then the timeout 330 * has already occured. So just check to see if there is a signal 331 * pending. If so return 0 indicating that there is a signal pending. 332 * Else return -1 indicating that the timeout occured. No need to 333 * wait on anything. 334 */ 335 timeleft = tim - lbolt; 336 if (timeleft <= 0) { 337 lwp->lwp_asleep = 1; 338 lwp->lwp_sysabort = 0; 339 rval = -1; 340 goto out; 341 } 342 343 /* 344 * Set the timeout and wait. 345 */ 346 id = realtime_timeout((void (*)(void *))setrun, t, timeleft); 347 lwp->lwp_asleep = 1; 348 lwp->lwp_sysabort = 0; 349 thread_lock(t); 350 cv_block_sig(t, (condvar_impl_t *)cvp); 351 thread_unlock_nopreempt(t); 352 mutex_exit(mp); 353 if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || (tim - lbolt <= 0)) 354 setrun(t); 355 /* ASSERT(no locks are held) */ 356 swtch(); 357 signalled = (t->t_schedflag & TS_SIGNALLED); 358 t->t_flag &= ~T_WAKEABLE; 359 mutex_enter(mp); 360 361 /* 362 * Untimeout the thread. untimeout() returns -1 if the timeout has 363 * occured or the time remaining. If the time remaining is zero, 364 * the timeout has occured between when we were awoken and 365 * we called untimeout. We will treat this as if the timeout 366 * has occured and set rval to -1. 367 */ 368 rval = untimeout(id); 369 if (rval <= 0) 370 rval = -1; 371 372 /* 373 * Check to see if a signal is pending. If so, regardless of whether 374 * or not we were awoken due to the signal, the signal is now pending 375 * and a return of 0 has the highest priority. 376 */ 377 out: 378 if (ISSIG_PENDING(t, lwp, p)) { 379 mutex_exit(mp); 380 if (issig(FORREAL)) 381 rval = 0; 382 mutex_enter(mp); 383 } 384 if (lwp->lwp_sysabort || MUSTRETURN(p, t)) 385 rval = 0; 386 lwp->lwp_asleep = 0; 387 lwp->lwp_sysabort = 0; 388 if (rval <= 0 && signalled) /* avoid consuming the cv_signal() */ 389 cv_signal(cvp); 390 return (rval); 391 } 392 393 /* 394 * Like cv_wait_sig_swap but allows the caller to indicate (with a 395 * non-NULL sigret) that they will take care of signalling the cv 396 * after wakeup, if necessary. This is a vile hack that should only 397 * be used when no other option is available; almost all callers 398 * should just use cv_wait_sig_swap (which takes care of the cv_signal 399 * stuff automatically) instead. 400 */ 401 int 402 cv_wait_sig_swap_core(kcondvar_t *cvp, kmutex_t *mp, int *sigret) 403 { 404 kthread_t *t = curthread; 405 proc_t *p = ttoproc(t); 406 klwp_t *lwp = ttolwp(t); 407 int rval = 1; 408 int signalled = 0; 409 410 if (panicstr) 411 return (rval); 412 413 /* 414 * The check for t_intr is to catch an interrupt thread 415 * that has not yet unpinned the thread underneath. 416 */ 417 if (lwp == NULL || t->t_intr) { 418 cv_wait(cvp, mp); 419 return (rval); 420 } 421 422 lwp->lwp_asleep = 1; 423 lwp->lwp_sysabort = 0; 424 thread_lock(t); 425 t->t_kpri_req = 0; /* don't need kernel priority */ 426 cv_block_sig(t, (condvar_impl_t *)cvp); 427 /* I can be swapped now */ 428 curthread->t_schedflag &= ~TS_DONT_SWAP; 429 thread_unlock_nopreempt(t); 430 mutex_exit(mp); 431 if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t)) 432 setrun(t); 433 /* ASSERT(no locks are held) */ 434 swtch(); 435 signalled = (t->t_schedflag & TS_SIGNALLED); 436 t->t_flag &= ~T_WAKEABLE; 437 /* TS_DONT_SWAP set by disp() */ 438 ASSERT(curthread->t_schedflag & TS_DONT_SWAP); 439 mutex_enter(mp); 440 if (ISSIG_PENDING(t, lwp, p)) { 441 mutex_exit(mp); 442 if (issig(FORREAL)) 443 rval = 0; 444 mutex_enter(mp); 445 } 446 if (lwp->lwp_sysabort || MUSTRETURN(p, t)) 447 rval = 0; 448 lwp->lwp_asleep = 0; 449 lwp->lwp_sysabort = 0; 450 if (rval == 0) { 451 if (sigret != NULL) 452 *sigret = signalled; /* just tell the caller */ 453 else if (signalled) 454 cv_signal(cvp); /* avoid consuming the cv_signal() */ 455 } 456 return (rval); 457 } 458 459 /* 460 * Same as cv_wait_sig but the thread can be swapped out while waiting. 461 * This should only be used when we know we aren't holding any locks. 462 */ 463 int 464 cv_wait_sig_swap(kcondvar_t *cvp, kmutex_t *mp) 465 { 466 return (cv_wait_sig_swap_core(cvp, mp, NULL)); 467 } 468 469 void 470 cv_signal(kcondvar_t *cvp) 471 { 472 condvar_impl_t *cp = (condvar_impl_t *)cvp; 473 474 /* make sure the cv_waiters field looks sane */ 475 ASSERT(cp->cv_waiters <= CV_MAX_WAITERS); 476 if (cp->cv_waiters > 0) { 477 sleepq_head_t *sqh = SQHASH(cp); 478 disp_lock_enter(&sqh->sq_lock); 479 ASSERT(CPU_ON_INTR(CPU) == 0); 480 if (cp->cv_waiters & CV_WAITERS_MASK) { 481 kthread_t *t; 482 cp->cv_waiters--; 483 t = sleepq_wakeone_chan(&sqh->sq_queue, cp); 484 /* 485 * If cv_waiters is non-zero (and less than 486 * CV_MAX_WAITERS) there should be a thread 487 * in the queue. 488 */ 489 ASSERT(t != NULL); 490 } else if (sleepq_wakeone_chan(&sqh->sq_queue, cp) == NULL) { 491 cp->cv_waiters = 0; 492 } 493 disp_lock_exit(&sqh->sq_lock); 494 } 495 } 496 497 void 498 cv_broadcast(kcondvar_t *cvp) 499 { 500 condvar_impl_t *cp = (condvar_impl_t *)cvp; 501 502 /* make sure the cv_waiters field looks sane */ 503 ASSERT(cp->cv_waiters <= CV_MAX_WAITERS); 504 if (cp->cv_waiters > 0) { 505 sleepq_head_t *sqh = SQHASH(cp); 506 disp_lock_enter(&sqh->sq_lock); 507 ASSERT(CPU_ON_INTR(CPU) == 0); 508 sleepq_wakeall_chan(&sqh->sq_queue, cp); 509 cp->cv_waiters = 0; 510 disp_lock_exit(&sqh->sq_lock); 511 } 512 } 513 514 /* 515 * Same as cv_wait(), but wakes up (after wakeup_time milliseconds) to check 516 * for requests to stop, like cv_wait_sig() but without dealing with signals. 517 * This is a horrible kludge. It is evil. It is vile. It is swill. 518 * If your code has to call this function then your code is the same. 519 */ 520 void 521 cv_wait_stop(kcondvar_t *cvp, kmutex_t *mp, int wakeup_time) 522 { 523 kthread_t *t = curthread; 524 klwp_t *lwp = ttolwp(t); 525 proc_t *p = ttoproc(t); 526 timeout_id_t id; 527 clock_t tim; 528 529 if (panicstr) 530 return; 531 532 /* 533 * If there is no lwp, then we don't need to eventually stop it 534 * The check for t_intr is to catch an interrupt thread 535 * that has not yet unpinned the thread underneath. 536 */ 537 if (lwp == NULL || t->t_intr) { 538 cv_wait(cvp, mp); 539 return; 540 } 541 542 /* 543 * Wakeup in wakeup_time milliseconds, i.e., human time. 544 */ 545 tim = lbolt + MSEC_TO_TICK(wakeup_time); 546 id = realtime_timeout((void (*)(void *))setrun, t, tim - lbolt); 547 thread_lock(t); /* lock the thread */ 548 cv_block((condvar_impl_t *)cvp); 549 thread_unlock_nopreempt(t); 550 mutex_exit(mp); 551 /* ASSERT(no locks are held); */ 552 if ((tim - lbolt) <= 0) /* allow for wrap */ 553 setrun(t); 554 swtch(); 555 (void) untimeout(id); 556 557 /* 558 * Check for reasons to stop, if lwp_nostop is not true. 559 * See issig_forreal() for explanations of the various stops. 560 */ 561 mutex_enter(&p->p_lock); 562 while (lwp->lwp_nostop == 0 && !(p->p_flag & SEXITLWPS)) { 563 /* 564 * Hold the lwp here for watchpoint manipulation. 565 */ 566 if (t->t_proc_flag & TP_PAUSE) { 567 stop(PR_SUSPENDED, SUSPEND_PAUSE); 568 continue; 569 } 570 /* 571 * System checkpoint. 572 */ 573 if (t->t_proc_flag & TP_CHKPT) { 574 stop(PR_CHECKPOINT, 0); 575 continue; 576 } 577 /* 578 * Honor fork1(), watchpoint activity (remapping a page), 579 * and lwp_suspend() requests. 580 */ 581 if ((p->p_flag & (SHOLDFORK1|SHOLDWATCH)) || 582 (t->t_proc_flag & TP_HOLDLWP)) { 583 stop(PR_SUSPENDED, SUSPEND_NORMAL); 584 continue; 585 } 586 /* 587 * Honor /proc requested stop. 588 */ 589 if (t->t_proc_flag & TP_PRSTOP) { 590 stop(PR_REQUESTED, 0); 591 } 592 /* 593 * If some lwp in the process has already stopped 594 * showing PR_JOBCONTROL, stop in sympathy with it. 595 */ 596 if (p->p_stopsig && t != p->p_agenttp) { 597 stop(PR_JOBCONTROL, p->p_stopsig); 598 continue; 599 } 600 break; 601 } 602 mutex_exit(&p->p_lock); 603 mutex_enter(mp); 604 } 605 606 /* 607 * Like cv_timedwait_sig(), but takes an absolute hires future time 608 * rather than a future time in clock ticks. Will not return showing 609 * that a timeout occurred until the future time is passed. 610 * If 'when' is a NULL pointer, no timeout will occur. 611 * Returns: 612 * Function result in order of presidence: 613 * 0 if a signal was received 614 * -1 if timeout occured 615 * >0 if awakened via cv_signal() or cv_broadcast() 616 * or by a spurious wakeup. 617 * (might return time remaining) 618 * As a special test, if someone abruptly resets the system time 619 * (but not through adjtime(2); drifting of the clock is allowed and 620 * expected [see timespectohz_adj()]), then we force a return of -1 621 * so the caller can return a premature timeout to the calling process 622 * so it can reevaluate the situation in light of the new system time. 623 * (The system clock has been reset if timecheck != timechanged.) 624 */ 625 int 626 cv_waituntil_sig(kcondvar_t *cvp, kmutex_t *mp, 627 timestruc_t *when, int timecheck) 628 { 629 timestruc_t now; 630 timestruc_t delta; 631 int rval; 632 633 if (when == NULL) 634 return (cv_wait_sig_swap(cvp, mp)); 635 636 gethrestime(&now); 637 delta = *when; 638 timespecsub(&delta, &now); 639 if (delta.tv_sec < 0 || (delta.tv_sec == 0 && delta.tv_nsec == 0)) { 640 /* 641 * We have already reached the absolute future time. 642 * Call cv_timedwait_sig() just to check for signals. 643 * We will return immediately with either 0 or -1. 644 */ 645 rval = cv_timedwait_sig(cvp, mp, lbolt); 646 } else { 647 if (timecheck == timechanged) { 648 rval = cv_timedwait_sig(cvp, mp, 649 lbolt + timespectohz_adj(when, now)); 650 } else { 651 /* 652 * Someone reset the system time; 653 * just force an immediate timeout. 654 */ 655 rval = -1; 656 } 657 if (rval == -1 && timecheck == timechanged) { 658 /* 659 * Even though cv_timedwait_sig() returned showing a 660 * timeout, the future time may not have passed yet. 661 * If not, change rval to indicate a normal wakeup. 662 */ 663 gethrestime(&now); 664 delta = *when; 665 timespecsub(&delta, &now); 666 if (delta.tv_sec > 0 || (delta.tv_sec == 0 && 667 delta.tv_nsec > 0)) 668 rval = 1; 669 } 670 } 671 return (rval); 672 } 673