/*-
 * Copyright (c) 2000 Jake Burkholder <jake@freebsd.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/condvar.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

/*
 * Common sanity checks for cv_wait* functions.
 */
#define	CV_ASSERT(cvp, mp, td) do {					\
	KASSERT((td) != NULL, ("%s: curthread NULL", __func__));	\
	KASSERT((td)->td_state == TDS_RUNNING,				\
	    ("%s: not TDS_RUNNING", __func__));				\
	KASSERT((cvp) != NULL, ("%s: cvp NULL", __func__));		\
	KASSERT((mp) != NULL, ("%s: mp NULL", __func__));		\
	mtx_assert((mp), MA_OWNED | MA_NOTRECURSED);			\
} while (0)

#ifdef INVARIANTS
#define	CV_WAIT_VALIDATE(cvp, mp) do {					\
	if (TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		/* Only waiter. */					\
		(cvp)->cv_mtx = (mp);					\
	} else {							\
		/*							\
		 * Other waiter; assert that we're using the		\
		 * same mutex.						\
		 */							\
		KASSERT((cvp)->cv_mtx == (mp),				\
		    ("%s: Multiple mutexes", __func__));		\
	}								\
} while (0)
#define	CV_SIGNAL_VALIDATE(cvp) do {					\
	if (!TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		KASSERT(mtx_owned((cvp)->cv_mtx),			\
		    ("%s: Mutex not owned", __func__));			\
	}								\
} while (0)
#else
#define	CV_WAIT_VALIDATE(cvp, mp)
#define	CV_SIGNAL_VALIDATE(cvp)
#endif

static void cv_timedwait_end(void *arg);
static void cv_check_upcall(struct thread *td);

/*
 * Initialize a condition variable.  Must be called before use.
 */
void
cv_init(struct cv *cvp, const char *desc)
{

	TAILQ_INIT(&cvp->cv_waitq);
	cvp->cv_mtx = NULL;
	cvp->cv_description = desc;
}

/*
 * Destroy a condition variable.  The condition variable must be re-initialized
 * in order to be re-used.
 */
void
cv_destroy(struct cv *cvp)
{

	KASSERT(cv_waitq_empty(cvp), ("%s: cv_waitq non-empty", __func__));
}

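/*
 * Illustrative sketch only, not part of this file's implementation: a
 * typical lifetime for a condition variable embedded in a hypothetical
 * driver softc (the names foo_softc, foo_cv, foo_mtx and foo_ready are
 * made up for the example; mutex setup and teardown are elided).
 *
 *	struct foo_softc {
 *		struct mtx	foo_mtx;
 *		struct cv	foo_cv;
 *		int		foo_ready;
 *	};
 *
 *	cv_init(&sc->foo_cv, "fooready");
 *	...
 *	cv_destroy(&sc->foo_cv);
 */
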
/*
 * Common code for cv_wait* functions.  All require sched_lock.
 */

/*
 * Decide if we need to queue an upcall.
 * This is copied from msleep(); perhaps this should be a common function.
 */
static void
cv_check_upcall(struct thread *td)
{

	/*
	 * If we are capable of async syscalls and there isn't already
	 * another one ready to return, start a new thread
	 * and queue it as ready to run.  Note that there is danger here
	 * because we need to make sure that we don't sleep allocating
	 * the thread (recursion here might be bad).
	 * Hence the TDF_INMSLEEP flag.
	 */
	if ((td->td_proc->p_flag & P_KSES) && td->td_mailbox &&
	    (td->td_flags & TDF_INMSLEEP) == 0) {
		/*
		 * If we have no queued work to do,
		 * upcall to the UTS to see if it has more work.
		 * We don't need to upcall now, just queue it.
		 */
		if (TAILQ_FIRST(&td->td_ksegrp->kg_runq) == NULL) {
			/* Don't recurse here! */
			td->td_flags |= TDF_INMSLEEP;
			thread_schedule_upcall(td, td->td_kse);
			td->td_flags &= ~TDF_INMSLEEP;
		}
	}
}

/*
 * Switch context.
 */
static __inline void
cv_switch(struct thread *td)
{

	td->td_state = TDS_SLP;
	td->td_proc->p_stats->p_ru.ru_nvcsw++;
	cv_check_upcall(td);
	mi_switch();
	CTR3(KTR_PROC, "cv_switch: resume thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
}

/*
 * Switch context, catching signals.
 */
static __inline int
cv_switch_catch(struct thread *td)
{
	struct proc *p;
	int sig;

	/*
	 * We put ourselves on the sleep queue and start our timeout before
	 * calling cursig, as we could stop there, and a wakeup or a SIGCONT (or
	 * both) could occur while we were stopped.  A SIGCONT would cause us to
	 * be marked as TDS_SLP without resuming us, thus we must be ready for
	 * sleep when cursig is called.  If the wakeup happens while we're
	 * stopped, td->td_wchan will be 0 upon return from cursig.
	 */
	td->td_flags |= TDF_SINTR;
	mtx_unlock_spin(&sched_lock);
	p = td->td_proc;
	PROC_LOCK(p);
	sig = cursig(td);	/* XXXKSE */
	if (thread_suspend_check(1))
		sig = SIGSTOP;
	mtx_lock_spin(&sched_lock);
	PROC_UNLOCK(p);
	if (sig != 0) {
		if (td->td_wchan != NULL)
			cv_waitq_remove(td);
		td->td_state = TDS_RUNNING;	/* XXXKSE */
	} else if (td->td_wchan != NULL) {
		cv_switch(td);
	}
	td->td_flags &= ~TDF_SINTR;

	return sig;
}

/*
 * Add a thread to the wait queue of a condition variable.
 */
static __inline void
cv_waitq_add(struct cv *cvp, struct thread *td)
{

	td->td_flags |= TDF_CVWAITQ;
	td->td_wchan = cvp;
	td->td_wmesg = cvp->cv_description;
	td->td_ksegrp->kg_slptime = 0;	/* XXXKSE */
	td->td_base_pri = td->td_priority;
	CTR3(KTR_PROC, "cv_waitq_add: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	TAILQ_INSERT_TAIL(&cvp->cv_waitq, td, td_slpq);
}

/*
 * Wait on a condition variable.  The current thread is placed on the condition
 * variable's wait queue and suspended.  A cv_signal or cv_broadcast on the same
 * condition variable will resume the thread.  The mutex is released before
 * sleeping and will be held on return.  It is recommended that the mutex be
 * held when cv_signal or cv_broadcast are called.
 */
void
cv_wait(struct cv *cvp, struct mtx *mp)
{
	struct thread *td;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold) {
		/*
		 * During autoconfiguration, just give interrupts
		 * a chance, then just return.  Don't run any other
		 * thread or panic below, in case this is the idle
		 * process and already asleep.
		 */
		return;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	cv_switch(td);

	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);
}

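/*
 * Illustrative sketch only, not part of this file's implementation: the
 * usual pattern for a waiter is to hold the mutex and re-check its
 * predicate in a loop around cv_wait(), since the predicate may still be
 * false when the wait returns (the names foo_mtx, foo_cv and foo_ready
 * are made up for the example).
 *
 *	mtx_lock(&foo_mtx);
 *	while (!foo_ready)
 *		cv_wait(&foo_cv, &foo_mtx);
 *	... use the now-ready resource, foo_mtx still held ...
 *	mtx_unlock(&foo_mtx);
 */
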
/*
 * Wait on a condition variable, allowing interruption by signals.  Return 0 if
 * the thread was resumed with cv_signal or cv_broadcast, EINTR or ERESTART if
 * a signal was caught.  If ERESTART is returned the system call should be
 * restarted if possible.
 */
int
cv_wait_sig(struct cv *cvp, struct mtx *mp)
{
	struct thread *td;
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	p = td->td_proc;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * procs or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return 0;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	sig = cv_switch_catch(td);

	mtx_unlock_spin(&sched_lock);

	PROC_LOCK(p);
	if (sig == 0)
		sig = cursig(td);	/* XXXKSE */
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	PROC_UNLOCK(p);
	if (p->p_flag & P_WEXIT)
		rval = EINTR;

#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}

/*
 * Wait on a condition variable for at most timo/hz seconds.  Returns 0 if the
 * process was resumed by cv_signal or cv_broadcast, EWOULDBLOCK if the timeout
 * expires.
 */
int
cv_timedwait(struct cv *cvp, struct mtx *mp, int timo)
{
	struct thread *td;
	int rval;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return 0;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
	cv_switch(td);

	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL)
		td->td_flags &= ~TDF_TIMOFAIL;
	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Work around race with cv_timedwait_end similar to that
		 * between msleep and endtsleep.
		 * Go back to sleep.
		 */
		td->td_flags |= TDF_TIMEOUT;
		td->td_state = TDS_SLP;
		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
	}

	if (td->td_proc->p_flag & P_WEXIT)
		rval = EWOULDBLOCK;
	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}

/*
 * Wait on a condition variable for at most timo/hz seconds, allowing
 * interruption by signals.  Returns 0 if the thread was resumed by cv_signal
 * or cv_broadcast, EWOULDBLOCK if the timeout expires, and EINTR or ERESTART
 * if a signal was caught.
 */
int
cv_timedwait_sig(struct cv *cvp, struct mtx *mp, int timo)
{
	struct thread *td;
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	p = td->td_proc;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return 0;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
	sig = cv_switch_catch(td);

	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL)
		td->td_flags &= ~TDF_TIMOFAIL;
	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Work around race with cv_timedwait_end similar to that
		 * between msleep and endtsleep.
		 * Go back to sleep.
		 */
		td->td_flags |= TDF_TIMEOUT;
		td->td_state = TDS_SLP;
		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
	}
	mtx_unlock_spin(&sched_lock);

	PROC_LOCK(p);
	if (sig == 0)
		sig = cursig(td);
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	PROC_UNLOCK(p);

	if (p->p_flag & P_WEXIT)
		rval = EINTR;

#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}

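/*
 * Illustrative sketch only, not part of this file's implementation:
 * checking the return value of a timed, interruptible wait (foo_mtx,
 * foo_cv and foo_ready are made up for the example; 5 * hz is roughly
 * five seconds).
 *
 *	mtx_lock(&foo_mtx);
 *	error = 0;
 *	while (!foo_ready && error == 0)
 *		error = cv_timedwait_sig(&foo_cv, &foo_mtx, 5 * hz);
 *	mtx_unlock(&foo_mtx);
 *	if (error == EWOULDBLOCK)
 *		... the timeout expired before foo_ready became true ...
 *	else if (error == EINTR || error == ERESTART)
 *		... a signal was caught; return error to the caller ...
 */
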
/*
 * Common code for signal and broadcast.  Assumes waitq is not empty.  Must be
 * called with sched_lock held.
 */
static __inline void
cv_wakeup(struct cv *cvp)
{
	struct thread *td;

	mtx_assert(&sched_lock, MA_OWNED);
	td = TAILQ_FIRST(&cvp->cv_waitq);
	KASSERT(td->td_wchan == cvp, ("%s: bogus wchan", __func__));
	KASSERT(td->td_flags & TDF_CVWAITQ, ("%s: not on waitq", __func__));
	TAILQ_REMOVE(&cvp->cv_waitq, td, td_slpq);
	td->td_flags &= ~TDF_CVWAITQ;
	td->td_wchan = 0;
	if (td->td_state == TDS_SLP) {
		/* OPTIMIZED EXPANSION OF setrunnable(td); */
		CTR3(KTR_PROC, "cv_signal: thread %p (pid %d, %s)",
		    td, td->td_proc->p_pid, td->td_proc->p_comm);
		if (td->td_ksegrp->kg_slptime > 1)	/* XXXKSE */
			updatepri(td);
		td->td_ksegrp->kg_slptime = 0;
		if (td->td_proc->p_sflag & PS_INMEM) {
			setrunqueue(td);
			maybe_resched(td);
		} else {
			td->td_state = TDS_SWAPPED;
			if ((td->td_proc->p_sflag & PS_SWAPPINGIN) == 0) {
				td->td_proc->p_sflag |= PS_SWAPINREQ;
				wakeup(&proc0);
			}
		}
		/* END INLINE EXPANSION */
	}
}

/*
 * Signal a condition variable, waking up one waiting thread.  Will also wake
 * up the swapper if the process is not in memory, so that it can bring the
 * sleeping process in.  Note that this may also result in additional threads
 * being made runnable.  Should be called with the same mutex as was passed to
 * cv_wait held.
 */
void
cv_signal(struct cv *cvp)
{

	KASSERT(cvp != NULL, ("%s: cvp NULL", __func__));
	mtx_lock_spin(&sched_lock);
	if (!TAILQ_EMPTY(&cvp->cv_waitq)) {
		CV_SIGNAL_VALIDATE(cvp);
		cv_wakeup(cvp);
	}
	mtx_unlock_spin(&sched_lock);
}

/*
 * Broadcast a signal to a condition variable.  Wakes up all waiting threads.
 * Should be called with the same mutex as was passed to cv_wait held.
 */
void
cv_broadcast(struct cv *cvp)
{

	KASSERT(cvp != NULL, ("%s: cvp NULL", __func__));
	mtx_lock_spin(&sched_lock);
	CV_SIGNAL_VALIDATE(cvp);
	while (!TAILQ_EMPTY(&cvp->cv_waitq))
		cv_wakeup(cvp);
	mtx_unlock_spin(&sched_lock);
}

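/*
 * Illustrative sketch only, not part of this file's implementation: the
 * waker updates the predicate and signals while holding the same mutex
 * the waiters pass to cv_wait(), so the wakeup cannot slip in between a
 * waiter's predicate test and its sleep (foo_mtx, foo_cv and foo_ready
 * are made up for the example).
 *
 *	mtx_lock(&foo_mtx);
 *	foo_ready = 1;
 *	cv_signal(&foo_cv);	(or cv_broadcast(&foo_cv) to wake all waiters)
 *	mtx_unlock(&foo_mtx);
 */
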
/*
 * Remove a thread from the wait queue of its condition variable.  This may be
 * called externally.
 */
void
cv_waitq_remove(struct thread *td)
{
	struct cv *cvp;

	mtx_lock_spin(&sched_lock);
	if ((cvp = td->td_wchan) != NULL && td->td_flags & TDF_CVWAITQ) {
		TAILQ_REMOVE(&cvp->cv_waitq, td, td_slpq);
		td->td_flags &= ~TDF_CVWAITQ;
		td->td_wchan = NULL;
	}
	mtx_unlock_spin(&sched_lock);
}

/*
 * Timeout function for cv_timedwait.  Put the thread on the runqueue and set
 * its timeout flag.
 */
static void
cv_timedwait_end(void *arg)
{
	struct thread *td;

	td = arg;
	CTR3(KTR_PROC, "cv_timedwait_end: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	mtx_lock_spin(&sched_lock);
	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		if (td->td_proc->p_sflag & PS_INMEM) {
			setrunqueue(td);
			maybe_resched(td);
		} else {
			td->td_state = TDS_SWAPPED;
			if ((td->td_proc->p_sflag & PS_SWAPPINGIN) == 0) {
				td->td_proc->p_sflag |= PS_SWAPINREQ;
				wakeup(&proc0);
			}
		}
	} else if (td->td_wchan != NULL) {
		if (td->td_state == TDS_SLP)	/* XXXKSE */
			setrunnable(td);
		else
			cv_waitq_remove(td);
		td->td_flags |= TDF_TIMEOUT;
	} else
		td->td_flags |= TDF_TIMOFAIL;
	mtx_unlock_spin(&sched_lock);
}

/*
 * For now only abort interruptible waits.
 * The others will have to either complete on their own or have a timeout.
 */
void
cv_abort(struct thread *td)
{

	CTR3(KTR_PROC, "cv_abort: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	mtx_lock_spin(&sched_lock);
	if ((td->td_flags & (TDF_SINTR|TDF_TIMEOUT)) == TDF_SINTR) {
		if (td->td_wchan != NULL) {
			if (td->td_state == TDS_SLP)
				setrunnable(td);
			else
				cv_waitq_remove(td);
		}
	}
	mtx_unlock_spin(&sched_lock);
}