1 /*- 2 * Copyright (c) 2000 Jake Burkholder <jake@freebsd.org>. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29 #include "opt_ktrace.h" 30 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/lock.h> 34 #include <sys/mutex.h> 35 #include <sys/proc.h> 36 #include <sys/kernel.h> 37 #include <sys/ktr.h> 38 #include <sys/condvar.h> 39 #include <sys/signalvar.h> 40 #include <sys/resourcevar.h> 41 #ifdef KTRACE 42 #include <sys/uio.h> 43 #include <sys/ktrace.h> 44 #endif 45 46 /* 47 * Common sanity checks for cv_wait* functions. 48 */ 49 #define CV_ASSERT(cvp, mp, td) do { \ 50 KASSERT((td) != NULL, ("%s: curthread NULL", __func__)); \ 51 KASSERT((td)->td_state == TDS_RUNNING, ("%s: not TDS_RUNNING", __func__)); \ 52 KASSERT((cvp) != NULL, ("%s: cvp NULL", __func__)); \ 53 KASSERT((mp) != NULL, ("%s: mp NULL", __func__)); \ 54 mtx_assert((mp), MA_OWNED | MA_NOTRECURSED); \ 55 } while (0) 56 57 #ifdef INVARIANTS 58 #define CV_WAIT_VALIDATE(cvp, mp) do { \ 59 if (TAILQ_EMPTY(&(cvp)->cv_waitq)) { \ 60 /* Only waiter. */ \ 61 (cvp)->cv_mtx = (mp); \ 62 } else { \ 63 /* \ 64 * Other waiter; assert that we're using the \ 65 * same mutex. \ 66 */ \ 67 KASSERT((cvp)->cv_mtx == (mp), \ 68 ("%s: Multiple mutexes", __func__)); \ 69 } \ 70 } while (0) 71 #define CV_SIGNAL_VALIDATE(cvp) do { \ 72 if (!TAILQ_EMPTY(&(cvp)->cv_waitq)) { \ 73 KASSERT(mtx_owned((cvp)->cv_mtx), \ 74 ("%s: Mutex not owned", __func__)); \ 75 } \ 76 } while (0) 77 #else 78 #define CV_WAIT_VALIDATE(cvp, mp) 79 #define CV_SIGNAL_VALIDATE(cvp) 80 #endif 81 82 static void cv_timedwait_end(void *arg); 83 static void cv_check_upcall(struct thread *td); 84 85 /* 86 * Initialize a condition variable. Must be called before use. 87 */ 88 void 89 cv_init(struct cv *cvp, const char *desc) 90 { 91 92 TAILQ_INIT(&cvp->cv_waitq); 93 cvp->cv_mtx = NULL; 94 cvp->cv_description = desc; 95 } 96 97 /* 98 * Destroy a condition variable. The condition variable must be re-initialized 99 * in order to be re-used. 100 */ 101 void 102 cv_destroy(struct cv *cvp) 103 { 104 105 KASSERT(cv_waitq_empty(cvp), ("%s: cv_waitq non-empty", __func__)); 106 } 107 108 /* 109 * Common code for cv_wait* functions. All require sched_lock. 110 */ 111 112 /* 113 * Decide if we need to queue an upcall. 114 * This is copied from msleep(), perhaps this should be a common function. 115 */ 116 static void 117 cv_check_upcall(struct thread *td) 118 { 119 120 /* 121 * If we are capable of async syscalls and there isn't already 122 * another one ready to return, start a new thread 123 * and queue it as ready to run. Note that there is danger here 124 * because we need to make sure that we don't sleep allocating 125 * the thread (recursion here might be bad). 126 * Hence the TDF_INMSLEEP flag. 127 */ 128 if ((td->td_proc->p_flag & P_KSES) && td->td_mailbox && 129 (td->td_flags & TDF_INMSLEEP) == 0) { 130 /* 131 * If we have no queued work to do, 132 * upcall to the UTS to see if it has more work. 133 * We don't need to upcall now, just queue it. 134 */ 135 if (TAILQ_FIRST(&td->td_ksegrp->kg_runq) == NULL) { 136 /* Don't recurse here! */ 137 td->td_flags |= TDF_INMSLEEP; 138 thread_schedule_upcall(td, td->td_kse); 139 td->td_flags &= ~TDF_INMSLEEP; 140 } 141 } 142 } 143 144 /* 145 * Switch context. 146 */ 147 static __inline void 148 cv_switch(struct thread *td) 149 { 150 151 td->td_state = TDS_SLP; 152 td->td_proc->p_stats->p_ru.ru_nvcsw++; 153 cv_check_upcall(td); 154 mi_switch(); 155 CTR3(KTR_PROC, "cv_switch: resume thread %p (pid %d, %s)", td, 156 td->td_proc->p_pid, td->td_proc->p_comm); 157 } 158 159 /* 160 * Switch context, catching signals. 161 */ 162 static __inline int 163 cv_switch_catch(struct thread *td) 164 { 165 struct proc *p; 166 int sig; 167 168 /* 169 * We put ourselves on the sleep queue and start our timeout before 170 * calling cursig, as we could stop there, and a wakeup or a SIGCONT (or 171 * both) could occur while we were stopped. A SIGCONT would cause us to 172 * be marked as TDS_SLP without resuming us, thus we must be ready for 173 * sleep when cursig is called. If the wakeup happens while we're 174 * stopped, td->td_wchan will be 0 upon return from cursig. 175 */ 176 td->td_flags |= TDF_SINTR; 177 mtx_unlock_spin(&sched_lock); 178 p = td->td_proc; 179 PROC_LOCK(p); 180 sig = cursig(td); /* XXXKSE */ 181 if (thread_suspend_check(1)) 182 sig = SIGSTOP; 183 mtx_lock_spin(&sched_lock); 184 PROC_UNLOCK(p); 185 if (sig != 0) { 186 if (td->td_wchan != NULL) 187 cv_waitq_remove(td); 188 td->td_state = TDS_RUNNING; /* XXXKSE */ 189 } else if (td->td_wchan != NULL) { 190 cv_switch(td); 191 } 192 td->td_flags &= ~TDF_SINTR; 193 194 return sig; 195 } 196 197 /* 198 * Add a thread to the wait queue of a condition variable. 199 */ 200 static __inline void 201 cv_waitq_add(struct cv *cvp, struct thread *td) 202 { 203 204 /* 205 * Process may be sitting on a slpque if asleep() was called, remove it 206 * before re-adding. 207 */ 208 if (td->td_wchan != NULL) 209 unsleep(td); 210 211 td->td_flags |= TDF_CVWAITQ; 212 td->td_wchan = cvp; 213 td->td_wmesg = cvp->cv_description; 214 td->td_ksegrp->kg_slptime = 0; /* XXXKSE */ 215 td->td_base_pri = td->td_priority; 216 CTR3(KTR_PROC, "cv_waitq_add: thread %p (pid %d, %s)", td, 217 td->td_proc->p_pid, td->td_proc->p_comm); 218 TAILQ_INSERT_TAIL(&cvp->cv_waitq, td, td_slpq); 219 } 220 221 /* 222 * Wait on a condition variable. The current thread is placed on the condition 223 * variable's wait queue and suspended. A cv_signal or cv_broadcast on the same 224 * condition variable will resume the thread. The mutex is released before 225 * sleeping and will be held on return. It is recommended that the mutex be 226 * held when cv_signal or cv_broadcast are called. 227 */ 228 void 229 cv_wait(struct cv *cvp, struct mtx *mp) 230 { 231 struct thread *td; 232 WITNESS_SAVE_DECL(mp); 233 234 td = curthread; 235 #ifdef KTRACE 236 if (KTRPOINT(td, KTR_CSW)) 237 ktrcsw(1, 0); 238 #endif 239 CV_ASSERT(cvp, mp, td); 240 WITNESS_SLEEP(0, &mp->mtx_object); 241 WITNESS_SAVE(&mp->mtx_object, mp); 242 243 if (cold ) { 244 /* 245 * During autoconfiguration, just give interrupts 246 * a chance, then just return. Don't run any other 247 * thread or panic below, in case this is the idle 248 * process and already asleep. 249 */ 250 return; 251 } 252 253 mtx_lock_spin(&sched_lock); 254 255 CV_WAIT_VALIDATE(cvp, mp); 256 257 DROP_GIANT(); 258 mtx_unlock(mp); 259 260 cv_waitq_add(cvp, td); 261 cv_switch(td); 262 263 mtx_unlock_spin(&sched_lock); 264 #ifdef KTRACE 265 if (KTRPOINT(td, KTR_CSW)) 266 ktrcsw(0, 0); 267 #endif 268 PICKUP_GIANT(); 269 mtx_lock(mp); 270 WITNESS_RESTORE(&mp->mtx_object, mp); 271 } 272 273 /* 274 * Wait on a condition variable, allowing interruption by signals. Return 0 if 275 * the thread was resumed with cv_signal or cv_broadcast, EINTR or ERESTART if 276 * a signal was caught. If ERESTART is returned the system call should be 277 * restarted if possible. 278 */ 279 int 280 cv_wait_sig(struct cv *cvp, struct mtx *mp) 281 { 282 struct thread *td; 283 struct proc *p; 284 int rval; 285 int sig; 286 WITNESS_SAVE_DECL(mp); 287 288 td = curthread; 289 p = td->td_proc; 290 rval = 0; 291 #ifdef KTRACE 292 if (KTRPOINT(td, KTR_CSW)) 293 ktrcsw(1, 0); 294 #endif 295 CV_ASSERT(cvp, mp, td); 296 WITNESS_SLEEP(0, &mp->mtx_object); 297 WITNESS_SAVE(&mp->mtx_object, mp); 298 299 if (cold || panicstr) { 300 /* 301 * After a panic, or during autoconfiguration, just give 302 * interrupts a chance, then just return; don't run any other 303 * procs or panic below, in case this is the idle process and 304 * already asleep. 305 */ 306 return 0; 307 } 308 309 mtx_lock_spin(&sched_lock); 310 311 CV_WAIT_VALIDATE(cvp, mp); 312 313 DROP_GIANT(); 314 mtx_unlock(mp); 315 316 cv_waitq_add(cvp, td); 317 sig = cv_switch_catch(td); 318 319 mtx_unlock_spin(&sched_lock); 320 321 PROC_LOCK(p); 322 if (sig == 0) 323 sig = cursig(td); /* XXXKSE */ 324 if (sig != 0) { 325 if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig)) 326 rval = EINTR; 327 else 328 rval = ERESTART; 329 } 330 PROC_UNLOCK(p); 331 if (p->p_flag & P_WEXIT) 332 rval = EINTR; 333 334 #ifdef KTRACE 335 if (KTRPOINT(td, KTR_CSW)) 336 ktrcsw(0, 0); 337 #endif 338 PICKUP_GIANT(); 339 mtx_lock(mp); 340 WITNESS_RESTORE(&mp->mtx_object, mp); 341 342 return (rval); 343 } 344 345 /* 346 * Wait on a condition variable for at most timo/hz seconds. Returns 0 if the 347 * process was resumed by cv_signal or cv_broadcast, EWOULDBLOCK if the timeout 348 * expires. 349 */ 350 int 351 cv_timedwait(struct cv *cvp, struct mtx *mp, int timo) 352 { 353 struct thread *td; 354 int rval; 355 WITNESS_SAVE_DECL(mp); 356 357 td = curthread; 358 rval = 0; 359 #ifdef KTRACE 360 if (KTRPOINT(td, KTR_CSW)) 361 ktrcsw(1, 0); 362 #endif 363 CV_ASSERT(cvp, mp, td); 364 WITNESS_SLEEP(0, &mp->mtx_object); 365 WITNESS_SAVE(&mp->mtx_object, mp); 366 367 if (cold || panicstr) { 368 /* 369 * After a panic, or during autoconfiguration, just give 370 * interrupts a chance, then just return; don't run any other 371 * thread or panic below, in case this is the idle process and 372 * already asleep. 373 */ 374 return 0; 375 } 376 377 mtx_lock_spin(&sched_lock); 378 379 CV_WAIT_VALIDATE(cvp, mp); 380 381 DROP_GIANT(); 382 mtx_unlock(mp); 383 384 cv_waitq_add(cvp, td); 385 callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td); 386 cv_switch(td); 387 388 if (td->td_flags & TDF_TIMEOUT) { 389 td->td_flags &= ~TDF_TIMEOUT; 390 rval = EWOULDBLOCK; 391 } else if (td->td_flags & TDF_TIMOFAIL) 392 td->td_flags &= ~TDF_TIMOFAIL; 393 else if (callout_stop(&td->td_slpcallout) == 0) { 394 /* 395 * Work around race with cv_timedwait_end similar to that 396 * between msleep and endtsleep. 397 * Go back to sleep. 398 */ 399 td->td_flags |= TDF_TIMEOUT; 400 td->td_state = TDS_SLP; 401 td->td_proc->p_stats->p_ru.ru_nivcsw++; 402 mi_switch(); 403 } 404 405 if (td->td_proc->p_flag & P_WEXIT) 406 rval = EWOULDBLOCK; 407 mtx_unlock_spin(&sched_lock); 408 #ifdef KTRACE 409 if (KTRPOINT(td, KTR_CSW)) 410 ktrcsw(0, 0); 411 #endif 412 PICKUP_GIANT(); 413 mtx_lock(mp); 414 WITNESS_RESTORE(&mp->mtx_object, mp); 415 416 return (rval); 417 } 418 419 /* 420 * Wait on a condition variable for at most timo/hz seconds, allowing 421 * interruption by signals. Returns 0 if the thread was resumed by cv_signal 422 * or cv_broadcast, EWOULDBLOCK if the timeout expires, and EINTR or ERESTART if 423 * a signal was caught. 424 */ 425 int 426 cv_timedwait_sig(struct cv *cvp, struct mtx *mp, int timo) 427 { 428 struct thread *td; 429 struct proc *p; 430 int rval; 431 int sig; 432 WITNESS_SAVE_DECL(mp); 433 434 td = curthread; 435 p = td->td_proc; 436 rval = 0; 437 #ifdef KTRACE 438 if (KTRPOINT(td, KTR_CSW)) 439 ktrcsw(1, 0); 440 #endif 441 CV_ASSERT(cvp, mp, td); 442 WITNESS_SLEEP(0, &mp->mtx_object); 443 WITNESS_SAVE(&mp->mtx_object, mp); 444 445 if (cold || panicstr) { 446 /* 447 * After a panic, or during autoconfiguration, just give 448 * interrupts a chance, then just return; don't run any other 449 * thread or panic below, in case this is the idle process and 450 * already asleep. 451 */ 452 return 0; 453 } 454 455 mtx_lock_spin(&sched_lock); 456 457 CV_WAIT_VALIDATE(cvp, mp); 458 459 DROP_GIANT(); 460 mtx_unlock(mp); 461 462 cv_waitq_add(cvp, td); 463 callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td); 464 sig = cv_switch_catch(td); 465 466 if (td->td_flags & TDF_TIMEOUT) { 467 td->td_flags &= ~TDF_TIMEOUT; 468 rval = EWOULDBLOCK; 469 } else if (td->td_flags & TDF_TIMOFAIL) 470 td->td_flags &= ~TDF_TIMOFAIL; 471 else if (callout_stop(&td->td_slpcallout) == 0) { 472 /* 473 * Work around race with cv_timedwait_end similar to that 474 * between msleep and endtsleep. 475 * Go back to sleep. 476 */ 477 td->td_flags |= TDF_TIMEOUT; 478 td->td_state = TDS_SLP; 479 td->td_proc->p_stats->p_ru.ru_nivcsw++; 480 mi_switch(); 481 } 482 mtx_unlock_spin(&sched_lock); 483 484 PROC_LOCK(p); 485 if (sig == 0) 486 sig = cursig(td); 487 if (sig != 0) { 488 if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig)) 489 rval = EINTR; 490 else 491 rval = ERESTART; 492 } 493 PROC_UNLOCK(p); 494 495 if (p->p_flag & P_WEXIT) 496 rval = EINTR; 497 498 #ifdef KTRACE 499 if (KTRPOINT(td, KTR_CSW)) 500 ktrcsw(0, 0); 501 #endif 502 PICKUP_GIANT(); 503 mtx_lock(mp); 504 WITNESS_RESTORE(&mp->mtx_object, mp); 505 506 return (rval); 507 } 508 509 /* 510 * Common code for signal and broadcast. Assumes waitq is not empty. Must be 511 * called with sched_lock held. 512 */ 513 static __inline void 514 cv_wakeup(struct cv *cvp) 515 { 516 struct thread *td; 517 518 mtx_assert(&sched_lock, MA_OWNED); 519 td = TAILQ_FIRST(&cvp->cv_waitq); 520 KASSERT(td->td_wchan == cvp, ("%s: bogus wchan", __func__)); 521 KASSERT(td->td_flags & TDF_CVWAITQ, ("%s: not on waitq", __func__)); 522 TAILQ_REMOVE(&cvp->cv_waitq, td, td_slpq); 523 td->td_flags &= ~TDF_CVWAITQ; 524 td->td_wchan = 0; 525 if (td->td_state == TDS_SLP) { 526 /* OPTIMIZED EXPANSION OF setrunnable(td); */ 527 CTR3(KTR_PROC, "cv_signal: thread %p (pid %d, %s)", 528 td, td->td_proc->p_pid, td->td_proc->p_comm); 529 if (td->td_ksegrp->kg_slptime > 1) /* XXXKSE */ 530 updatepri(td); 531 td->td_ksegrp->kg_slptime = 0; 532 if (td->td_proc->p_sflag & PS_INMEM) { 533 setrunqueue(td); 534 maybe_resched(td); 535 } else { 536 td->td_proc->p_sflag |= PS_SWAPINREQ; 537 wakeup(&proc0); /* XXXKSE */ 538 } 539 /* END INLINE EXPANSION */ 540 } 541 } 542 543 /* 544 * Signal a condition variable, wakes up one waiting thread. Will also wakeup 545 * the swapper if the process is not in memory, so that it can bring the 546 * sleeping process in. Note that this may also result in additional threads 547 * being made runnable. Should be called with the same mutex as was passed to 548 * cv_wait held. 549 */ 550 void 551 cv_signal(struct cv *cvp) 552 { 553 554 KASSERT(cvp != NULL, ("%s: cvp NULL", __func__)); 555 mtx_lock_spin(&sched_lock); 556 if (!TAILQ_EMPTY(&cvp->cv_waitq)) { 557 CV_SIGNAL_VALIDATE(cvp); 558 cv_wakeup(cvp); 559 } 560 mtx_unlock_spin(&sched_lock); 561 } 562 563 /* 564 * Broadcast a signal to a condition variable. Wakes up all waiting threads. 565 * Should be called with the same mutex as was passed to cv_wait held. 566 */ 567 void 568 cv_broadcast(struct cv *cvp) 569 { 570 571 KASSERT(cvp != NULL, ("%s: cvp NULL", __func__)); 572 mtx_lock_spin(&sched_lock); 573 CV_SIGNAL_VALIDATE(cvp); 574 while (!TAILQ_EMPTY(&cvp->cv_waitq)) 575 cv_wakeup(cvp); 576 mtx_unlock_spin(&sched_lock); 577 } 578 579 /* 580 * Remove a thread from the wait queue of its condition variable. This may be 581 * called externally. 582 */ 583 void 584 cv_waitq_remove(struct thread *td) 585 { 586 struct cv *cvp; 587 588 mtx_lock_spin(&sched_lock); 589 if ((cvp = td->td_wchan) != NULL && td->td_flags & TDF_CVWAITQ) { 590 TAILQ_REMOVE(&cvp->cv_waitq, td, td_slpq); 591 td->td_flags &= ~TDF_CVWAITQ; 592 td->td_wchan = NULL; 593 } 594 mtx_unlock_spin(&sched_lock); 595 } 596 597 /* 598 * Timeout function for cv_timedwait. Put the thread on the runqueue and set 599 * its timeout flag. 600 */ 601 static void 602 cv_timedwait_end(void *arg) 603 { 604 struct thread *td; 605 606 td = arg; 607 CTR3(KTR_PROC, "cv_timedwait_end: thread %p (pid %d, %s)", td, td->td_proc->p_pid, 608 td->td_proc->p_comm); 609 mtx_lock_spin(&sched_lock); 610 if (td->td_flags & TDF_TIMEOUT) { 611 td->td_flags &= ~TDF_TIMEOUT; 612 setrunqueue(td); 613 } else if (td->td_wchan != NULL) { 614 if (td->td_state == TDS_SLP) /* XXXKSE */ 615 setrunnable(td); 616 else 617 cv_waitq_remove(td); 618 td->td_flags |= TDF_TIMEOUT; 619 } else 620 td->td_flags |= TDF_TIMOFAIL; 621 mtx_unlock_spin(&sched_lock); 622 } 623 624 /* 625 * For now only abort interruptable waits. 626 * The others will have to either complete on their own or have a timeout. 627 */ 628 void 629 cv_abort(struct thread *td) 630 { 631 632 CTR3(KTR_PROC, "cv_abort: thread %p (pid %d, %s)", td, 633 td->td_proc->p_pid, 634 td->td_proc->p_comm); 635 mtx_lock_spin(&sched_lock); 636 if ((td->td_flags & (TDF_SINTR|TDF_TIMEOUT)) == TDF_SINTR) { 637 if (td->td_wchan != NULL) { 638 if (td->td_state == TDS_SLP) 639 setrunnable(td); 640 else 641 cv_waitq_remove(td); 642 } 643 } 644 mtx_unlock_spin(&sched_lock); 645 } 646 647