1 /*- 2 * Copyright (c) 2000 Jake Burkholder <jake@freebsd.org>. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 #include "opt_ktrace.h" 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/lock.h> 35 #include <sys/mutex.h> 36 #include <sys/proc.h> 37 #include <sys/kernel.h> 38 #include <sys/ktr.h> 39 #include <sys/condvar.h> 40 #include <sys/sched.h> 41 #include <sys/signalvar.h> 42 #include <sys/resourcevar.h> 43 #ifdef KTRACE 44 #include <sys/uio.h> 45 #include <sys/ktrace.h> 46 #endif 47 48 /* 49 * Common sanity checks for cv_wait* functions. 
 */
#define	CV_ASSERT(cvp, mp, td) do {					\
	KASSERT((td) != NULL, ("%s: curthread NULL", __func__));	\
	KASSERT(TD_IS_RUNNING(td), ("%s: not TDS_RUNNING", __func__));	\
	KASSERT((cvp) != NULL, ("%s: cvp NULL", __func__));		\
	KASSERT((mp) != NULL, ("%s: mp NULL", __func__));		\
	mtx_assert((mp), MA_OWNED | MA_NOTRECURSED);			\
} while (0)

#ifdef INVARIANTS
/*
 * Record or check the interlock mutex of a condition variable: the
 * first waiter stores its mutex in cv_mtx; every later waiter must
 * pass that same mutex.
 */
#define	CV_WAIT_VALIDATE(cvp, mp) do {					\
	if (TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		/* Only waiter. */					\
		(cvp)->cv_mtx = (mp);					\
	} else {							\
		/*							\
		 * Other waiter; assert that we're using the		\
		 * same mutex.						\
		 */							\
		KASSERT((cvp)->cv_mtx == (mp),				\
		    ("%s: Multiple mutexes", __func__));		\
	}								\
} while (0)

/*
 * If the condition variable has waiters, the signalling thread must
 * hold the mutex those waiters slept on.
 */
#define	CV_SIGNAL_VALIDATE(cvp) do {					\
	if (!TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		KASSERT(mtx_owned((cvp)->cv_mtx),			\
		    ("%s: Mutex not owned", __func__));			\
	}								\
} while (0)

#else
#define	CV_WAIT_VALIDATE(cvp, mp)
#define	CV_SIGNAL_VALIDATE(cvp)
#endif

/* Callout handler armed by the cv_timedwait* variants. */
static void cv_timedwait_end(void *arg);

/*
 * Initialize a condition variable.  Must be called before use.
 */
void
cv_init(struct cv *cvp, const char *desc)
{

	TAILQ_INIT(&cvp->cv_waitq);
	cvp->cv_mtx = NULL;
	cvp->cv_description = desc;
}

/*
 * Destroy a condition variable.  The condition variable must be re-initialized
 * in order to be re-used.  Destroying a cv that still has waiters is a bug,
 * hence the assertion.
 */
void
cv_destroy(struct cv *cvp)
{

	KASSERT(cv_waitq_empty(cvp), ("%s: cv_waitq non-empty", __func__));
}

/*
 * Common code for cv_wait* functions.  All require sched_lock.
 */

/*
 * Switch context: mark the thread sleeping, charge a voluntary context
 * switch, and switch away.  Execution resumes here once the thread has
 * been made runnable again.
 */
static __inline void
cv_switch(struct thread *td)
{
	TD_SET_SLEEPING(td);
	td->td_proc->p_stats->p_ru.ru_nvcsw++;
	mi_switch();
	CTR3(KTR_PROC, "cv_switch: resume thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
}

/*
 * Switch context, catching signals.
 */
static __inline int
cv_switch_catch(struct thread *td)
{
	struct proc *p;
	int sig;

	/*
	 * We put ourselves on the sleep queue and start our timeout before
	 * calling cursig, as we could stop there, and a wakeup or a SIGCONT (or
	 * both) could occur while we were stopped.  A SIGCONT would cause us to
	 * be marked as TDS_SLP without resuming us, thus we must be ready for
	 * sleep when cursig is called.  If the wakeup happens while we're
	 * stopped, td->td_wchan will be 0 upon return from cursig,
	 * and TD_ON_SLEEPQ() will return false.
	 */
	td->td_flags |= TDF_SINTR;
	mtx_unlock_spin(&sched_lock);
	p = td->td_proc;
	PROC_LOCK(p);
	mtx_lock(&p->p_sigacts->ps_mtx);
	sig = cursig(td);
	mtx_unlock(&p->p_sigacts->ps_mtx);
	/* A pending single-thread suspension is treated like SIGSTOP. */
	if (thread_suspend_check(1))
		sig = SIGSTOP;
	mtx_lock_spin(&sched_lock);
	PROC_UNLOCK(p);
	if (sig != 0) {
		/* Signal pending: abort the sleep instead of switching. */
		if (TD_ON_SLEEPQ(td))
			cv_waitq_remove(td);
		TD_SET_RUNNING(td);
	} else if (TD_ON_SLEEPQ(td)) {
		/* No signal and still queued: sleep for real. */
		cv_switch(td);
	}
	td->td_flags &= ~TDF_SINTR;

	/* Returns 0 if resumed normally, otherwise the pending signal. */
	return sig;
}

/*
 * Add a thread to the wait queue of a condition variable.
 * Caller holds sched_lock (all cv_wait* common code requires it).
 */
static __inline void
cv_waitq_add(struct cv *cvp, struct thread *td)
{

	/* Mark the thread as a cv waiter and record what it sleeps on. */
	td->td_flags |= TDF_CVWAITQ;
	TD_SET_ON_SLEEPQ(td);
	td->td_wchan = cvp;
	td->td_wmesg = cvp->cv_description;
	CTR3(KTR_PROC, "cv_waitq_add: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	TAILQ_INSERT_TAIL(&cvp->cv_waitq, td, td_slpq);
	sched_sleep(td, td->td_priority);
}

/*
 * Wait on a condition variable.  The current thread is placed on the condition
 * variable's wait queue and suspended.  A cv_signal or cv_broadcast on the same
 * condition variable will resume the thread.  The mutex is released before
 * sleeping and will be held on return.  It is recommended that the mutex be
 * held when cv_signal or cv_broadcast are called.
192 */ 193 void 194 cv_wait(struct cv *cvp, struct mtx *mp) 195 { 196 struct thread *td; 197 WITNESS_SAVE_DECL(mp); 198 199 td = curthread; 200 #ifdef KTRACE 201 if (KTRPOINT(td, KTR_CSW)) 202 ktrcsw(1, 0); 203 #endif 204 CV_ASSERT(cvp, mp, td); 205 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object, 206 "Waiting on \"%s\"", cvp->cv_description); 207 WITNESS_SAVE(&mp->mtx_object, mp); 208 209 if (cold ) { 210 /* 211 * During autoconfiguration, just give interrupts 212 * a chance, then just return. Don't run any other 213 * thread or panic below, in case this is the idle 214 * process and already asleep. 215 */ 216 return; 217 } 218 219 mtx_lock_spin(&sched_lock); 220 221 CV_WAIT_VALIDATE(cvp, mp); 222 223 DROP_GIANT(); 224 mtx_unlock(mp); 225 226 cv_waitq_add(cvp, td); 227 cv_switch(td); 228 229 mtx_unlock_spin(&sched_lock); 230 #ifdef KTRACE 231 if (KTRPOINT(td, KTR_CSW)) 232 ktrcsw(0, 0); 233 #endif 234 PICKUP_GIANT(); 235 mtx_lock(mp); 236 WITNESS_RESTORE(&mp->mtx_object, mp); 237 } 238 239 /* 240 * Wait on a condition variable, allowing interruption by signals. Return 0 if 241 * the thread was resumed with cv_signal or cv_broadcast, EINTR or ERESTART if 242 * a signal was caught. If ERESTART is returned the system call should be 243 * restarted if possible. 
 */
int
cv_wait_sig(struct cv *cvp, struct mtx *mp)
{
	struct thread *td;
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	p = td->td_proc;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object,
	    "Waiting on \"%s\"", cvp->cv_description);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * procs or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return 0;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	/* Release the interlock (and Giant) only after taking sched_lock. */
	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	sig = cv_switch_catch(td);

	mtx_unlock_spin(&sched_lock);

	/*
	 * Re-check for signals after waking, in case one was delivered
	 * while we slept; classify it as EINTR or ERESTART.
	 */
	PROC_LOCK(p);
	mtx_lock(&p->p_sigacts->ps_mtx);
	if (sig == 0)
		sig = cursig(td);	/* XXXKSE */
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	mtx_unlock(&p->p_sigacts->ps_mtx);
	/* An exiting process always gets EINTR so the wait unwinds. */
	if (p->p_flag & P_WEXIT)
		rval = EINTR;
	PROC_UNLOCK(p);

#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	/* Reacquire Giant and the caller's mutex before returning. */
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}

/*
 * Wait on a condition variable for at most timo/hz seconds.  Returns 0 if the
 * process was resumed by cv_signal or cv_broadcast, EWOULDBLOCK if the timeout
 * expires.
 */
int
cv_timedwait(struct cv *cvp, struct mtx *mp, int timo)
{
	struct thread *td;
	int rval;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object,
	    "Waiting on \"%s\"", cvp->cv_description);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return 0;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	/* Release the interlock (and Giant) only after taking sched_lock. */
	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	/* Arm the timeout before sleeping; cv_timedwait_end wakes us. */
	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
	cv_switch(td);

	if (td->td_flags & TDF_TIMEOUT) {
		/* The callout fired and took us off the wait queue. */
		td->td_flags &= ~TDF_TIMEOUT;
		rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL)
		td->td_flags &= ~TDF_TIMOFAIL;	/* callout fired after wakeup */
	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Work around race with cv_timedwait_end similar to that
		 * between msleep and endtsleep.
		 * Go back to sleep.
		 */
		TD_SET_SLEEPING(td);
		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		td->td_flags &= ~TDF_TIMOFAIL;
	}

	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	/* Reacquire Giant and the caller's mutex before returning. */
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}

/*
 * Wait on a condition variable for at most timo/hz seconds, allowing
 * interruption by signals.  Returns 0 if the thread was resumed by cv_signal
 * or cv_broadcast, EWOULDBLOCK if the timeout expires, and EINTR or ERESTART if
 * a signal was caught.
 */
int
cv_timedwait_sig(struct cv *cvp, struct mtx *mp, int timo)
{
	struct thread *td;
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	p = td->td_proc;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object,
	    "Waiting on \"%s\"", cvp->cv_description);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return 0;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	/* Release the interlock (and Giant) only after taking sched_lock. */
	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	/* Arm the timeout before sleeping; cv_timedwait_end wakes us. */
	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
	sig = cv_switch_catch(td);

	if (td->td_flags & TDF_TIMEOUT) {
		/* The callout fired and took us off the wait queue. */
		td->td_flags &= ~TDF_TIMEOUT;
		rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL)
		td->td_flags &= ~TDF_TIMOFAIL;	/* callout fired after wakeup */
	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Work around race with cv_timedwait_end similar to that
		 * between msleep and endtsleep.
		 * Go back to sleep.
		 */
		TD_SET_SLEEPING(td);
		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		td->td_flags &= ~TDF_TIMOFAIL;
	}
	mtx_unlock_spin(&sched_lock);

	/*
	 * Re-check for signals after waking, in case one was delivered
	 * while we slept; classify it as EINTR or ERESTART.
	 */
	PROC_LOCK(p);
	mtx_lock(&p->p_sigacts->ps_mtx);
	if (sig == 0)
		sig = cursig(td);
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	mtx_unlock(&p->p_sigacts->ps_mtx);
	/* An exiting process always gets EINTR so the wait unwinds. */
	if (p->p_flag & P_WEXIT)
		rval = EINTR;
	PROC_UNLOCK(p);

#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	/* Reacquire Giant and the caller's mutex before returning. */
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}

/*
 * Common code for signal and broadcast.  Assumes waitq is not empty.  Must be
 * called with sched_lock held.
 */
static __inline void
cv_wakeup(struct cv *cvp)
{
	struct thread *td;

	mtx_assert(&sched_lock, MA_OWNED);
	/* Wake the longest-waiting thread (head of the FIFO wait queue). */
	td = TAILQ_FIRST(&cvp->cv_waitq);
	KASSERT(td->td_wchan == cvp, ("%s: bogus wchan", __func__));
	KASSERT(td->td_flags & TDF_CVWAITQ, ("%s: not on waitq", __func__));
	cv_waitq_remove(td);
	TD_CLR_SLEEPING(td);
	setrunnable(td);
}

/*
 * Signal a condition variable, wakes up one waiting thread.  Will also wakeup
 * the swapper if the process is not in memory, so that it can bring the
 * sleeping process in.  Note that this may also result in additional threads
 * being made runnable.  Should be called with the same mutex as was passed to
 * cv_wait held.
 */
void
cv_signal(struct cv *cvp)
{

	KASSERT(cvp != NULL, ("%s: cvp NULL", __func__));
	mtx_lock_spin(&sched_lock);
	/* Signalling an empty cv is a no-op. */
	if (!TAILQ_EMPTY(&cvp->cv_waitq)) {
		CV_SIGNAL_VALIDATE(cvp);
		cv_wakeup(cvp);
	}
	mtx_unlock_spin(&sched_lock);
}

/*
 * Broadcast a signal to a condition variable.  Wakes up all waiting threads.
 * Should be called with the same mutex as was passed to cv_wait held.
 */
void
cv_broadcast(struct cv *cvp)
{

	KASSERT(cvp != NULL, ("%s: cvp NULL", __func__));
	mtx_lock_spin(&sched_lock);
	CV_SIGNAL_VALIDATE(cvp);
	/* Drain the wait queue, waking one thread at a time. */
	while (!TAILQ_EMPTY(&cvp->cv_waitq))
		cv_wakeup(cvp);
	mtx_unlock_spin(&sched_lock);
}

/*
 * Remove a thread from the wait queue of its condition variable.  This may be
 * called externally.  Caller must hold sched_lock; harmless no-op if the
 * thread is not on a cv wait queue.
 */
void
cv_waitq_remove(struct thread *td)
{
	struct cv *cvp;

	mtx_assert(&sched_lock, MA_OWNED);
	/* Only dequeue if the wchan is a cv and the cv-waiter flag is set. */
	if ((cvp = td->td_wchan) != NULL && td->td_flags & TDF_CVWAITQ) {
		TAILQ_REMOVE(&cvp->cv_waitq, td, td_slpq);
		td->td_flags &= ~TDF_CVWAITQ;
		td->td_wmesg = NULL;
		TD_CLR_ON_SLEEPQ(td);
	}
}

/*
 * Timeout function for cv_timedwait.  Put the thread on the runqueue and set
 * its timeout flag.
 */
static void
cv_timedwait_end(void *arg)
{
	struct thread *td;

	td = arg;
	CTR3(KTR_PROC, "cv_timedwait_end: thread %p (pid %d, %s)",
	    td, td->td_proc->p_pid, td->td_proc->p_comm);
	mtx_lock_spin(&sched_lock);
	if (TD_ON_SLEEPQ(td)) {
		/* Still asleep: the timeout wins; flag it for the waiter. */
		cv_waitq_remove(td);
		td->td_flags |= TDF_TIMEOUT;
	} else {
		/*
		 * Already off the sleep queue: flag that the callout fired
		 * late so the waiter can resolve the race (see the
		 * callout_stop workaround in cv_timedwait*).
		 */
		td->td_flags |= TDF_TIMOFAIL;
	}
	TD_CLR_SLEEPING(td);
	setrunnable(td);
	mtx_unlock_spin(&sched_lock);
}

/*
 * For now only abort interruptable waits.
 * The others will have to either complete on their own or have a timeout.
 */
void
cv_abort(struct thread *td)
{

	CTR3(KTR_PROC, "cv_abort: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	mtx_lock_spin(&sched_lock);
	/* Only interruptible (TDF_SINTR) sleeps that have not already
	 * timed out (TDF_TIMEOUT clear) are aborted. */
	if ((td->td_flags & (TDF_SINTR|TDF_TIMEOUT)) == TDF_SINTR) {
		if (TD_ON_SLEEPQ(td)) {
			cv_waitq_remove(td);
		}
		TD_CLR_SLEEPING(td);
		setrunnable(td);
	}
	mtx_unlock_spin(&sched_lock);
}