/*-
 * Copyright (c) 2000 Jake Burkholder <jake@freebsd.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/condvar.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

/*
 * Common sanity checks for cv_wait* functions.
 */
#define	CV_ASSERT(cvp, mp, td) do {					\
	KASSERT((td) != NULL, ("%s: curthread NULL", __func__));	\
	KASSERT(TD_IS_RUNNING(td), ("%s: not TDS_RUNNING", __func__));	\
	KASSERT((cvp) != NULL, ("%s: cvp NULL", __func__));		\
	KASSERT((mp) != NULL, ("%s: mp NULL", __func__));		\
	mtx_assert((mp), MA_OWNED | MA_NOTRECURSED);			\
} while (0)

#ifdef INVARIANTS
#define	CV_WAIT_VALIDATE(cvp, mp) do {					\
	if (TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		/* Only waiter. */					\
		(cvp)->cv_mtx = (mp);					\
	} else {							\
		/*							\
		 * Other waiter; assert that we're using the		\
		 * same mutex.						\
		 */							\
		KASSERT((cvp)->cv_mtx == (mp),				\
		    ("%s: Multiple mutexes", __func__));		\
	}								\
} while (0)

#define	CV_SIGNAL_VALIDATE(cvp) do {					\
	if (!TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		KASSERT(mtx_owned((cvp)->cv_mtx),			\
		    ("%s: Mutex not owned", __func__));			\
	}								\
} while (0)

#else
#define	CV_WAIT_VALIDATE(cvp, mp)
#define	CV_SIGNAL_VALIDATE(cvp)
#endif

static void cv_timedwait_end(void *arg);

/*
 * Initialize a condition variable.  Must be called before use.
 */
void
cv_init(struct cv *cvp, const char *desc)
{

	TAILQ_INIT(&cvp->cv_waitq);
	cvp->cv_mtx = NULL;
	cvp->cv_description = desc;
}

/*
 * Destroy a condition variable.  The condition variable must be re-initialized
 * in order to be re-used.
 */
void
cv_destroy(struct cv *cvp)
{

	KASSERT(cv_waitq_empty(cvp), ("%s: cv_waitq non-empty", __func__));
}
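/*
 * Illustrative sketch (not part of the original file): a condition
 * variable is normally paired at initialization time with the mutex
 * that protects the predicate it waits on.  The "xx_softc" structure
 * and function names below are hypothetical.
 *
 *	struct xx_softc {
 *		struct mtx	xx_mtx;
 *		struct cv	xx_cv;
 *		int		xx_ready;
 *	};
 *
 *	static void
 *	xx_init(struct xx_softc *sc)
 *	{
 *		mtx_init(&sc->xx_mtx, "xx", NULL, MTX_DEF);
 *		cv_init(&sc->xx_cv, "xxrdy");
 *		sc->xx_ready = 0;
 *	}
 *
 *	static void
 *	xx_fini(struct xx_softc *sc)
 *	{
 *		cv_destroy(&sc->xx_cv);		(waitq must already be empty)
 *		mtx_destroy(&sc->xx_mtx);
 *	}
 */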
/*
 * Common code for cv_wait* functions.  All require sched_lock.
 */

/*
 * Switch context.
 */
static __inline void
cv_switch(struct thread *td)
{
	TD_SET_SLEEPING(td);
	td->td_proc->p_stats->p_ru.ru_nvcsw++;
	mi_switch();
	CTR3(KTR_PROC, "cv_switch: resume thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
}

/*
 * Switch context, catching signals.
 */
static __inline int
cv_switch_catch(struct thread *td)
{
	struct proc *p;
	int sig;

	/*
	 * We put ourselves on the sleep queue and start our timeout before
	 * calling cursig, as we could stop there, and a wakeup or a SIGCONT (or
	 * both) could occur while we were stopped.  A SIGCONT would cause us to
	 * be marked as TDS_SLP without resuming us, thus we must be ready for
	 * sleep when cursig is called.  If the wakeup happens while we're
	 * stopped, td->td_wchan will be 0 upon return from cursig,
	 * and TD_ON_SLEEPQ() will return false.
	 */
	td->td_flags |= TDF_SINTR;
	mtx_unlock_spin(&sched_lock);
	p = td->td_proc;
	PROC_LOCK(p);
	sig = cursig(td);
	if (thread_suspend_check(1))
		sig = SIGSTOP;
	mtx_lock_spin(&sched_lock);
	PROC_UNLOCK(p);
	if (sig != 0) {
		if (TD_ON_SLEEPQ(td))
			cv_waitq_remove(td);
		TD_SET_RUNNING(td);
	} else if (TD_ON_SLEEPQ(td)) {
		cv_switch(td);
	}
	td->td_flags &= ~TDF_SINTR;

	return sig;
}

/*
 * Add a thread to the wait queue of a condition variable.
 */
static __inline void
cv_waitq_add(struct cv *cvp, struct thread *td)
{

	td->td_flags |= TDF_CVWAITQ;
	TD_SET_ON_SLEEPQ(td);
	td->td_wchan = cvp;
	td->td_wmesg = cvp->cv_description;
	CTR3(KTR_PROC, "cv_waitq_add: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	TAILQ_INSERT_TAIL(&cvp->cv_waitq, td, td_slpq);
	sched_sleep(td, td->td_priority);
}

/*
 * Wait on a condition variable.  The current thread is placed on the condition
 * variable's wait queue and suspended.  A cv_signal or cv_broadcast on the same
 * condition variable will resume the thread.  The mutex is released before
 * sleeping and will be held on return.  It is recommended that the mutex be
 * held when cv_signal or cv_broadcast are called.
 */
void
cv_wait(struct cv *cvp, struct mtx *mp)
{
	struct thread *td;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object,
	    "Waiting on \"%s\"", cvp->cv_description);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	cv_switch(td);

	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);
}
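/*
 * Illustrative sketch (hypothetical consumer, not part of the original
 * file): because the mutex is dropped while the thread sleeps, the
 * predicate must be re-tested in a loop after cv_wait returns.
 *
 *	mtx_lock(&sc->xx_mtx);
 *	while (sc->xx_ready == 0)
 *		cv_wait(&sc->xx_cv, &sc->xx_mtx);
 *	sc->xx_ready = 0;		(consume the event)
 *	mtx_unlock(&sc->xx_mtx);
 */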
/*
 * Wait on a condition variable, allowing interruption by signals.  Return 0 if
 * the thread was resumed with cv_signal or cv_broadcast, EINTR or ERESTART if
 * a signal was caught.  If ERESTART is returned the system call should be
 * restarted if possible.
 */
int
cv_wait_sig(struct cv *cvp, struct mtx *mp)
{
	struct thread *td;
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	p = td->td_proc;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object,
	    "Waiting on \"%s\"", cvp->cv_description);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * procs or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return 0;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	sig = cv_switch_catch(td);

	mtx_unlock_spin(&sched_lock);

	PROC_LOCK(p);
	if (sig == 0)
		sig = cursig(td);	/* XXXKSE */
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	PROC_UNLOCK(p);
	if (p->p_flag & P_WEXIT)
		rval = EINTR;

#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}

/*
 * Wait on a condition variable for at most timo/hz seconds.  Returns 0 if the
 * process was resumed by cv_signal or cv_broadcast, EWOULDBLOCK if the timeout
 * expires.
 */
int
cv_timedwait(struct cv *cvp, struct mtx *mp, int timo)
{
	struct thread *td;
	int rval;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object,
	    "Waiting on \"%s\"", cvp->cv_description);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return 0;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
	cv_switch(td);

	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL)
		td->td_flags &= ~TDF_TIMOFAIL;
	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Work around race with cv_timedwait_end similar to that
		 * between msleep and endtsleep.
		 * Go back to sleep.
		 */
		TD_SET_SLEEPING(td);
		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		td->td_flags &= ~TDF_TIMOFAIL;
	}

	if (td->td_proc->p_flag & P_WEXIT)
		rval = EWOULDBLOCK;
	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}
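/*
 * Illustrative sketch (hypothetical, not part of the original file):
 * a timed wait bounds the sleep to timo ticks; here roughly five
 * seconds, since timo is expressed in units of 1/hz.
 *
 *	int error;
 *
 *	error = 0;
 *	mtx_lock(&sc->xx_mtx);
 *	while (sc->xx_ready == 0 && error == 0)
 *		error = cv_timedwait(&sc->xx_cv, &sc->xx_mtx, 5 * hz);
 *	mtx_unlock(&sc->xx_mtx);
 *	if (error == EWOULDBLOCK)
 *		...handle the timeout...
 */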
/*
 * Wait on a condition variable for at most timo/hz seconds, allowing
 * interruption by signals.  Returns 0 if the thread was resumed by cv_signal
 * or cv_broadcast, EWOULDBLOCK if the timeout expires, and EINTR or ERESTART if
 * a signal was caught.
 */
int
cv_timedwait_sig(struct cv *cvp, struct mtx *mp, int timo)
{
	struct thread *td;
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	p = td->td_proc;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object,
	    "Waiting on \"%s\"", cvp->cv_description);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return 0;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
	sig = cv_switch_catch(td);

	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL)
		td->td_flags &= ~TDF_TIMOFAIL;
	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Work around race with cv_timedwait_end similar to that
		 * between msleep and endtsleep.
		 * Go back to sleep.
		 */
		TD_SET_SLEEPING(td);
		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		td->td_flags &= ~TDF_TIMOFAIL;
	}
	mtx_unlock_spin(&sched_lock);

	PROC_LOCK(p);
	if (sig == 0)
		sig = cursig(td);
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	PROC_UNLOCK(p);

	if (p->p_flag & P_WEXIT)
		rval = EINTR;

#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}

/*
 * Common code for signal and broadcast.  Assumes waitq is not empty.  Must be
 * called with sched_lock held.
 */
static __inline void
cv_wakeup(struct cv *cvp)
{
	struct thread *td;

	mtx_assert(&sched_lock, MA_OWNED);
	td = TAILQ_FIRST(&cvp->cv_waitq);
	KASSERT(td->td_wchan == cvp, ("%s: bogus wchan", __func__));
	KASSERT(td->td_flags & TDF_CVWAITQ, ("%s: not on waitq", __func__));
	cv_waitq_remove(td);
	TD_CLR_SLEEPING(td);
	setrunnable(td);
}

/*
 * Signal a condition variable, wakes up one waiting thread.  Will also wakeup
 * the swapper if the process is not in memory, so that it can bring the
 * sleeping process in.  Note that this may also result in additional threads
 * being made runnable.  Should be called with the same mutex as was passed to
 * cv_wait held.
 */
void
cv_signal(struct cv *cvp)
{

	KASSERT(cvp != NULL, ("%s: cvp NULL", __func__));
	mtx_lock_spin(&sched_lock);
	if (!TAILQ_EMPTY(&cvp->cv_waitq)) {
		CV_SIGNAL_VALIDATE(cvp);
		cv_wakeup(cvp);
	}
	mtx_unlock_spin(&sched_lock);
}
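/*
 * Illustrative sketch (hypothetical producer, not part of the original
 * file): the predicate is updated and the waiter awakened while the
 * associated mutex is held, as the comment above recommends.
 *
 *	mtx_lock(&sc->xx_mtx);
 *	sc->xx_ready = 1;
 *	cv_signal(&sc->xx_cv);
 *	mtx_unlock(&sc->xx_mtx);
 */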
/*
 * Broadcast a signal to a condition variable.  Wakes up all waiting threads.
 * Should be called with the same mutex as was passed to cv_wait held.
 */
void
cv_broadcast(struct cv *cvp)
{

	KASSERT(cvp != NULL, ("%s: cvp NULL", __func__));
	mtx_lock_spin(&sched_lock);
	CV_SIGNAL_VALIDATE(cvp);
	while (!TAILQ_EMPTY(&cvp->cv_waitq))
		cv_wakeup(cvp);
	mtx_unlock_spin(&sched_lock);
}

/*
 * Remove a thread from the wait queue of its condition variable.  This may be
 * called externally.
 */
void
cv_waitq_remove(struct thread *td)
{
	struct cv *cvp;

	mtx_assert(&sched_lock, MA_OWNED);
	if ((cvp = td->td_wchan) != NULL && td->td_flags & TDF_CVWAITQ) {
		TAILQ_REMOVE(&cvp->cv_waitq, td, td_slpq);
		td->td_flags &= ~TDF_CVWAITQ;
		td->td_wmesg = NULL;
		TD_CLR_ON_SLEEPQ(td);
	}
}

/*
 * Timeout function for cv_timedwait.  Put the thread on the runqueue and set
 * its timeout flag.
 */
static void
cv_timedwait_end(void *arg)
{
	struct thread *td;

	td = arg;
	CTR3(KTR_PROC, "cv_timedwait_end: thread %p (pid %d, %s)",
	    td, td->td_proc->p_pid, td->td_proc->p_comm);
	mtx_lock_spin(&sched_lock);
	if (TD_ON_SLEEPQ(td)) {
		cv_waitq_remove(td);
		td->td_flags |= TDF_TIMEOUT;
	} else {
		td->td_flags |= TDF_TIMOFAIL;
	}
	TD_CLR_SLEEPING(td);
	setrunnable(td);
	mtx_unlock_spin(&sched_lock);
}

/*
 * For now only abort interruptible waits.
 * The others will have to either complete on their own or have a timeout.
 */
void
cv_abort(struct thread *td)
{

	CTR3(KTR_PROC, "cv_abort: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	mtx_lock_spin(&sched_lock);
	if ((td->td_flags & (TDF_SINTR|TDF_TIMEOUT)) == TDF_SINTR) {
		if (TD_ON_SLEEPQ(td)) {
			cv_waitq_remove(td);
		}
		TD_CLR_SLEEPING(td);
		setrunnable(td);
	}
	mtx_unlock_spin(&sched_lock);
}
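/*
 * Illustrative sketch (hypothetical teardown path, not part of the
 * original file): cv_broadcast is typically used when every waiter must
 * re-evaluate its predicate, e.g. when a shared resource goes away; all
 * waiters must have drained before cv_destroy may be called.
 *
 *	mtx_lock(&sc->xx_mtx);
 *	sc->xx_dying = 1;		(hypothetical flag each waiter checks)
 *	cv_broadcast(&sc->xx_cv);
 *	mtx_unlock(&sc->xx_mtx);
 */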