/*-
 * Copyright (c) 2000 Jake Burkholder <jake@freebsd.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/condvar.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

/*
 * Common sanity checks for cv_wait* functions.
 */
#define	CV_ASSERT(cvp, mp, td) do {					\
	KASSERT((td) != NULL, ("%s: curthread NULL", __func__));	\
	KASSERT(TD_IS_RUNNING(td), ("%s: not TDS_RUNNING", __func__));	\
	KASSERT((cvp) != NULL, ("%s: cvp NULL", __func__));		\
	KASSERT((mp) != NULL, ("%s: mp NULL", __func__));		\
	mtx_assert((mp), MA_OWNED | MA_NOTRECURSED);			\
} while (0)

#ifdef INVARIANTS
#define	CV_WAIT_VALIDATE(cvp, mp) do {					\
	if (TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		/* Only waiter. */					\
		(cvp)->cv_mtx = (mp);					\
	} else {							\
		/*							\
		 * Other waiter; assert that we're using the		\
		 * same mutex.						\
		 */							\
		KASSERT((cvp)->cv_mtx == (mp),				\
		    ("%s: Multiple mutexes", __func__));		\
	}								\
} while (0)

#define	CV_SIGNAL_VALIDATE(cvp) do {					\
	if (!TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		KASSERT(mtx_owned((cvp)->cv_mtx),			\
		    ("%s: Mutex not owned", __func__));			\
	}								\
} while (0)

#else
#define	CV_WAIT_VALIDATE(cvp, mp)
#define	CV_SIGNAL_VALIDATE(cvp)
#endif

static void cv_timedwait_end(void *arg);

/*
 * Initialize a condition variable.  Must be called before use.
 */
void
cv_init(struct cv *cvp, const char *desc)
{

	TAILQ_INIT(&cvp->cv_waitq);
	cvp->cv_mtx = NULL;
	cvp->cv_description = desc;
}

/*
 * Destroy a condition variable.  The condition variable must be re-initialized
 * in order to be re-used.
 */
void
cv_destroy(struct cv *cvp)
{

	KASSERT(cv_waitq_empty(cvp), ("%s: cv_waitq non-empty", __func__));
}

/*
 * Common code for cv_wait* functions.  All require sched_lock.
 */

/*
 * Switch context.
 */
static __inline void
cv_switch(struct thread *td)
{

	/*
	 * If we are capable of async syscalls and there isn't already
	 * another one ready to return, start a new thread
	 * and queue it as ready to run.  Note that there is danger here
	 * because we need to make sure that we don't sleep allocating
	 * the thread (recursion here might be bad).
	 * Hence the TDF_INMSLEEP flag.
	 */
	if ((td->td_flags & (TDF_UNBOUND|TDF_INMSLEEP)) == TDF_UNBOUND) {
		/*
		 * We don't need to upcall now, just queue it.
		 * The upcall will happen when other in-kernel work
		 * in this KSEGRP has completed.
		 * Don't recurse here!
		 */
		td->td_flags |= TDF_INMSLEEP;
		thread_schedule_upcall(td, td->td_kse);
		td->td_flags &= ~TDF_INMSLEEP;
	}
	TD_SET_SLEEPING(td);
	td->td_proc->p_stats->p_ru.ru_nvcsw++;
	mi_switch();
	CTR3(KTR_PROC, "cv_switch: resume thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
}

/*
 * Switch context, catching signals.
 */
static __inline int
cv_switch_catch(struct thread *td)
{
	struct proc *p;
	int sig;

	/*
	 * We put ourselves on the sleep queue and start our timeout before
	 * calling cursig, as we could stop there, and a wakeup or a SIGCONT
	 * (or both) could occur while we were stopped.  A SIGCONT would cause
	 * us to be marked as TDS_SLP without resuming us, thus we must be
	 * ready for sleep when cursig is called.  If the wakeup happens while
	 * we're stopped, td->td_wchan will be 0 upon return from cursig,
	 * and TD_ON_SLEEPQ() will return false.
	 */
	td->td_flags |= TDF_SINTR;
	mtx_unlock_spin(&sched_lock);
	p = td->td_proc;
	PROC_LOCK(p);
	sig = cursig(td);
	if (thread_suspend_check(1))
		sig = SIGSTOP;
	mtx_lock_spin(&sched_lock);
	PROC_UNLOCK(p);
	if (sig != 0) {
		if (TD_ON_SLEEPQ(td))
			cv_waitq_remove(td);
		TD_SET_RUNNING(td);
	} else if (TD_ON_SLEEPQ(td)) {
		cv_switch(td);
	}
	td->td_flags &= ~TDF_SINTR;

	return sig;
}

/*
 * Add a thread to the wait queue of a condition variable.
 */
static __inline void
cv_waitq_add(struct cv *cvp, struct thread *td)
{

	td->td_flags |= TDF_CVWAITQ;
	TD_SET_ON_SLEEPQ(td);
	td->td_wchan = cvp;
	td->td_wmesg = cvp->cv_description;
	td->td_ksegrp->kg_slptime = 0; /* XXXKSE */
	td->td_base_pri = td->td_priority;
	CTR3(KTR_PROC, "cv_waitq_add: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	TAILQ_INSERT_TAIL(&cvp->cv_waitq, td, td_slpq);
}

/*
 * Wait on a condition variable.  The current thread is placed on the condition
 * variable's wait queue and suspended.  A cv_signal or cv_broadcast on the same
 * condition variable will resume the thread.  The mutex is released before
 * sleeping and will be held on return.  It is recommended that the mutex be
 * held when cv_signal or cv_broadcast are called.
 */
void
cv_wait(struct cv *cvp, struct mtx *mp)
{
	struct thread *td;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold) {
		/*
		 * During autoconfiguration, just give interrupts
		 * a chance, then just return.  Don't run any other
		 * thread or panic below, in case this is the idle
		 * process and already asleep.
		 */
		return;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	cv_switch(td);

	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);
}
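
/*
 * Illustrative consumer-side usage sketch (not part of this file); the
 * names foo_mtx, foo_cv and foo_ready are hypothetical:
 *
 *	mtx_lock(&foo_mtx);
 *	while (foo_ready == 0)
 *		cv_wait(&foo_cv, &foo_mtx);
 *	(use the now-available resource)
 *	mtx_unlock(&foo_mtx);
 *
 * The condition is re-tested in a loop because the mutex is dropped while
 * the thread sleeps and only reacquired on wakeup, so another thread may
 * have changed the condition again before this one runs.
 */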

/*
 * Wait on a condition variable, allowing interruption by signals.  Return 0 if
 * the thread was resumed with cv_signal or cv_broadcast, EINTR or ERESTART if
 * a signal was caught.  If ERESTART is returned the system call should be
 * restarted if possible.
 */
int
cv_wait_sig(struct cv *cvp, struct mtx *mp)
{
	struct thread *td;
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	p = td->td_proc;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * procs or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return 0;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	sig = cv_switch_catch(td);

	mtx_unlock_spin(&sched_lock);

	PROC_LOCK(p);
	if (sig == 0)
		sig = cursig(td);	/* XXXKSE */
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	PROC_UNLOCK(p);
	if (p->p_flag & P_WEXIT)
		rval = EINTR;

#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}

/*
 * Wait on a condition variable for at most timo/hz seconds.  Returns 0 if the
 * thread was resumed by cv_signal or cv_broadcast, EWOULDBLOCK if the timeout
 * expires.
 */
int
cv_timedwait(struct cv *cvp, struct mtx *mp, int timo)
{
	struct thread *td;
	int rval;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return 0;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
	cv_switch(td);

	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL)
		td->td_flags &= ~TDF_TIMOFAIL;
	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Work around race with cv_timedwait_end similar to that
		 * between msleep and endtsleep.
		 * Go back to sleep.
		 */
		TD_SET_SLEEPING(td);
		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		td->td_flags &= ~TDF_TIMOFAIL;
	}

	if (td->td_proc->p_flag & P_WEXIT)
		rval = EWOULDBLOCK;
	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}
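
/*
 * Illustrative cv_timedwait() usage sketch (hypothetical names, not part
 * of this file).  The timeout is given in clock ticks, so a bound of
 * roughly two seconds is written as 2 * hz:
 *
 *	mtx_lock(&foo_mtx);
 *	error = 0;
 *	while (foo_ready == 0 && error == 0)
 *		error = cv_timedwait(&foo_cv, &foo_mtx, 2 * hz);
 *	mtx_unlock(&foo_mtx);
 *
 * An error value of EWOULDBLOCK indicates that the timeout expired before
 * the condition variable was signalled.
 */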

/*
 * Wait on a condition variable for at most timo/hz seconds, allowing
 * interruption by signals.  Returns 0 if the thread was resumed by cv_signal
 * or cv_broadcast, EWOULDBLOCK if the timeout expires, and EINTR or ERESTART
 * if a signal was caught.
 */
int
cv_timedwait_sig(struct cv *cvp, struct mtx *mp, int timo)
{
	struct thread *td;
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	p = td->td_proc;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return 0;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
	sig = cv_switch_catch(td);

	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL)
		td->td_flags &= ~TDF_TIMOFAIL;
	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Work around race with cv_timedwait_end similar to that
		 * between msleep and endtsleep.
		 * Go back to sleep.
		 */
		TD_SET_SLEEPING(td);
		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		td->td_flags &= ~TDF_TIMOFAIL;
	}
	mtx_unlock_spin(&sched_lock);

	PROC_LOCK(p);
	if (sig == 0)
		sig = cursig(td);
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	PROC_UNLOCK(p);

	if (p->p_flag & P_WEXIT)
		rval = EINTR;

#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}

/*
 * Common code for signal and broadcast.  Assumes waitq is not empty.  Must be
 * called with sched_lock held.
 */
static __inline void
cv_wakeup(struct cv *cvp)
{
	struct thread *td;

	mtx_assert(&sched_lock, MA_OWNED);
	td = TAILQ_FIRST(&cvp->cv_waitq);
	KASSERT(td->td_wchan == cvp, ("%s: bogus wchan", __func__));
	KASSERT(td->td_flags & TDF_CVWAITQ, ("%s: not on waitq", __func__));
	cv_waitq_remove(td);
	TD_CLR_SLEEPING(td);
	setrunnable(td);
}

/*
 * Signal a condition variable, waking up one waiting thread.  Will also wake
 * up the swapper if the process is not in memory, so that it can bring the
 * sleeping process in.  Note that this may also result in additional threads
 * being made runnable.  Should be called with the same mutex as was passed to
 * cv_wait held.
 */
void
cv_signal(struct cv *cvp)
{

	KASSERT(cvp != NULL, ("%s: cvp NULL", __func__));
	mtx_lock_spin(&sched_lock);
	if (!TAILQ_EMPTY(&cvp->cv_waitq)) {
		CV_SIGNAL_VALIDATE(cvp);
		cv_wakeup(cvp);
	}
	mtx_unlock_spin(&sched_lock);
}
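
/*
 * Illustrative producer-side sketch matching the cv_wait() example above
 * (hypothetical names, not part of this file); the mutex protecting the
 * condition should be held around the update and the signal:
 *
 *	mtx_lock(&foo_mtx);
 *	foo_ready = 1;
 *	cv_signal(&foo_cv);
 *	mtx_unlock(&foo_mtx);
 *
 * cv_broadcast() would be used instead when every waiter should wake up
 * and re-check the condition.
 */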

/*
 * Broadcast a signal to a condition variable.  Wakes up all waiting threads.
 * Should be called with the same mutex as was passed to cv_wait held.
 */
void
cv_broadcast(struct cv *cvp)
{

	KASSERT(cvp != NULL, ("%s: cvp NULL", __func__));
	mtx_lock_spin(&sched_lock);
	CV_SIGNAL_VALIDATE(cvp);
	while (!TAILQ_EMPTY(&cvp->cv_waitq))
		cv_wakeup(cvp);
	mtx_unlock_spin(&sched_lock);
}

/*
 * Remove a thread from the wait queue of its condition variable.  This may be
 * called externally.
 */
void
cv_waitq_remove(struct thread *td)
{
	struct cv *cvp;

	mtx_assert(&sched_lock, MA_OWNED);
	if ((cvp = td->td_wchan) != NULL && td->td_flags & TDF_CVWAITQ) {
		TAILQ_REMOVE(&cvp->cv_waitq, td, td_slpq);
		td->td_flags &= ~TDF_CVWAITQ;
		TD_CLR_ON_SLEEPQ(td);
	}
}

/*
 * Timeout function for cv_timedwait.  Put the thread on the runqueue and set
 * its timeout flag.
 */
static void
cv_timedwait_end(void *arg)
{
	struct thread *td;

	td = arg;
	CTR3(KTR_PROC, "cv_timedwait_end: thread %p (pid %d, %s)",
	    td, td->td_proc->p_pid, td->td_proc->p_comm);
	mtx_lock_spin(&sched_lock);
	if (TD_ON_SLEEPQ(td)) {
		cv_waitq_remove(td);
		td->td_flags |= TDF_TIMEOUT;
	} else {
		td->td_flags |= TDF_TIMOFAIL;
	}
	TD_CLR_SLEEPING(td);
	setrunnable(td);
	mtx_unlock_spin(&sched_lock);
}

/*
 * For now only abort interruptible waits.
 * The others will have to either complete on their own or have a timeout.
 */
void
cv_abort(struct thread *td)
{

	CTR3(KTR_PROC, "cv_abort: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	mtx_lock_spin(&sched_lock);
	if ((td->td_flags & (TDF_SINTR|TDF_TIMEOUT)) == TDF_SINTR) {
		if (TD_ON_SLEEPQ(td)) {
			cv_waitq_remove(td);
		}
		TD_CLR_SLEEPING(td);
		setrunnable(td);
	}
	mtx_unlock_spin(&sched_lock);
}