/*-
 * Copyright (c) 2000 Jake Burkholder <jake@freebsd.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/condvar.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

/*
 * Common sanity checks for cv_wait* functions.
 */
#define	CV_ASSERT(cvp, mp, td) do {					\
	KASSERT((td) != NULL, ("%s: curthread NULL", __func__));	\
	KASSERT(TD_IS_RUNNING(td), ("%s: not TDS_RUNNING", __func__));	\
	KASSERT((cvp) != NULL, ("%s: cvp NULL", __func__));		\
	KASSERT((mp) != NULL, ("%s: mp NULL", __func__));		\
	mtx_assert((mp), MA_OWNED | MA_NOTRECURSED);			\
} while (0)

#ifdef INVARIANTS
#define	CV_WAIT_VALIDATE(cvp, mp) do {					\
	if (TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		/* Only waiter. */					\
		(cvp)->cv_mtx = (mp);					\
	} else {							\
		/*							\
		 * Other waiter; assert that we're using the		\
		 * same mutex.						\
		 */							\
		KASSERT((cvp)->cv_mtx == (mp),				\
		    ("%s: Multiple mutexes", __func__));		\
	}								\
} while (0)

#define	CV_SIGNAL_VALIDATE(cvp) do {					\
	if (!TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		KASSERT(mtx_owned((cvp)->cv_mtx),			\
		    ("%s: Mutex not owned", __func__));			\
	}								\
} while (0)

#else
#define	CV_WAIT_VALIDATE(cvp, mp)
#define	CV_SIGNAL_VALIDATE(cvp)
#endif

static void cv_timedwait_end(void *arg);
static void cv_check_upcall(struct thread *td);

/*
 * Initialize a condition variable.  Must be called before use.
 */
void
cv_init(struct cv *cvp, const char *desc)
{

	TAILQ_INIT(&cvp->cv_waitq);
	cvp->cv_mtx = NULL;
	cvp->cv_description = desc;
}

/*
 * Destroy a condition variable.  The condition variable must be re-initialized
 * in order to be re-used.
 */
void
cv_destroy(struct cv *cvp)
{

	KASSERT(cv_waitq_empty(cvp), ("%s: cv_waitq non-empty", __func__));
}
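
/*
 * Illustrative sketch, not part of this file's interface: the expected
 * lifecycle of a condition variable paired with its mutex.  The
 * "example_*" names here (and in the later sketches) are hypothetical,
 * and this assumes the four-argument mtx_init() of this era.  cv_init()
 * must run before any waiter or signaller touches the cv; cv_destroy()
 * asserts (under INVARIANTS) that no thread is still queued on it.
 */
#if 0
struct example_softc {
	struct mtx	sc_mtx;
	struct cv	sc_cv;
	int		sc_avail;
};

static void
example_init(struct example_softc *sc)
{

	mtx_init(&sc->sc_mtx, "example", NULL, MTX_DEF);
	cv_init(&sc->sc_cv, "example");	/* desc is shown as td_wmesg */
	sc->sc_avail = 0;
}

static void
example_fini(struct example_softc *sc)
{

	cv_destroy(&sc->sc_cv);		/* KASSERTs an empty wait queue */
	mtx_destroy(&sc->sc_mtx);
}
#endif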

/*
 * Common code for cv_wait* functions.  All require sched_lock.
 */

/*
 * Decide if we need to queue an upcall.
 * This is copied from msleep(), perhaps this should be a common function.
 */
static void
cv_check_upcall(struct thread *td)
{

	/*
	 * If we are capable of async syscalls and there isn't already
	 * another one ready to return, start a new thread
	 * and queue it as ready to run.  Note that there is danger here
	 * because we need to make sure that we don't sleep allocating
	 * the thread (recursion here might be bad).
	 * Hence the TDF_INMSLEEP flag.
	 */
	if ((td->td_proc->p_flag & P_KSES) && td->td_mailbox &&
	    (td->td_flags & TDF_INMSLEEP) == 0) {
		/*
		 * We don't need to upcall now, just queue it.
		 * The upcall will happen when other in-kernel work
		 * in this KSEGRP has completed.
		 * Don't recurse here!
		 */
		td->td_flags |= TDF_INMSLEEP;
		thread_schedule_upcall(td, td->td_kse);
		td->td_flags &= ~TDF_INMSLEEP;
	}
}

/*
 * Switch context.
 */
static __inline void
cv_switch(struct thread *td)
{

	cv_check_upcall(td);
	TD_SET_SLEEPING(td);
	td->td_proc->p_stats->p_ru.ru_nvcsw++;
	mi_switch();
	CTR3(KTR_PROC, "cv_switch: resume thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
}

/*
 * Switch context, catching signals.
 */
static __inline int
cv_switch_catch(struct thread *td)
{
	struct proc *p;
	int sig;

	/*
	 * We put ourselves on the sleep queue and start our timeout before
	 * calling cursig, as we could stop there, and a wakeup or a SIGCONT (or
	 * both) could occur while we were stopped.  A SIGCONT would cause us to
	 * be marked as TDS_SLP without resuming us, thus we must be ready for
	 * sleep when cursig is called.  If the wakeup happens while we're
	 * stopped, td->td_wchan will be 0 upon return from cursig,
	 * and TD_ON_SLEEPQ() will return false.
	 */
	td->td_flags |= TDF_SINTR;
	mtx_unlock_spin(&sched_lock);
	p = td->td_proc;
	PROC_LOCK(p);
	sig = cursig(td);
	if (thread_suspend_check(1))
		sig = SIGSTOP;
	mtx_lock_spin(&sched_lock);
	PROC_UNLOCK(p);
	if (sig != 0) {
		if (TD_ON_SLEEPQ(td))
			cv_waitq_remove(td);
		TD_SET_RUNNING(td);
	} else if (TD_ON_SLEEPQ(td)) {
		cv_switch(td);
	}
	td->td_flags &= ~TDF_SINTR;

	return (sig);
}

/*
 * Add a thread to the wait queue of a condition variable.
 */
static __inline void
cv_waitq_add(struct cv *cvp, struct thread *td)
{

	td->td_flags |= TDF_CVWAITQ;
	TD_SET_ON_SLEEPQ(td);
	td->td_wchan = cvp;
	td->td_wmesg = cvp->cv_description;
	td->td_ksegrp->kg_slptime = 0;	/* XXXKSE */
	td->td_base_pri = td->td_priority;
	CTR3(KTR_PROC, "cv_waitq_add: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	TAILQ_INSERT_TAIL(&cvp->cv_waitq, td, td_slpq);
}

/*
 * Wait on a condition variable.  The current thread is placed on the condition
 * variable's wait queue and suspended.  A cv_signal or cv_broadcast on the same
 * condition variable will resume the thread.  The mutex is released before
 * sleeping and will be held on return.  It is recommended that the mutex be
 * held when cv_signal or cv_broadcast are called.
 */
void
cv_wait(struct cv *cvp, struct mtx *mp)
{
	struct thread *td;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	cv_switch(td);

	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);
}

/*
 * Wait on a condition variable, allowing interruption by signals.  Return 0 if
 * the thread was resumed with cv_signal or cv_broadcast, EINTR or ERESTART if
 * a signal was caught.  If ERESTART is returned the system call should be
 * restarted if possible.
 */
int
cv_wait_sig(struct cv *cvp, struct mtx *mp)
{
	struct thread *td;
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	p = td->td_proc;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * procs or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return (0);
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	sig = cv_switch_catch(td);

	mtx_unlock_spin(&sched_lock);

	PROC_LOCK(p);
	if (sig == 0)
		sig = cursig(td);	/* XXXKSE */
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	PROC_UNLOCK(p);
	if (p->p_flag & P_WEXIT)
		rval = EINTR;

#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}
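
/*
 * Illustrative sketch (hypothetical "example_*" names, reusing the
 * example_softc from the sketch above): how a syscall-side consumer is
 * expected to use cv_wait_sig().  The predicate is re-tested in a loop
 * because another thread may consume the event first, and a non-zero
 * return is propagated so that EINTR/ERESTART reach the syscall layer,
 * which may restart the call on ERESTART.
 */
#if 0
static int
example_wait_for_data(struct example_softc *sc)
{
	int error;

	mtx_lock(&sc->sc_mtx);
	while (sc->sc_avail == 0) {
		error = cv_wait_sig(&sc->sc_cv, &sc->sc_mtx);
		if (error != 0) {
			/* Interrupted by a signal; abandon the wait. */
			mtx_unlock(&sc->sc_mtx);
			return (error);
		}
	}
	sc->sc_avail--;
	mtx_unlock(&sc->sc_mtx);
	return (0);
}
#endif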

/*
 * Wait on a condition variable for at most timo/hz seconds.  Returns 0 if the
 * thread was resumed by cv_signal or cv_broadcast, EWOULDBLOCK if the timeout
 * expires.
 */
int
cv_timedwait(struct cv *cvp, struct mtx *mp, int timo)
{
	struct thread *td;
	int rval;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return (0);
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
	cv_switch(td);

	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL)
		td->td_flags &= ~TDF_TIMOFAIL;
	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Work around race with cv_timedwait_end similar to that
		 * between msleep and endtsleep.
		 * Go back to sleep.
		 */
		TD_SET_SLEEPING(td);
		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		td->td_flags &= ~TDF_TIMOFAIL;
	}

	if (td->td_proc->p_flag & P_WEXIT)
		rval = EWOULDBLOCK;
	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}

/*
 * Wait on a condition variable for at most timo/hz seconds, allowing
 * interruption by signals.  Returns 0 if the thread was resumed by cv_signal
 * or cv_broadcast, EWOULDBLOCK if the timeout expires, and EINTR or ERESTART
 * if a signal was caught.
 */
int
cv_timedwait_sig(struct cv *cvp, struct mtx *mp, int timo)
{
	struct thread *td;
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	p = td->td_proc;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return (0);
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
	sig = cv_switch_catch(td);

	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL)
		td->td_flags &= ~TDF_TIMOFAIL;
	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Work around race with cv_timedwait_end similar to that
		 * between msleep and endtsleep.
		 * Go back to sleep.
		 */
		TD_SET_SLEEPING(td);
		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		td->td_flags &= ~TDF_TIMOFAIL;
	}
	mtx_unlock_spin(&sched_lock);

	PROC_LOCK(p);
	if (sig == 0)
		sig = cursig(td);
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	PROC_UNLOCK(p);

	if (p->p_flag & P_WEXIT)
		rval = EINTR;

#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}
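
/*
 * Illustrative sketch (hypothetical "example_*" names, reusing the
 * example_softc above): a bounded wait.  The timo argument is in ticks,
 * so a wait of N seconds is N * hz; an EWOULDBLOCK return from
 * cv_timedwait() distinguishes a timeout from a genuine wakeup.
 */
#if 0
static int
example_wait_timed(struct example_softc *sc, int seconds)
{
	int error;

	mtx_lock(&sc->sc_mtx);
	while (sc->sc_avail == 0) {
		error = cv_timedwait(&sc->sc_cv, &sc->sc_mtx, seconds * hz);
		if (error == EWOULDBLOCK) {
			/* Timed out before the condition became true. */
			mtx_unlock(&sc->sc_mtx);
			return (error);
		}
	}
	sc->sc_avail--;
	mtx_unlock(&sc->sc_mtx);
	return (0);
}
#endif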

/*
 * Common code for signal and broadcast.  Assumes waitq is not empty.  Must be
 * called with sched_lock held.
 */
static __inline void
cv_wakeup(struct cv *cvp)
{
	struct thread *td;

	mtx_assert(&sched_lock, MA_OWNED);
	td = TAILQ_FIRST(&cvp->cv_waitq);
	KASSERT(td->td_wchan == cvp, ("%s: bogus wchan", __func__));
	KASSERT(td->td_flags & TDF_CVWAITQ, ("%s: not on waitq", __func__));
	cv_waitq_remove(td);
	TD_CLR_SLEEPING(td);
	setrunnable(td);
}

/*
 * Signal a condition variable, waking up one waiting thread.  Will also wake
 * up the swapper if the process is not in memory, so that it can bring the
 * sleeping process in.  Note that this may also result in additional threads
 * being made runnable.  Should be called with the same mutex as was passed to
 * cv_wait held.
 */
void
cv_signal(struct cv *cvp)
{

	KASSERT(cvp != NULL, ("%s: cvp NULL", __func__));
	mtx_lock_spin(&sched_lock);
	if (!TAILQ_EMPTY(&cvp->cv_waitq)) {
		CV_SIGNAL_VALIDATE(cvp);
		cv_wakeup(cvp);
	}
	mtx_unlock_spin(&sched_lock);
}

/*
 * Broadcast a signal to a condition variable.  Wakes up all waiting threads.
 * Should be called with the same mutex as was passed to cv_wait held.
 */
void
cv_broadcast(struct cv *cvp)
{

	KASSERT(cvp != NULL, ("%s: cvp NULL", __func__));
	mtx_lock_spin(&sched_lock);
	CV_SIGNAL_VALIDATE(cvp);
	while (!TAILQ_EMPTY(&cvp->cv_waitq))
		cv_wakeup(cvp);
	mtx_unlock_spin(&sched_lock);
}

/*
 * Remove a thread from the wait queue of its condition variable.  This may be
 * called externally.
 */
void
cv_waitq_remove(struct thread *td)
{
	struct cv *cvp;

	mtx_assert(&sched_lock, MA_OWNED);
	if ((cvp = td->td_wchan) != NULL && td->td_flags & TDF_CVWAITQ) {
		TAILQ_REMOVE(&cvp->cv_waitq, td, td_slpq);
		td->td_flags &= ~TDF_CVWAITQ;
		TD_CLR_ON_SLEEPQ(td);
	}
}

/*
 * Timeout function for cv_timedwait.  Put the thread on the runqueue and set
 * its timeout flag.
 */
static void
cv_timedwait_end(void *arg)
{
	struct thread *td;

	td = arg;
	CTR3(KTR_PROC, "cv_timedwait_end: thread %p (pid %d, %s)",
	    td, td->td_proc->p_pid, td->td_proc->p_comm);
	mtx_lock_spin(&sched_lock);
	if (TD_ON_SLEEPQ(td)) {
		cv_waitq_remove(td);
		td->td_flags |= TDF_TIMEOUT;
	} else {
		td->td_flags |= TDF_TIMOFAIL;
	}
	TD_CLR_SLEEPING(td);
	setrunnable(td);
	mtx_unlock_spin(&sched_lock);
}

/*
 * For now, only abort interruptible waits.
 * The others will have to either complete on their own or have a timeout.
 */
void
cv_abort(struct thread *td)
{

	CTR3(KTR_PROC, "cv_abort: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	mtx_lock_spin(&sched_lock);
	if ((td->td_flags & (TDF_SINTR|TDF_TIMEOUT)) == TDF_SINTR) {
		if (TD_ON_SLEEPQ(td)) {
			cv_waitq_remove(td);
		}
		TD_CLR_SLEEPING(td);
		setrunnable(td);
	}
	mtx_unlock_spin(&sched_lock);
}
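
/*
 * Illustrative end-to-end sketch (hypothetical "example_*" names, reusing
 * the example_softc above): the producer side that pairs with the waiting
 * sketches.  The producer holds the same mutex the consumers pass to
 * cv_wait*(), which is why a wakeup cannot be lost: a consumer either
 * still holds the mutex (and has not yet tested the predicate) or is
 * already queued on cv_waitq.
 */
#if 0
static void
example_produce(struct example_softc *sc)
{

	mtx_lock(&sc->sc_mtx);
	sc->sc_avail++;
	cv_signal(&sc->sc_cv);		/* Wake exactly one consumer. */
	mtx_unlock(&sc->sc_mtx);
}

static void
example_produce_many(struct example_softc *sc, int n)
{

	mtx_lock(&sc->sc_mtx);
	sc->sc_avail += n;
	cv_broadcast(&sc->sc_cv);	/* Wake all consumers to re-check. */
	mtx_unlock(&sc->sc_mtx);
}
#endif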