/*-
 * Copyright (c) 2000 Jake Burkholder <jake@freebsd.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/condvar.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

/*
 * Common sanity checks for cv_wait* functions.
 */
#define	CV_ASSERT(cvp, mp, td) do {					\
	KASSERT((td) != NULL, ("%s: curthread NULL", __func__));	\
	KASSERT(TD_IS_RUNNING(td), ("%s: not TDS_RUNNING", __func__));	\
	KASSERT((cvp) != NULL, ("%s: cvp NULL", __func__));		\
	KASSERT((mp) != NULL, ("%s: mp NULL", __func__));		\
	mtx_assert((mp), MA_OWNED | MA_NOTRECURSED);			\
} while (0)

#ifdef INVARIANTS
#define	CV_WAIT_VALIDATE(cvp, mp) do {					\
	if (TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		/* Only waiter. */					\
		(cvp)->cv_mtx = (mp);					\
	} else {							\
		/*							\
		 * Other waiter; assert that we're using the		\
		 * same mutex.						\
		 */							\
		KASSERT((cvp)->cv_mtx == (mp),				\
		    ("%s: Multiple mutexes", __func__));		\
	}								\
} while (0)

#define	CV_SIGNAL_VALIDATE(cvp) do {					\
	if (!TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		KASSERT(mtx_owned((cvp)->cv_mtx),			\
		    ("%s: Mutex not owned", __func__));			\
	}								\
} while (0)

#else
#define	CV_WAIT_VALIDATE(cvp, mp)
#define	CV_SIGNAL_VALIDATE(cvp)
#endif

static void cv_timedwait_end(void *arg);

/*
 * Initialize a condition variable.  Must be called before use.
 */
void
cv_init(struct cv *cvp, const char *desc)
{

	TAILQ_INIT(&cvp->cv_waitq);
	cvp->cv_mtx = NULL;
	cvp->cv_description = desc;
}
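
/*
 * Example usage (an illustrative sketch, not taken from this file; the
 * names "foo_mtx" and "foo_cv" are hypothetical): a condition variable
 * is normally paired with the mutex that protects the predicate being
 * waited on, and the two share a lifetime:
 *
 *	struct mtx foo_mtx;
 *	struct cv foo_cv;
 *
 *	mtx_init(&foo_mtx, "foo", NULL, MTX_DEF);
 *	cv_init(&foo_cv, "foo");
 *	...
 *	cv_destroy(&foo_cv);	(all waiters must be gone by now)
 *	mtx_destroy(&foo_mtx);
 */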

/*
 * Destroy a condition variable.  The condition variable must be re-initialized
 * in order to be re-used.
 */
void
cv_destroy(struct cv *cvp)
{

	KASSERT(cv_waitq_empty(cvp), ("%s: cv_waitq non-empty", __func__));
}

/*
 * Common code for cv_wait* functions.  All require sched_lock.
 */

/*
 * Switch context.
 */
static __inline void
cv_switch(struct thread *td)
{
	TD_SET_SLEEPING(td);
	td->td_proc->p_stats->p_ru.ru_nvcsw++;
	mi_switch();
	CTR3(KTR_PROC, "cv_switch: resume thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
}

/*
 * Switch context, catching signals.
 */
static __inline int
cv_switch_catch(struct thread *td)
{
	struct proc *p;
	int sig;

	/*
	 * We put ourselves on the sleep queue and start our timeout before
	 * calling cursig, as we could stop there, and a wakeup or a SIGCONT (or
	 * both) could occur while we were stopped.  A SIGCONT would cause us to
	 * be marked as TDS_SLP without resuming us, thus we must be ready for
	 * sleep when cursig is called.  If the wakeup happens while we're
	 * stopped, td->td_wchan will be NULL upon return from cursig,
	 * and TD_ON_SLEEPQ() will return false.
	 */
	td->td_flags |= TDF_SINTR;
	mtx_unlock_spin(&sched_lock);
	p = td->td_proc;
	PROC_LOCK(p);
	mtx_lock(&p->p_sigacts->ps_mtx);
	sig = cursig(td);
	mtx_unlock(&p->p_sigacts->ps_mtx);
	if (thread_suspend_check(1))
		sig = SIGSTOP;
	mtx_lock_spin(&sched_lock);
	PROC_UNLOCK(p);
	if (sig != 0) {
		if (TD_ON_SLEEPQ(td))
			cv_waitq_remove(td);
		TD_SET_RUNNING(td);
	} else if (TD_ON_SLEEPQ(td)) {
		cv_switch(td);
	}
	td->td_flags &= ~TDF_SINTR;

	return (sig);
}

/*
 * Add a thread to the wait queue of a condition variable.
 */
static __inline void
cv_waitq_add(struct cv *cvp, struct thread *td)
{

	td->td_flags |= TDF_CVWAITQ;
	TD_SET_ON_SLEEPQ(td);
	td->td_wchan = cvp;
	td->td_wmesg = cvp->cv_description;
	CTR3(KTR_PROC, "cv_waitq_add: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	TAILQ_INSERT_TAIL(&cvp->cv_waitq, td, td_slpq);
	sched_sleep(td, td->td_priority);
}

/*
 * Wait on a condition variable.  The current thread is placed on the condition
 * variable's wait queue and suspended.  A cv_signal or cv_broadcast on the same
 * condition variable will resume the thread.  The mutex is released before
 * sleeping and will be held on return.  It is recommended that the mutex be
 * held when cv_signal or cv_broadcast is called.
 */
void
cv_wait(struct cv *cvp, struct mtx *mp)
{
	struct thread *td;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object,
	    "Waiting on \"%s\"", cvp->cv_description);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold) {
		/*
		 * During autoconfiguration, just give interrupts
		 * a chance, then just return.  Don't run any other
		 * thread or panic below, in case this is the idle
		 * process and already asleep.
		 */
		return;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	cv_switch(td);

	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);
}
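
/*
 * Example (an illustrative sketch with hypothetical names): because
 * cv_wait drops the mutex while sleeping and another thread may consume
 * the condition first, the predicate must be re-checked in a loop after
 * every wakeup:
 *
 *	mtx_lock(&foo_mtx);
 *	while (!foo_ready)
 *		cv_wait(&foo_cv, &foo_mtx);
 *	(foo_ready is now true and foo_mtx is held again)
 *	mtx_unlock(&foo_mtx);
 */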

/*
 * Wait on a condition variable, allowing interruption by signals.  Return 0 if
 * the thread was resumed with cv_signal or cv_broadcast, EINTR or ERESTART if
 * a signal was caught.  If ERESTART is returned the system call should be
 * restarted if possible.
 */
int
cv_wait_sig(struct cv *cvp, struct mtx *mp)
{
	struct thread *td;
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	p = td->td_proc;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object,
	    "Waiting on \"%s\"", cvp->cv_description);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * procs or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return (0);
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	sig = cv_switch_catch(td);

	mtx_unlock_spin(&sched_lock);

	PROC_LOCK(p);
	mtx_lock(&p->p_sigacts->ps_mtx);
	if (sig == 0)
		sig = cursig(td);	/* XXXKSE */
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	mtx_unlock(&p->p_sigacts->ps_mtx);
	if (p->p_flag & P_WEXIT)
		rval = EINTR;
	PROC_UNLOCK(p);

#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}
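
/*
 * Example (an illustrative sketch with hypothetical names): the caller
 * of an interruptible wait must cope with the signal-related returns;
 * ERESTART is usually propagated so the syscall layer can restart the
 * system call:
 *
 *	error = 0;
 *	mtx_lock(&foo_mtx);
 *	while (!foo_ready && error == 0)
 *		error = cv_wait_sig(&foo_cv, &foo_mtx);
 *	mtx_unlock(&foo_mtx);
 *	return (error);		(0, EINTR or ERESTART)
 */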

/*
 * Wait on a condition variable for at most timo/hz seconds.  Returns 0 if the
 * thread was resumed by cv_signal or cv_broadcast, EWOULDBLOCK if the timeout
 * expires.
 */
int
cv_timedwait(struct cv *cvp, struct mtx *mp, int timo)
{
	struct thread *td;
	int rval;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object,
	    "Waiting on \"%s\"", cvp->cv_description);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return (0);
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
	cv_switch(td);

	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL)
		td->td_flags &= ~TDF_TIMOFAIL;
	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Work around a race with cv_timedwait_end similar to that
		 * between msleep and endtsleep.
		 * Go back to sleep.
		 */
		TD_SET_SLEEPING(td);
		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		td->td_flags &= ~TDF_TIMOFAIL;
	}

	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}
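
/*
 * Example (an illustrative sketch with hypothetical names): "timo" is
 * expressed in ticks, so an upper bound of one second is written as hz:
 *
 *	mtx_lock(&foo_mtx);
 *	while (!foo_ready) {
 *		error = cv_timedwait(&foo_cv, &foo_mtx, hz);
 *		if (error == EWOULDBLOCK)
 *			break;	(waited roughly one second, gave up)
 *	}
 *	mtx_unlock(&foo_mtx);
 */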

/*
 * Wait on a condition variable for at most timo/hz seconds, allowing
 * interruption by signals.  Returns 0 if the thread was resumed by cv_signal
 * or cv_broadcast, EWOULDBLOCK if the timeout expires, and EINTR or ERESTART if
 * a signal was caught.
 */
int
cv_timedwait_sig(struct cv *cvp, struct mtx *mp, int timo)
{
	struct thread *td;
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	p = td->td_proc;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object,
	    "Waiting on \"%s\"", cvp->cv_description);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return (0);
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
	sig = cv_switch_catch(td);

	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL)
		td->td_flags &= ~TDF_TIMOFAIL;
	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Work around a race with cv_timedwait_end similar to that
		 * between msleep and endtsleep.
		 * Go back to sleep.
		 */
		TD_SET_SLEEPING(td);
		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		td->td_flags &= ~TDF_TIMOFAIL;
	}
	mtx_unlock_spin(&sched_lock);

	PROC_LOCK(p);
	mtx_lock(&p->p_sigacts->ps_mtx);
	if (sig == 0)
		sig = cursig(td);
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	mtx_unlock(&p->p_sigacts->ps_mtx);
	if (p->p_flag & P_WEXIT)
		rval = EINTR;
	PROC_UNLOCK(p);

#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}
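
/*
 * Example (an illustrative sketch with hypothetical names): a bounded,
 * interruptible wait has to distinguish the timeout from a caught
 * signal:
 *
 *	error = cv_timedwait_sig(&foo_cv, &foo_mtx, 5 * hz);
 *	if (error == EWOULDBLOCK)
 *		(the five-second timeout expired)
 *	else if (error == EINTR || error == ERESTART)
 *		(a signal was caught)
 */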

/*
 * Common code for signal and broadcast.  Assumes waitq is not empty.  Must be
 * called with sched_lock held.
 */
static __inline void
cv_wakeup(struct cv *cvp)
{
	struct thread *td;

	mtx_assert(&sched_lock, MA_OWNED);
	td = TAILQ_FIRST(&cvp->cv_waitq);
	KASSERT(td->td_wchan == cvp, ("%s: bogus wchan", __func__));
	KASSERT(td->td_flags & TDF_CVWAITQ, ("%s: not on waitq", __func__));
	cv_waitq_remove(td);
	TD_CLR_SLEEPING(td);
	setrunnable(td);
}

/*
 * Signal a condition variable, waking up one waiting thread.  This will also
 * wake up the swapper if the process is not in memory, so that it can bring
 * the sleeping process in.  Note that this may also result in additional
 * threads being made runnable.  Should be called with the same mutex as was
 * passed to cv_wait held.
 */
void
cv_signal(struct cv *cvp)
{

	KASSERT(cvp != NULL, ("%s: cvp NULL", __func__));
	mtx_lock_spin(&sched_lock);
	if (!TAILQ_EMPTY(&cvp->cv_waitq)) {
		CV_SIGNAL_VALIDATE(cvp);
		cv_wakeup(cvp);
	}
	mtx_unlock_spin(&sched_lock);
}
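
/*
 * Example (an illustrative sketch with hypothetical names): a producer
 * updates the predicate under the same mutex the waiters use and then
 * signals; holding the mutex across the cv_signal avoids missed
 * wakeups:
 *
 *	mtx_lock(&foo_mtx);
 *	foo_ready = 1;
 *	cv_signal(&foo_cv);	(or cv_broadcast to wake every waiter)
 *	mtx_unlock(&foo_mtx);
 */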

/*
 * Broadcast a signal to a condition variable.  Wakes up all waiting threads.
 * Should be called with the same mutex as was passed to cv_wait held.
 */
void
cv_broadcast(struct cv *cvp)
{

	KASSERT(cvp != NULL, ("%s: cvp NULL", __func__));
	mtx_lock_spin(&sched_lock);
	CV_SIGNAL_VALIDATE(cvp);
	while (!TAILQ_EMPTY(&cvp->cv_waitq))
		cv_wakeup(cvp);
	mtx_unlock_spin(&sched_lock);
}

/*
 * Remove a thread from the wait queue of its condition variable.  This may be
 * called externally.
 */
void
cv_waitq_remove(struct thread *td)
{
	struct cv *cvp;

	mtx_assert(&sched_lock, MA_OWNED);
	if ((cvp = td->td_wchan) != NULL && td->td_flags & TDF_CVWAITQ) {
		TAILQ_REMOVE(&cvp->cv_waitq, td, td_slpq);
		td->td_flags &= ~TDF_CVWAITQ;
		td->td_wmesg = NULL;
		TD_CLR_ON_SLEEPQ(td);
	}
}

/*
 * Timeout function for cv_timedwait.  Put the thread on the run queue and set
 * its timeout flag.
 */
static void
cv_timedwait_end(void *arg)
{
	struct thread *td;

	td = arg;
	CTR3(KTR_PROC, "cv_timedwait_end: thread %p (pid %d, %s)",
	    td, td->td_proc->p_pid, td->td_proc->p_comm);
	mtx_lock_spin(&sched_lock);
	if (TD_ON_SLEEPQ(td)) {
		cv_waitq_remove(td);
		td->td_flags |= TDF_TIMEOUT;
	} else {
		td->td_flags |= TDF_TIMOFAIL;
	}
	TD_CLR_SLEEPING(td);
	setrunnable(td);
	mtx_unlock_spin(&sched_lock);
}

/*
 * For now, only abort interruptible waits.
 * The others will have to either complete on their own or have a timeout.
 */
void
cv_abort(struct thread *td)
{

	CTR3(KTR_PROC, "cv_abort: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	mtx_lock_spin(&sched_lock);
	if ((td->td_flags & (TDF_SINTR|TDF_TIMEOUT)) == TDF_SINTR) {
		if (TD_ON_SLEEPQ(td)) {
			cv_waitq_remove(td);
		}
		TD_CLR_SLEEPING(td);
		setrunnable(td);
	}
	mtx_unlock_spin(&sched_lock);
}