xref: /freebsd/sys/kern/kern_condvar.c (revision 77b7cdf1999ee965ad494fddd184b18f532ac91a)
/*-
 * Copyright (c) 2000 Jake Burkholder <jake@freebsd.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/condvar.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

/*
 * Common sanity checks for cv_wait* functions.
 */
#define	CV_ASSERT(cvp, mp, td) do {					\
	KASSERT((td) != NULL, ("%s: curthread NULL", __func__));	\
	KASSERT(TD_IS_RUNNING(td), ("%s: not TDS_RUNNING", __func__));	\
	KASSERT((cvp) != NULL, ("%s: cvp NULL", __func__));		\
	KASSERT((mp) != NULL, ("%s: mp NULL", __func__));		\
	mtx_assert((mp), MA_OWNED | MA_NOTRECURSED);			\
} while (0)

#ifdef INVARIANTS
#define	CV_WAIT_VALIDATE(cvp, mp) do {					\
	if (TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		/* Only waiter. */					\
		(cvp)->cv_mtx = (mp);					\
	} else {							\
		/*							\
		 * Other waiter; assert that we're using the		\
		 * same mutex.						\
		 */							\
		KASSERT((cvp)->cv_mtx == (mp),				\
		    ("%s: Multiple mutexes", __func__));		\
	}								\
} while (0)

#define	CV_SIGNAL_VALIDATE(cvp) do {					\
	if (!TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		KASSERT(mtx_owned((cvp)->cv_mtx),			\
		    ("%s: Mutex not owned", __func__));			\
	}								\
} while (0)

#else
#define	CV_WAIT_VALIDATE(cvp, mp)
#define	CV_SIGNAL_VALIDATE(cvp)
#endif

static void cv_timedwait_end(void *arg);

/*
 * Initialize a condition variable.  Must be called before use.
 */
void
cv_init(struct cv *cvp, const char *desc)
{

	TAILQ_INIT(&cvp->cv_waitq);
	cvp->cv_mtx = NULL;
	cvp->cv_description = desc;
}

/*
 * Destroy a condition variable.  The condition variable must be re-initialized
 * in order to be re-used.
 */
void
cv_destroy(struct cv *cvp)
{

	KASSERT(cv_waitq_empty(cvp), ("%s: cv_waitq non-empty", __func__));
}
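
/*
 * Minimal lifecycle sketch (illustrative only, not part of this file;
 * the foo_* names are hypothetical).  A condition variable is typically
 * initialized once at subsystem setup and destroyed only after no
 * thread can still be waiting on it:
 *
 *	static struct cv foo_cv;
 *
 *	cv_init(&foo_cv, "foowt");
 *	...
 *	cv_destroy(&foo_cv);
 */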

/*
 * Common code for cv_wait* functions.  All require sched_lock.
 */

/*
 * Switch context.
 */
static __inline void
cv_switch(struct thread *td)
{
	TD_SET_SLEEPING(td);
	td->td_proc->p_stats->p_ru.ru_nvcsw++;
	mi_switch();
	CTR3(KTR_PROC, "cv_switch: resume thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
}

/*
 * Switch context, catching signals.
 */
static __inline int
cv_switch_catch(struct thread *td)
{
	struct proc *p;
	int sig;

	/*
	 * We put ourselves on the sleep queue and start our timeout before
	 * calling cursig, as we could stop there, and a wakeup or a SIGCONT (or
	 * both) could occur while we were stopped.  A SIGCONT would cause us to
	 * be marked as TDS_SLP without resuming us, thus we must be ready for
	 * sleep when cursig is called.  If the wakeup happens while we're
	 * stopped, td->td_wchan will be 0 upon return from cursig,
	 * and TD_ON_SLEEPQ() will return false.
	 */
	td->td_flags |= TDF_SINTR;
	mtx_unlock_spin(&sched_lock);
	p = td->td_proc;
	PROC_LOCK(p);
	sig = cursig(td);
	if (thread_suspend_check(1))
		sig = SIGSTOP;
	mtx_lock_spin(&sched_lock);
	PROC_UNLOCK(p);
	if (sig != 0) {
		if (TD_ON_SLEEPQ(td))
			cv_waitq_remove(td);
		TD_SET_RUNNING(td);
	} else if (TD_ON_SLEEPQ(td)) {
		cv_switch(td);
	}
	td->td_flags &= ~TDF_SINTR;

	return sig;
}

/*
 * Add a thread to the wait queue of a condition variable.
 */
static __inline void
cv_waitq_add(struct cv *cvp, struct thread *td)
{

	td->td_flags |= TDF_CVWAITQ;
	TD_SET_ON_SLEEPQ(td);
	td->td_wchan = cvp;
	td->td_wmesg = cvp->cv_description;
	CTR3(KTR_PROC, "cv_waitq_add: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	TAILQ_INSERT_TAIL(&cvp->cv_waitq, td, td_slpq);
	sched_sleep(td, td->td_priority);
}

/*
 * Wait on a condition variable.  The current thread is placed on the condition
 * variable's wait queue and suspended.  A cv_signal or cv_broadcast on the same
 * condition variable will resume the thread.  The mutex is released before
 * sleeping and will be held on return.  It is recommended that the mutex be
 * held when cv_signal or cv_broadcast are called.
 */
void
cv_wait(struct cv *cvp, struct mtx *mp)
{
	struct thread *td;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object,
	    "Waiting on \"%s\"", cvp->cv_description);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	cv_switch(td);

	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);
}
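
/*
 * Usage sketch for cv_wait (illustrative only; the foo_* names are
 * hypothetical).  A wakeup carries no predicate of its own, so the
 * waited-for condition is re-tested in a loop with the mutex held:
 *
 *	mtx_lock(&foo_mtx);
 *	while (foo_ready == 0)
 *		cv_wait(&foo_cv, &foo_mtx);
 *	(foo_mtx is held again here; consume the resource)
 *	mtx_unlock(&foo_mtx);
 */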

/*
 * Wait on a condition variable, allowing interruption by signals.  Return 0 if
 * the thread was resumed with cv_signal or cv_broadcast, EINTR or ERESTART if
 * a signal was caught.  If ERESTART is returned the system call should be
 * restarted if possible.
 */
int
cv_wait_sig(struct cv *cvp, struct mtx *mp)
{
	struct thread *td;
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	p = td->td_proc;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object,
	    "Waiting on \"%s\"", cvp->cv_description);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * procs or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return 0;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	sig = cv_switch_catch(td);

	mtx_unlock_spin(&sched_lock);

	PROC_LOCK(p);
	if (sig == 0)
		sig = cursig(td);	/* XXXKSE */
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	if (p->p_flag & P_WEXIT)
		rval = EINTR;
	PROC_UNLOCK(p);

#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}
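
/*
 * Usage sketch for cv_wait_sig (illustrative only; the foo_* names are
 * hypothetical).  An error return is propagated so the syscall layer
 * can deliver the signal or restart the call:
 *
 *	mtx_lock(&foo_mtx);
 *	while (foo_ready == 0) {
 *		error = cv_wait_sig(&foo_cv, &foo_mtx);
 *		if (error != 0) {
 *			mtx_unlock(&foo_mtx);
 *			return (error);
 *		}
 *	}
 *	mtx_unlock(&foo_mtx);
 */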

/*
 * Wait on a condition variable for at most timo/hz seconds.  Returns 0 if the
 * process was resumed by cv_signal or cv_broadcast, EWOULDBLOCK if the timeout
 * expires.
 */
int
cv_timedwait(struct cv *cvp, struct mtx *mp, int timo)
{
	struct thread *td;
	int rval;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object,
	    "Waiting on \"%s\"", cvp->cv_description);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return 0;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
	cv_switch(td);

	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL)
		td->td_flags &= ~TDF_TIMOFAIL;
	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Work around race with cv_timedwait_end similar to that
		 * between msleep and endtsleep.
		 * Go back to sleep.
		 */
		TD_SET_SLEEPING(td);
		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		td->td_flags &= ~TDF_TIMOFAIL;
	}

	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}
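
/*
 * Usage sketch for cv_timedwait (illustrative only; the foo_* names
 * are hypothetical).  The timeout is in clock ticks, so a one second
 * bound is expressed as hz:
 *
 *	mtx_lock(&foo_mtx);
 *	while (foo_ready == 0) {
 *		error = cv_timedwait(&foo_cv, &foo_mtx, hz);
 *		if (error == EWOULDBLOCK)
 *			break;
 *	}
 *	(on EWOULDBLOCK the condition may still be false)
 *	mtx_unlock(&foo_mtx);
 */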

/*
 * Wait on a condition variable for at most timo/hz seconds, allowing
 * interruption by signals.  Returns 0 if the thread was resumed by cv_signal
 * or cv_broadcast, EWOULDBLOCK if the timeout expires, and EINTR or ERESTART if
 * a signal was caught.
 */
int
cv_timedwait_sig(struct cv *cvp, struct mtx *mp, int timo)
{
	struct thread *td;
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	p = td->td_proc;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object,
	    "Waiting on \"%s\"", cvp->cv_description);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return 0;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
	sig = cv_switch_catch(td);

	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL)
		td->td_flags &= ~TDF_TIMOFAIL;
	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Work around race with cv_timedwait_end similar to that
		 * between msleep and endtsleep.
		 * Go back to sleep.
		 */
		TD_SET_SLEEPING(td);
		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		td->td_flags &= ~TDF_TIMOFAIL;
	}
	mtx_unlock_spin(&sched_lock);

	PROC_LOCK(p);
	if (sig == 0)
		sig = cursig(td);
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	if (p->p_flag & P_WEXIT)
		rval = EINTR;
	PROC_UNLOCK(p);

#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}
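
/*
 * Usage sketch for cv_timedwait_sig (illustrative only; the foo_*
 * names are hypothetical).  A caller must be ready for all three
 * outcomes:
 *
 *	error = cv_timedwait_sig(&foo_cv, &foo_mtx, hz);
 *	if (error == EWOULDBLOCK)
 *		(handle the timeout)
 *	else if (error == EINTR || error == ERESTART)
 *		(return error to the syscall layer)
 */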

/*
 * Common code for signal and broadcast.  Assumes waitq is not empty.  Must be
 * called with sched_lock held.
 */
static __inline void
cv_wakeup(struct cv *cvp)
{
	struct thread *td;

	mtx_assert(&sched_lock, MA_OWNED);
	td = TAILQ_FIRST(&cvp->cv_waitq);
	KASSERT(td->td_wchan == cvp, ("%s: bogus wchan", __func__));
	KASSERT(td->td_flags & TDF_CVWAITQ, ("%s: not on waitq", __func__));
	cv_waitq_remove(td);
	TD_CLR_SLEEPING(td);
	setrunnable(td);
}

/*
 * Signal a condition variable, waking up one waiting thread.  Will also
 * wake up the swapper if the process is not in memory, so that it can
 * bring the sleeping process in.  Note that this may also result in
 * additional threads being made runnable.  Should be called with the
 * same mutex as was passed to cv_wait held.
 */
498 cv_signal(struct cv *cvp)
499 {
500 
501 	KASSERT(cvp != NULL, ("%s: cvp NULL", __func__));
502 	mtx_lock_spin(&sched_lock);
503 	if (!TAILQ_EMPTY(&cvp->cv_waitq)) {
504 		CV_SIGNAL_VALIDATE(cvp);
505 		cv_wakeup(cvp);
506 	}
507 	mtx_unlock_spin(&sched_lock);
508 }

/*
 * Broadcast a signal to a condition variable.  Wakes up all waiting threads.
 * Should be called with the same mutex as was passed to cv_wait held.
 */
void
cv_broadcast(struct cv *cvp)
{

	KASSERT(cvp != NULL, ("%s: cvp NULL", __func__));
	mtx_lock_spin(&sched_lock);
	CV_SIGNAL_VALIDATE(cvp);
	while (!TAILQ_EMPTY(&cvp->cv_waitq))
		cv_wakeup(cvp);
	mtx_unlock_spin(&sched_lock);
}
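
/*
 * Producer-side sketch (illustrative only; the foo_* names are
 * hypothetical).  The condition is changed and the waiters are woken
 * while holding the same mutex the waiters passed to cv_wait:
 *
 *	mtx_lock(&foo_mtx);
 *	foo_ready = 1;
 *	cv_broadcast(&foo_cv);
 *	mtx_unlock(&foo_mtx);
 */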

/*
 * Remove a thread from the wait queue of its condition variable.  This may be
 * called externally.
 */
void
cv_waitq_remove(struct thread *td)
{
	struct cv *cvp;

	mtx_assert(&sched_lock, MA_OWNED);
	if ((cvp = td->td_wchan) != NULL && td->td_flags & TDF_CVWAITQ) {
		TAILQ_REMOVE(&cvp->cv_waitq, td, td_slpq);
		td->td_flags &= ~TDF_CVWAITQ;
		td->td_wmesg = NULL;
		TD_CLR_ON_SLEEPQ(td);
	}
}

/*
 * Timeout function for cv_timedwait.  Put the thread on the runqueue and set
 * its timeout flag.
 */
static void
cv_timedwait_end(void *arg)
{
	struct thread *td;

	td = arg;
	CTR3(KTR_PROC, "cv_timedwait_end: thread %p (pid %d, %s)",
	    td, td->td_proc->p_pid, td->td_proc->p_comm);
	mtx_lock_spin(&sched_lock);
	if (TD_ON_SLEEPQ(td)) {
		cv_waitq_remove(td);
		td->td_flags |= TDF_TIMEOUT;
	} else {
		td->td_flags |= TDF_TIMOFAIL;
	}
	TD_CLR_SLEEPING(td);
	setrunnable(td);
	mtx_unlock_spin(&sched_lock);
}

/*
 * For now, only abort interruptible waits.
 * The others will have to either complete on their own or have a timeout.
 */
void
cv_abort(struct thread *td)
{

	CTR3(KTR_PROC, "cv_abort: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	mtx_lock_spin(&sched_lock);
	if ((td->td_flags & (TDF_SINTR|TDF_TIMEOUT)) == TDF_SINTR) {
		if (TD_ON_SLEEPQ(td)) {
			cv_waitq_remove(td);
		}
		TD_CLR_SLEEPING(td);
		setrunnable(td);
	}
	mtx_unlock_spin(&sched_lock);
}