xref: /freebsd/sys/kern/kern_condvar.c (revision 390e8cc2974df1888369c06339ef8e0e92b312b6)
/*-
 * Copyright (c) 2000 Jake Burkholder <jake@freebsd.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/condvar.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

/*
 * Common sanity checks for cv_wait* functions.
 */
#define	CV_ASSERT(cvp, mp, td) do {					\
	KASSERT((td) != NULL, ("%s: curthread NULL", __func__));	\
	KASSERT(TD_IS_RUNNING(td), ("%s: not TDS_RUNNING", __func__));	\
	KASSERT((cvp) != NULL, ("%s: cvp NULL", __func__));		\
	KASSERT((mp) != NULL, ("%s: mp NULL", __func__));		\
	mtx_assert((mp), MA_OWNED | MA_NOTRECURSED);			\
} while (0)

#ifdef INVARIANTS
#define	CV_WAIT_VALIDATE(cvp, mp) do {					\
	if (TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		/* Only waiter. */					\
		(cvp)->cv_mtx = (mp);					\
	} else {							\
		/*							\
		 * Other waiter; assert that we're using the		\
		 * same mutex.						\
		 */							\
		KASSERT((cvp)->cv_mtx == (mp),				\
		    ("%s: Multiple mutexes", __func__));		\
	}								\
} while (0)

#define	CV_SIGNAL_VALIDATE(cvp) do {					\
	if (!TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		KASSERT(mtx_owned((cvp)->cv_mtx),			\
		    ("%s: Mutex not owned", __func__));			\
	}								\
} while (0)

#else
#define	CV_WAIT_VALIDATE(cvp, mp)
#define	CV_SIGNAL_VALIDATE(cvp)
#endif

static void cv_timedwait_end(void *arg);

/*
 * Initialize a condition variable.  Must be called before use.
 */
void
cv_init(struct cv *cvp, const char *desc)
{

	TAILQ_INIT(&cvp->cv_waitq);
	cvp->cv_mtx = NULL;
	cvp->cv_description = desc;
}

/*
 * Destroy a condition variable.  The condition variable must be re-initialized
 * in order to be re-used.
 */
void
cv_destroy(struct cv *cvp)
{

	KASSERT(cv_waitq_empty(cvp), ("%s: cv_waitq non-empty", __func__));
}
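
/*
 * Usage sketch (illustrative only; "xx_softc" and its members are
 * hypothetical names, not part of this file).  A condition variable is
 * normally embedded next to the mutex and the predicate it guards:
 *
 *	struct xx_softc {
 *		struct mtx	xx_mtx;
 *		struct cv	xx_cv;
 *		int		xx_ready;
 *	};
 *
 *	mtx_init(&sc->xx_mtx, "xx lock", NULL, MTX_DEF);
 *	cv_init(&sc->xx_cv, "xxready");
 *	...
 *	cv_destroy(&sc->xx_cv);		(its wait queue must be empty)
 *	mtx_destroy(&sc->xx_mtx);
 */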

/*
 * Common code for cv_wait* functions.  All require sched_lock.
 */

/*
 * Switch context.
 */
static __inline void
cv_switch(struct thread *td)
{
	TD_SET_SLEEPING(td);
	td->td_proc->p_stats->p_ru.ru_nvcsw++;
	mi_switch();
	CTR3(KTR_PROC, "cv_switch: resume thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
}

/*
 * Switch context, catching signals.
 */
static __inline int
cv_switch_catch(struct thread *td)
{
	struct proc *p;
	int sig;

	/*
	 * We put ourselves on the sleep queue and start our timeout before
	 * calling cursig, as we could stop there, and a wakeup or a SIGCONT (or
	 * both) could occur while we were stopped.  A SIGCONT would cause us to
	 * be marked as TDS_SLP without resuming us, thus we must be ready for
	 * sleep when cursig is called.  If the wakeup happens while we're
	 * stopped, td->td_wchan will be 0 upon return from cursig,
	 * and TD_ON_SLEEPQ() will return false.
	 */
	td->td_flags |= TDF_SINTR;
	mtx_unlock_spin(&sched_lock);
	p = td->td_proc;
	PROC_LOCK(p);
	mtx_lock(&p->p_sigacts->ps_mtx);
	sig = cursig(td);
	mtx_unlock(&p->p_sigacts->ps_mtx);
	if (thread_suspend_check(1))
		sig = SIGSTOP;
	mtx_lock_spin(&sched_lock);
	PROC_UNLOCK(p);
	if (sig != 0) {
		if (TD_ON_SLEEPQ(td))
			cv_waitq_remove(td);
		TD_SET_RUNNING(td);
	} else if (TD_ON_SLEEPQ(td)) {
		cv_switch(td);
	}
	td->td_flags &= ~TDF_SINTR;

	return sig;
}

/*
 * Add a thread to the wait queue of a condition variable.
 */
static __inline void
cv_waitq_add(struct cv *cvp, struct thread *td)
{

	td->td_flags |= TDF_CVWAITQ;
	TD_SET_ON_SLEEPQ(td);
	td->td_wchan = cvp;
	td->td_wmesg = cvp->cv_description;
	CTR3(KTR_PROC, "cv_waitq_add: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	TAILQ_INSERT_TAIL(&cvp->cv_waitq, td, td_slpq);
	sched_sleep(td, td->td_priority);
}

/*
 * Wait on a condition variable.  The current thread is placed on the condition
 * variable's wait queue and suspended.  A cv_signal or cv_broadcast on the same
 * condition variable will resume the thread.  The mutex is released before
 * sleeping and will be held on return.  It is recommended that the mutex be
 * held when cv_signal or cv_broadcast is called.
 */
void
cv_wait(struct cv *cvp, struct mtx *mp)
{
	struct thread *td;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object,
	    "Waiting on \"%s\"", cvp->cv_description);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	cv_switch(td);

	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);
}
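
/*
 * Usage sketch (hypothetical consumer code, reusing the invented "xx"
 * names from the cv_init example).  Because the mutex is dropped while
 * the thread sleeps, the predicate must be re-tested in a loop after
 * cv_wait returns:
 *
 *	mtx_lock(&sc->xx_mtx);
 *	while (sc->xx_ready == 0)
 *		cv_wait(&sc->xx_cv, &sc->xx_mtx);
 *	...the mutex is held again here; consume the event...
 *	mtx_unlock(&sc->xx_mtx);
 */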

/*
 * Wait on a condition variable, allowing interruption by signals.  Returns 0 if
 * the thread was resumed with cv_signal or cv_broadcast, EINTR or ERESTART if
 * a signal was caught.  If ERESTART is returned the system call should be
 * restarted if possible.
 */
int
cv_wait_sig(struct cv *cvp, struct mtx *mp)
{
	struct thread *td;
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	p = td->td_proc;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object,
	    "Waiting on \"%s\"", cvp->cv_description);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * procs or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return 0;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	sig = cv_switch_catch(td);

	mtx_unlock_spin(&sched_lock);

	PROC_LOCK(p);
	mtx_lock(&p->p_sigacts->ps_mtx);
	if (sig == 0)
		sig = cursig(td);	/* XXXKSE */
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	mtx_unlock(&p->p_sigacts->ps_mtx);
	if (p->p_flag & P_WEXIT)
		rval = EINTR;
	PROC_UNLOCK(p);

#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}
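
/*
 * Usage sketch (hypothetical, "xx" names as above): unlike cv_wait,
 * the return value must be checked, since a caught signal ends the
 * wait before the predicate becomes true:
 *
 *	mtx_lock(&sc->xx_mtx);
 *	while (sc->xx_ready == 0) {
 *		error = cv_wait_sig(&sc->xx_cv, &sc->xx_mtx);
 *		if (error != 0) {
 *			mtx_unlock(&sc->xx_mtx);
 *			return (error);		(EINTR or ERESTART)
 *		}
 *	}
 *	mtx_unlock(&sc->xx_mtx);
 */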

/*
 * Wait on a condition variable for at most timo/hz seconds.  Returns 0 if the
 * thread was resumed by cv_signal or cv_broadcast, or EWOULDBLOCK if the
 * timeout expires.
 */
int
cv_timedwait(struct cv *cvp, struct mtx *mp, int timo)
{
	struct thread *td;
	int rval;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object,
	    "Waiting on \"%s\"", cvp->cv_description);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return 0;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
	cv_switch(td);

	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL)
		td->td_flags &= ~TDF_TIMOFAIL;
	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Work around race with cv_timedwait_end similar to that
		 * between msleep and endtsleep.
		 * Go back to sleep.
		 */
		TD_SET_SLEEPING(td);
		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		td->td_flags &= ~TDF_TIMOFAIL;
	}

	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}
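
/*
 * Usage sketch (hypothetical, "xx" names as above): timo is expressed
 * in ticks, so callers usually scale by hz.  Here the caller gives up
 * after roughly five seconds:
 *
 *	mtx_lock(&sc->xx_mtx);
 *	while (sc->xx_ready == 0) {
 *		error = cv_timedwait(&sc->xx_cv, &sc->xx_mtx, 5 * hz);
 *		if (error == EWOULDBLOCK)
 *			break;
 *	}
 *	mtx_unlock(&sc->xx_mtx);
 */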

/*
 * Wait on a condition variable for at most timo/hz seconds, allowing
 * interruption by signals.  Returns 0 if the thread was resumed by cv_signal
 * or cv_broadcast, EWOULDBLOCK if the timeout expires, and EINTR or ERESTART if
 * a signal was caught.
 */
int
cv_timedwait_sig(struct cv *cvp, struct mtx *mp, int timo)
{
	struct thread *td;
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	p = td->td_proc;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &mp->mtx_object,
	    "Waiting on \"%s\"", cvp->cv_description);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return 0;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
	sig = cv_switch_catch(td);

	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL)
		td->td_flags &= ~TDF_TIMOFAIL;
	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Work around race with cv_timedwait_end similar to that
		 * between msleep and endtsleep.
		 * Go back to sleep.
		 */
		TD_SET_SLEEPING(td);
		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		td->td_flags &= ~TDF_TIMOFAIL;
	}
	mtx_unlock_spin(&sched_lock);

	PROC_LOCK(p);
	mtx_lock(&p->p_sigacts->ps_mtx);
	if (sig == 0)
		sig = cursig(td);
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	mtx_unlock(&p->p_sigacts->ps_mtx);
	if (p->p_flag & P_WEXIT)
		rval = EINTR;
	PROC_UNLOCK(p);

#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}
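
/*
 * Usage sketch (hypothetical): all three outcomes are distinguished
 * by the return value:
 *
 *	error = cv_timedwait_sig(&sc->xx_cv, &sc->xx_mtx, 5 * hz);
 *	if (error == EWOULDBLOCK)
 *		...the timeout expired...
 *	else if (error == EINTR || error == ERESTART)
 *		...a signal was caught...
 *	else
 *		...awakened by cv_signal or cv_broadcast...
 */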

/*
 * Common code for signal and broadcast.  Assumes waitq is not empty.  Must be
 * called with sched_lock held.
 */
static __inline void
cv_wakeup(struct cv *cvp)
{
	struct thread *td;

	mtx_assert(&sched_lock, MA_OWNED);
	td = TAILQ_FIRST(&cvp->cv_waitq);
	KASSERT(td->td_wchan == cvp, ("%s: bogus wchan", __func__));
	KASSERT(td->td_flags & TDF_CVWAITQ, ("%s: not on waitq", __func__));
	cv_waitq_remove(td);
	TD_CLR_SLEEPING(td);
	setrunnable(td);
}

/*
 * Signal a condition variable, waking up one waiting thread.  This will also
 * wake up the swapper if the process is not in memory, so that it can bring
 * the sleeping process in.  Note that this may also result in additional
 * threads being made runnable.  Should be called with the same mutex as was
 * passed to cv_wait held.
 */
void
cv_signal(struct cv *cvp)
{

	KASSERT(cvp != NULL, ("%s: cvp NULL", __func__));
	mtx_lock_spin(&sched_lock);
	if (!TAILQ_EMPTY(&cvp->cv_waitq)) {
		CV_SIGNAL_VALIDATE(cvp);
		cv_wakeup(cvp);
	}
	mtx_unlock_spin(&sched_lock);
}

/*
 * Broadcast a signal to a condition variable.  Wakes up all waiting threads.
 * Should be called with the same mutex as was passed to cv_wait held.
 */
void
cv_broadcast(struct cv *cvp)
{

	KASSERT(cvp != NULL, ("%s: cvp NULL", __func__));
	mtx_lock_spin(&sched_lock);
	CV_SIGNAL_VALIDATE(cvp);
	while (!TAILQ_EMPTY(&cvp->cv_waitq))
		cv_wakeup(cvp);
	mtx_unlock_spin(&sched_lock);
}
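
/*
 * Usage sketch (hypothetical producer side, "xx" names as above).
 * The predicate is updated and the waiters are awakened while the
 * mutex is held; cv_signal suffices when any one waiter can consume
 * the event, while cv_broadcast is needed when every waiter must
 * re-test the predicate:
 *
 *	mtx_lock(&sc->xx_mtx);
 *	sc->xx_ready = 1;
 *	cv_broadcast(&sc->xx_cv);
 *	mtx_unlock(&sc->xx_mtx);
 */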

/*
 * Remove a thread from the wait queue of its condition variable.  This may be
 * called externally.
 */
void
cv_waitq_remove(struct thread *td)
{
	struct cv *cvp;

	mtx_assert(&sched_lock, MA_OWNED);
	if ((cvp = td->td_wchan) != NULL && td->td_flags & TDF_CVWAITQ) {
		TAILQ_REMOVE(&cvp->cv_waitq, td, td_slpq);
		td->td_flags &= ~TDF_CVWAITQ;
		td->td_wmesg = NULL;
		TD_CLR_ON_SLEEPQ(td);
	}
}

/*
 * Timeout function for cv_timedwait.  Put the thread on the runqueue and set
 * its timeout flag.
 */
static void
cv_timedwait_end(void *arg)
{
	struct thread *td;

	td = arg;
	CTR3(KTR_PROC, "cv_timedwait_end: thread %p (pid %d, %s)",
	    td, td->td_proc->p_pid, td->td_proc->p_comm);
	mtx_lock_spin(&sched_lock);
	if (TD_ON_SLEEPQ(td)) {
		cv_waitq_remove(td);
		td->td_flags |= TDF_TIMEOUT;
	} else {
		td->td_flags |= TDF_TIMOFAIL;
	}
	TD_CLR_SLEEPING(td);
	setrunnable(td);
	mtx_unlock_spin(&sched_lock);
}

/*
 * For now, only abort interruptible waits.
 * The others will have to either complete on their own or have a timeout.
 */
void
cv_abort(struct thread *td)
{

	CTR3(KTR_PROC, "cv_abort: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	mtx_lock_spin(&sched_lock);
	if ((td->td_flags & (TDF_SINTR|TDF_TIMEOUT)) == TDF_SINTR) {
		if (TD_ON_SLEEPQ(td)) {
			cv_waitq_remove(td);
		}
		TD_CLR_SLEEPING(td);
		setrunnable(td);
	}
	mtx_unlock_spin(&sched_lock);
}