/*-
 * Copyright (c) 2000 Jake Burkholder <jake@freebsd.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/condvar.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

/*
 * Common sanity checks for cv_wait* functions.
 */
#define	CV_ASSERT(cvp, mp, td) do {					\
	KASSERT((td) != NULL, ("%s: curthread NULL", __FUNCTION__));	\
	KASSERT((td)->td_proc->p_stat == SRUN,				\
	    ("%s: not SRUN", __FUNCTION__));				\
	KASSERT((cvp) != NULL, ("%s: cvp NULL", __FUNCTION__));		\
	KASSERT((mp) != NULL, ("%s: mp NULL", __FUNCTION__));		\
	mtx_assert((mp), MA_OWNED | MA_NOTRECURSED);			\
} while (0)

#ifdef CV_DEBUG
#define	CV_WAIT_VALIDATE(cvp, mp) do {					\
	if (TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		/* Only waiter. */					\
		(cvp)->cv_mtx = (mp);					\
	} else {							\
		/*							\
		 * Other waiter; assert that we're using the		\
		 * same mutex.						\
		 */							\
		KASSERT((cvp)->cv_mtx == (mp),				\
		    ("%s: Multiple mutexes", __FUNCTION__));		\
	}								\
} while (0)
#define	CV_SIGNAL_VALIDATE(cvp) do {					\
	if (!TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		KASSERT(mtx_owned((cvp)->cv_mtx),			\
		    ("%s: Mutex not owned", __FUNCTION__));		\
	}								\
} while (0)
#else
#define	CV_WAIT_VALIDATE(cvp, mp)
#define	CV_SIGNAL_VALIDATE(cvp)
#endif

static void cv_timedwait_end(void *arg);

/*
 * Initialize a condition variable.  Must be called before use.
 */
void
cv_init(struct cv *cvp, const char *desc)
{

	TAILQ_INIT(&cvp->cv_waitq);
	cvp->cv_mtx = NULL;
	cvp->cv_description = desc;
}

/*
 * Destroy a condition variable.  The condition variable must be re-initialized
 * in order to be re-used.
 */
void
cv_destroy(struct cv *cvp)
{

	KASSERT(cv_waitq_empty(cvp), ("%s: cv_waitq non-empty", __FUNCTION__));
}
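
/*
 * Example lifecycle (a sketch; the "softc" names below are hypothetical):
 * a condition variable is typically embedded in the structure it guards,
 * next to the mutex that will later be passed to cv_wait:
 *
 *	struct softc {
 *		struct mtx	sc_mtx;
 *		struct cv	sc_cv;
 *		int		sc_ready;
 *	};
 *
 *	mtx_init(&sc->sc_mtx, "sc lock", MTX_DEF);
 *	cv_init(&sc->sc_cv, "scready");
 *	...
 *	cv_destroy(&sc->sc_cv);
 *	mtx_destroy(&sc->sc_mtx);
 */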

/*
 * Common code for cv_wait* functions.  All require sched_lock.
 */

/*
 * Switch context.
 */
static __inline void
cv_switch(struct thread *td)
{

	td->td_proc->p_stat = SSLEEP;
	td->td_proc->p_stats->p_ru.ru_nvcsw++;
	mi_switch();
	CTR3(KTR_PROC, "cv_switch: resume thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
}

/*
 * Switch context, catching signals.
 */
static __inline int
cv_switch_catch(struct thread *td)
{
	int sig;

	/*
	 * We put ourselves on the sleep queue and start our timeout before
	 * calling CURSIG, as we could stop there, and a wakeup or a SIGCONT (or
	 * both) could occur while we were stopped.  A SIGCONT would cause us to
	 * be marked as SSLEEP without resuming us, thus we must be ready for
	 * sleep when CURSIG is called.  If the wakeup happens while we're
	 * stopped, td->td_wchan will be NULL upon return from CURSIG.
	 */
	td->td_flags |= TDF_SINTR;
	mtx_unlock_spin(&sched_lock);
	PROC_LOCK(td->td_proc);
	sig = CURSIG(td->td_proc);	/* XXXKSE */
	mtx_lock_spin(&sched_lock);
	PROC_UNLOCK_NOSWITCH(td->td_proc);
	if (sig != 0) {
		if (td->td_wchan != NULL)
			cv_waitq_remove(td);
		td->td_proc->p_stat = SRUN;
	} else if (td->td_wchan != NULL) {
		cv_switch(td);
	}
	td->td_flags &= ~TDF_SINTR;

	return (sig);
}

/*
 * Add a thread to the wait queue of a condition variable.
 */
static __inline void
cv_waitq_add(struct cv *cvp, struct thread *td)
{

	/*
	 * The thread may be sitting on a slpque if asleep() was called;
	 * remove it before re-adding.
	 */
	if (td->td_wchan != NULL)
		unsleep(td);

	td->td_flags |= TDF_CVWAITQ;
	td->td_wchan = cvp;
	td->td_wmesg = cvp->cv_description;
	td->td_kse->ke_slptime = 0; /* XXXKSE */
	td->td_ksegrp->kg_slptime = 0; /* XXXKSE */
	td->td_ksegrp->kg_pri.pri_native = td->td_ksegrp->kg_pri.pri_level;
	CTR3(KTR_PROC, "cv_waitq_add: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	TAILQ_INSERT_TAIL(&cvp->cv_waitq, td, td_slpq);
}

/*
 * Wait on a condition variable.  The current thread is placed on the condition
 * variable's wait queue and suspended.  A cv_signal or cv_broadcast on the same
 * condition variable will resume the thread.  The mutex is released before
 * sleeping and will be held on return.  It is recommended that the mutex be
 * held when cv_signal or cv_broadcast is called.
 */
void
cv_wait(struct cv *cvp, struct mtx *mp)
{
	struct thread *td;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
#ifdef KTRACE
	if (td->td_proc && KTRPOINT(td->td_proc, KTR_CSW))
		ktrcsw(td->td_proc->p_tracep, 1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	mtx_lock_spin(&sched_lock);
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		mtx_unlock_spin(&sched_lock);
		return;
	}
	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT_NOSWITCH();
	mtx_unlock_flags(mp, MTX_NOSWITCH);

	cv_waitq_add(cvp, td);
	cv_switch(td);

	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	if (KTRPOINT(td->td_proc, KTR_CSW))
		ktrcsw(td->td_proc->p_tracep, 0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);
}
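
/*
 * Example usage (a sketch; "sc" and its fields are hypothetical): since
 * cv_broadcast may wake several threads, and the condition may change again
 * before a woken thread reacquires the mutex, the predicate is re-checked
 * in a loop rather than tested once:
 *
 *	mtx_lock(&sc->sc_mtx);
 *	while (sc->sc_ready == 0)
 *		cv_wait(&sc->sc_cv, &sc->sc_mtx);
 *	sc->sc_ready = 0;
 *	mtx_unlock(&sc->sc_mtx);
 */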

/*
 * Wait on a condition variable, allowing interruption by signals.  Returns 0
 * if the thread was resumed with cv_signal or cv_broadcast, EINTR or ERESTART
 * if a signal was caught.  If ERESTART is returned the system call should be
 * restarted if possible.
 */
int
cv_wait_sig(struct cv *cvp, struct mtx *mp)
{
	struct thread *td;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	rval = 0;
#ifdef KTRACE
	if (td->td_proc && KTRPOINT(td->td_proc, KTR_CSW))
		ktrcsw(td->td_proc->p_tracep, 1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	mtx_lock_spin(&sched_lock);
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		mtx_unlock_spin(&sched_lock);
		return (0);
	}
	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT_NOSWITCH();
	mtx_unlock_flags(mp, MTX_NOSWITCH);

	cv_waitq_add(cvp, td);
	sig = cv_switch_catch(td);

	mtx_unlock_spin(&sched_lock);
	PICKUP_GIANT();

	PROC_LOCK(td->td_proc);
	if (sig == 0)
		sig = CURSIG(td->td_proc);	/* XXXKSE */
	if (sig != 0) {
		if (SIGISMEMBER(td->td_proc->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	PROC_UNLOCK(td->td_proc);

#ifdef KTRACE
	mtx_lock(&Giant);
	if (KTRPOINT(td->td_proc, KTR_CSW))
		ktrcsw(td->td_proc->p_tracep, 0, 0);
	mtx_unlock(&Giant);
#endif
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}
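
/*
 * Example usage (a sketch; names are hypothetical): a non-zero return must
 * abort the wait loop and is normally propagated so the syscall layer can
 * fail (EINTR) or restart (ERESTART) the call:
 *
 *	error = 0;
 *	mtx_lock(&sc->sc_mtx);
 *	while (sc->sc_ready == 0) {
 *		error = cv_wait_sig(&sc->sc_cv, &sc->sc_mtx);
 *		if (error != 0)
 *			break;
 *	}
 *	mtx_unlock(&sc->sc_mtx);
 *	return (error);
 */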

/*
 * Wait on a condition variable for at most timo/hz seconds.  Returns 0 if the
 * thread was resumed by cv_signal or cv_broadcast, EWOULDBLOCK if the timeout
 * expires.
 */
int
cv_timedwait(struct cv *cvp, struct mtx *mp, int timo)
{
	struct thread *td;
	int rval;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	rval = 0;
#ifdef KTRACE
	if (td->td_proc && KTRPOINT(td->td_proc, KTR_CSW))
		ktrcsw(td->td_proc->p_tracep, 1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	mtx_lock_spin(&sched_lock);
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		mtx_unlock_spin(&sched_lock);
		return (0);
	}
	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT_NOSWITCH();
	mtx_unlock_flags(mp, MTX_NOSWITCH);

	cv_waitq_add(cvp, td);
	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
	cv_switch(td);

	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL)
		td->td_flags &= ~TDF_TIMOFAIL;
	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Work around race with cv_timedwait_end similar to that
		 * between msleep and endtsleep.
		 */
		td->td_flags |= TDF_TIMEOUT;
		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
	}

	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	if (KTRPOINT(td->td_proc, KTR_CSW))
		ktrcsw(td->td_proc->p_tracep, 0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}
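
/*
 * Example usage (a sketch; names, including handle_timeout(), are
 * hypothetical): timo is in ticks, so a bound expressed in seconds is
 * scaled by hz.  Note that each pass through the loop restarts the full
 * timeout:
 *
 *	error = 0;
 *	mtx_lock(&sc->sc_mtx);
 *	while (sc->sc_ready == 0 && error == 0)
 *		error = cv_timedwait(&sc->sc_cv, &sc->sc_mtx, hz);
 *	mtx_unlock(&sc->sc_mtx);
 *	if (error == EWOULDBLOCK)
 *		handle_timeout(sc);
 */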

/*
 * Wait on a condition variable for at most timo/hz seconds, allowing
 * interruption by signals.  Returns 0 if the thread was resumed by cv_signal
 * or cv_broadcast, EWOULDBLOCK if the timeout expires, and EINTR or ERESTART
 * if a signal was caught.
 */
int
cv_timedwait_sig(struct cv *cvp, struct mtx *mp, int timo)
{
	struct thread *td;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	rval = 0;
#ifdef KTRACE
	if (td->td_proc && KTRPOINT(td->td_proc, KTR_CSW))
		ktrcsw(td->td_proc->p_tracep, 1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	mtx_lock_spin(&sched_lock);
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		mtx_unlock_spin(&sched_lock);
		return (0);
	}
	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT_NOSWITCH();
	mtx_unlock_flags(mp, MTX_NOSWITCH);

	cv_waitq_add(cvp, td);
	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
	sig = cv_switch_catch(td);

	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL)
		td->td_flags &= ~TDF_TIMOFAIL;
	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Work around race with cv_timedwait_end similar to that
		 * between msleep and endtsleep.
		 */
		td->td_flags |= TDF_TIMEOUT;
		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
	}

	mtx_unlock_spin(&sched_lock);
	PICKUP_GIANT();

	PROC_LOCK(td->td_proc);
	if (sig == 0)
		sig = CURSIG(td->td_proc);	/* XXXKSE */
	if (sig != 0) {
		if (SIGISMEMBER(td->td_proc->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	PROC_UNLOCK(td->td_proc);

#ifdef KTRACE
	mtx_lock(&Giant);
	if (KTRPOINT(td->td_proc, KTR_CSW))
		ktrcsw(td->td_proc->p_tracep, 0, 0);
	mtx_unlock(&Giant);
#endif
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}
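
/*
 * Example usage (a sketch; names are hypothetical): this variant merges
 * both failure modes, so callers distinguish three outcomes: 0 (woken by
 * cv_signal/cv_broadcast), EWOULDBLOCK (timed out), and EINTR/ERESTART
 * (signal caught, normally propagated to the syscall layer):
 *
 *	error = cv_timedwait_sig(&sc->sc_cv, &sc->sc_mtx, 5 * hz);
 *	if (error != 0 && error != EWOULDBLOCK) {
 *		mtx_unlock(&sc->sc_mtx);
 *		return (error);
 *	}
 */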

/*
 * Common code for signal and broadcast.  Assumes waitq is not empty.  Must be
 * called with sched_lock held.
 */
static __inline void
cv_wakeup(struct cv *cvp)
{
	struct thread *td;

	mtx_assert(&sched_lock, MA_OWNED);
	td = TAILQ_FIRST(&cvp->cv_waitq);
	KASSERT(td->td_wchan == cvp, ("%s: bogus wchan", __FUNCTION__));
	KASSERT(td->td_flags & TDF_CVWAITQ, ("%s: not on waitq", __FUNCTION__));
	TAILQ_REMOVE(&cvp->cv_waitq, td, td_slpq);
	td->td_flags &= ~TDF_CVWAITQ;
	td->td_wchan = NULL;
	if (td->td_proc->p_stat == SSLEEP) {
		/* OPTIMIZED EXPANSION OF setrunnable(td); */
		CTR3(KTR_PROC, "cv_wakeup: thread %p (pid %d, %s)",
		    td, td->td_proc->p_pid, td->td_proc->p_comm);
		if (td->td_ksegrp->kg_slptime > 1) /* XXXKSE */
			updatepri(td);
		td->td_kse->ke_slptime = 0;
		td->td_ksegrp->kg_slptime = 0;
		td->td_proc->p_stat = SRUN;
		if (td->td_proc->p_sflag & PS_INMEM) {
			setrunqueue(td);
			maybe_resched(td->td_ksegrp);
		} else {
			td->td_proc->p_sflag |= PS_SWAPINREQ;
			wakeup(&proc0); /* XXXKSE */
		}
		/* END INLINE EXPANSION */
	}
}

/*
 * Signal a condition variable, waking up one waiting thread.  Will also wake
 * up the swapper if the process is not in memory, so that it can bring the
 * sleeping process in.  Note that this may also result in additional threads
 * being made runnable.  Should be called with the same mutex as was passed to
 * cv_wait held.
 */
void
cv_signal(struct cv *cvp)
{

	KASSERT(cvp != NULL, ("%s: cvp NULL", __FUNCTION__));
	mtx_lock_spin(&sched_lock);
	if (!TAILQ_EMPTY(&cvp->cv_waitq)) {
		CV_SIGNAL_VALIDATE(cvp);
		cv_wakeup(cvp);
	}
	mtx_unlock_spin(&sched_lock);
}
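
/*
 * Example usage (a sketch; names are hypothetical): the producer updates
 * the predicate and signals while holding the same mutex the consumers
 * pass to cv_wait, so no wakeup can be lost between a consumer's test of
 * the predicate and its sleep:
 *
 *	mtx_lock(&sc->sc_mtx);
 *	sc->sc_ready = 1;
 *	cv_signal(&sc->sc_cv);
 *	mtx_unlock(&sc->sc_mtx);
 */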

/*
 * Broadcast a signal to a condition variable.  Wakes up all waiting threads.
 * Should be called with the same mutex as was passed to cv_wait held.
 */
void
cv_broadcast(struct cv *cvp)
{

	KASSERT(cvp != NULL, ("%s: cvp NULL", __FUNCTION__));
	mtx_lock_spin(&sched_lock);
	CV_SIGNAL_VALIDATE(cvp);
	while (!TAILQ_EMPTY(&cvp->cv_waitq))
		cv_wakeup(cvp);
	mtx_unlock_spin(&sched_lock);
}
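
/*
 * Example usage (a sketch; names are hypothetical): cv_broadcast suits a
 * state change that every waiter must observe, e.g. teardown, where each
 * woken thread re-checks its predicate and backs out:
 *
 *	mtx_lock(&sc->sc_mtx);
 *	sc->sc_dying = 1;
 *	cv_broadcast(&sc->sc_cv);
 *	mtx_unlock(&sc->sc_mtx);
 */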

/*
 * Remove a thread from the wait queue of its condition variable.  This may be
 * called externally.
 */
void
cv_waitq_remove(struct thread *td)
{
	struct cv *cvp;

	mtx_lock_spin(&sched_lock);
	if ((cvp = td->td_wchan) != NULL && td->td_flags & TDF_CVWAITQ) {
		TAILQ_REMOVE(&cvp->cv_waitq, td, td_slpq);
		td->td_flags &= ~TDF_CVWAITQ;
		td->td_wchan = NULL;
	}
	mtx_unlock_spin(&sched_lock);
}

/*
 * Timeout function for cv_timedwait.  Put the thread on the runqueue and set
 * its timeout flag.
 */
static void
cv_timedwait_end(void *arg)
{
	struct thread *td;

	td = arg;
	CTR3(KTR_PROC, "cv_timedwait_end: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	mtx_lock_spin(&sched_lock);
	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		setrunqueue(td);
	} else if (td->td_wchan != NULL) {
		if (td->td_proc->p_stat == SSLEEP) /* XXXKSE */
			setrunnable(td);
		else
			cv_waitq_remove(td);
		td->td_flags |= TDF_TIMEOUT;
	} else
		td->td_flags |= TDF_TIMOFAIL;
	mtx_unlock_spin(&sched_lock);
}