xref: /freebsd/sys/kern/kern_condvar.c (revision eacee0ff7ec955b32e09515246bd97b6edcd2b0f)
1 /*-
2  * Copyright (c) 2000 Jake Burkholder <jake@freebsd.org>.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 #include "opt_ktrace.h"
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/lock.h>
34 #include <sys/mutex.h>
35 #include <sys/proc.h>
36 #include <sys/kernel.h>
37 #include <sys/ktr.h>
38 #include <sys/condvar.h>
39 #include <sys/signalvar.h>
40 #include <sys/resourcevar.h>
41 #ifdef KTRACE
42 #include <sys/uio.h>
43 #include <sys/ktrace.h>
44 #endif
45 
46 /*
47  * Common sanity checks for cv_wait* functions.
48  */
#define	CV_ASSERT(cvp, mp, td) do {					\
	KASSERT((td) != NULL, ("%s: curthread NULL", __func__));	\
	KASSERT((td)->td_proc->p_stat == SRUN, ("%s: not SRUN", __func__));	\
	KASSERT((cvp) != NULL, ("%s: cvp NULL", __func__));		\
	KASSERT((mp) != NULL, ("%s: mp NULL", __func__));		\
	mtx_assert((mp), MA_OWNED | MA_NOTRECURSED);			\
} while (0)

#ifdef CV_DEBUG
/*
 * Debug-only consistency checks: every waiter on a given condition
 * variable must be using the same mutex (CV_WAIT_VALIDATE), and that
 * mutex must be held when signalling while there are waiters
 * (CV_SIGNAL_VALIDATE).  Compiled away unless CV_DEBUG is defined.
 */
#define	CV_WAIT_VALIDATE(cvp, mp) do {					\
	if (TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		/* Only waiter. */					\
		(cvp)->cv_mtx = (mp);					\
	} else {							\
		/*							\
		 * Other waiter; assert that we're using the		\
		 * same mutex.						\
		 */							\
		KASSERT((cvp)->cv_mtx == (mp),				\
		    ("%s: Multiple mutexes", __func__));		\
	}								\
} while (0)
#define	CV_SIGNAL_VALIDATE(cvp) do {					\
	if (!TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		KASSERT(mtx_owned((cvp)->cv_mtx),			\
		    ("%s: Mutex not owned", __func__));		\
	}								\
} while (0)
#else
#define	CV_WAIT_VALIDATE(cvp, mp)
#define	CV_SIGNAL_VALIDATE(cvp)
#endif
81 
82 static void cv_timedwait_end(void *arg);
83 
84 /*
85  * Initialize a condition variable.  Must be called before use.
86  */
87 void
88 cv_init(struct cv *cvp, const char *desc)
89 {
90 
91 	TAILQ_INIT(&cvp->cv_waitq);
92 	cvp->cv_mtx = NULL;
93 	cvp->cv_description = desc;
94 }
95 
96 /*
97  * Destroy a condition variable.  The condition variable must be re-initialized
98  * in order to be re-used.
99  */
void
cv_destroy(struct cv *cvp)
{

	/*
	 * Nothing is dynamically allocated; just assert that no thread
	 * is still sleeping on this condition variable.
	 */
	KASSERT(cv_waitq_empty(cvp), ("%s: cv_waitq non-empty", __func__));
}
106 
107 /*
108  * Common code for cv_wait* functions.  All require sched_lock.
109  */
110 
111 /*
112  * Switch context.
113  */
static __inline void
cv_switch(struct thread *td)
{

	/*
	 * Mark the thread asleep and account a voluntary context switch
	 * before handing the CPU away; execution resumes here after a
	 * wakeup.  Caller holds sched_lock.
	 */
	td->td_proc->p_stat = SSLEEP;
	td->td_proc->p_stats->p_ru.ru_nvcsw++;
	mi_switch();
	CTR3(KTR_PROC, "cv_switch: resume thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
}
124 
125 /*
126  * Switch context, catching signals.
127  */
static __inline int
cv_switch_catch(struct thread *td)
{
	struct proc *p;
	int sig;

	/*
	 * We put ourselves on the sleep queue and start our timeout before
	 * calling CURSIG, as we could stop there, and a wakeup or a SIGCONT (or
	 * both) could occur while we were stopped.  A SIGCONT would cause us to
	 * be marked as SSLEEP without resuming us, thus we must be ready for
	 * sleep when CURSIG is called.  If the wakeup happens while we're
	 * stopped, td->td_wchan will be 0 upon return from CURSIG.
	 */
	td->td_flags |= TDF_SINTR;
	/* CURSIG needs the proc lock, which cannot nest inside sched_lock. */
	mtx_unlock_spin(&sched_lock);
	p = td->td_proc;
	PROC_LOCK(p);
	sig = CURSIG(p);	/* XXXKSE */
	mtx_lock_spin(&sched_lock);
	PROC_UNLOCK(p);
	if (sig != 0) {
		/* Signal pending: abort the sleep and stay runnable. */
		if (td->td_wchan != NULL)
			cv_waitq_remove(td);
		td->td_proc->p_stat = SRUN;
	} else if (td->td_wchan != NULL) {
		/* No signal and still queued: go to sleep for real. */
		cv_switch(td);
	}
	td->td_flags &= ~TDF_SINTR;

	return sig;
}
160 
161 /*
162  * Add a thread to the wait queue of a condition variable.
163  */
static __inline void
cv_waitq_add(struct cv *cvp, struct thread *td)
{

	/*
	 * Process may be sitting on a slpque if asleep() was called, remove it
	 * before re-adding.
	 */
	if (td->td_wchan != NULL)
		unsleep(td);

	/* Mark the thread as a cv waiter and record what it sleeps on. */
	td->td_flags |= TDF_CVWAITQ;
	td->td_wchan = cvp;
	td->td_wmesg = cvp->cv_description;
	td->td_kse->ke_slptime = 0; /* XXXKSE */
	td->td_ksegrp->kg_slptime = 0; /* XXXKSE */
	td->td_base_pri = td->td_priority;
	CTR3(KTR_PROC, "cv_waitq_add: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	/* FIFO queue: append here; cv_wakeup removes from the head. */
	TAILQ_INSERT_TAIL(&cvp->cv_waitq, td, td_slpq);
}
185 
186 /*
187  * Wait on a condition variable.  The current thread is placed on the condition
188  * variable's wait queue and suspended.  A cv_signal or cv_broadcast on the same
189  * condition variable will resume the thread.  The mutex is released before
190  * sleeping and will be held on return.  It is recommended that the mutex be
191  * held when cv_signal or cv_broadcast are called.
192  */
void
cv_wait(struct cv *cvp, struct mtx *mp)
{
	struct thread *td;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
#ifdef KTRACE
	/* Record the impending context switch for ktrace. */
	if (td->td_proc && KTRPOINT(td->td_proc, KTR_CSW))
		ktrcsw(td->td_proc->p_tracep, 1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	mtx_lock_spin(&sched_lock);
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		mtx_unlock_spin(&sched_lock);
		return;
	}
	CV_WAIT_VALIDATE(cvp, mp);

	/*
	 * Release Giant and the caller's mutex before sleeping; sched_lock
	 * is still held, so a wakeup cannot slip in between the unlock and
	 * our being queued.
	 */
	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	cv_switch(td);

	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	if (KTRPOINT(td->td_proc, KTR_CSW))
		ktrcsw(td->td_proc->p_tracep, 0, 0);
#endif
	PICKUP_GIANT();
	/* Reacquire the caller's mutex; it is held on return. */
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);
}
236 
237 /*
238  * Wait on a condition variable, allowing interruption by signals.  Return 0 if
239  * the thread was resumed with cv_signal or cv_broadcast, EINTR or ERESTART if
240  * a signal was caught.  If ERESTART is returned the system call should be
241  * restarted if possible.
242  */
int
cv_wait_sig(struct cv *cvp, struct mtx *mp)
{
	struct thread *td;
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	p = td->td_proc;
	rval = 0;
#ifdef KTRACE
	/* Record the impending context switch for ktrace. */
	if (td->td_proc && KTRPOINT(td->td_proc, KTR_CSW))
		ktrcsw(td->td_proc->p_tracep, 1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	mtx_lock_spin(&sched_lock);
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * procs or panic below, in case this is the idle process and
		 * already asleep.
		 */
		mtx_unlock_spin(&sched_lock);
		return 0;
	}
	CV_WAIT_VALIDATE(cvp, mp);

	/* Drop Giant and the caller's mutex while still holding sched_lock. */
	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	sig = cv_switch_catch(td);

	mtx_unlock_spin(&sched_lock);
	PICKUP_GIANT();

	/*
	 * If cv_switch_catch did not already see a signal, check again now
	 * that we are awake.  A pending signal yields EINTR if it is in
	 * ps_sigintr, ERESTART otherwise.
	 */
	PROC_LOCK(p);
	if (sig == 0)
		sig = CURSIG(p);  /* XXXKSE */
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	PROC_UNLOCK(p);

#ifdef KTRACE
	mtx_lock(&Giant);
	if (KTRPOINT(td->td_proc, KTR_CSW))
		ktrcsw(td->td_proc->p_tracep, 0, 0);
	mtx_unlock(&Giant);
#endif
	/* Reacquire the caller's mutex; it is held on return. */
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}
307 
308 /*
309  * Wait on a condition variable for at most timo/hz seconds.  Returns 0 if the
310  * process was resumed by cv_signal or cv_broadcast, EWOULDBLOCK if the timeout
311  * expires.
312  */
313 int
314 cv_timedwait(struct cv *cvp, struct mtx *mp, int timo)
315 {
316 	struct thread *td;
317 	int rval;
318 	WITNESS_SAVE_DECL(mp);
319 
320 	td = curthread;
321 	rval = 0;
322 #ifdef KTRACE
323 		ktrcsw(td->td_proc->p_tracep, 1, 0);
324 #endif
325 	CV_ASSERT(cvp, mp, td);
326 	WITNESS_SLEEP(0, &mp->mtx_object);
327 	WITNESS_SAVE(&mp->mtx_object, mp);
328 
329 	mtx_lock_spin(&sched_lock);
330 	if (cold || panicstr) {
331 		/*
332 		 * After a panic, or during autoconfiguration, just give
333 		 * interrupts a chance, then just return; don't run any other
334 		 * thread or panic below, in case this is the idle process and
335 		 * already asleep.
336 		 */
337 		mtx_unlock_spin(&sched_lock);
338 		return 0;
339 	}
340 	CV_WAIT_VALIDATE(cvp, mp);
341 
342 	DROP_GIANT();
343 	mtx_unlock(mp);
344 
345 	cv_waitq_add(cvp, td);
346 	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
347 	cv_switch(td);
348 
349 	if (td->td_flags & TDF_TIMEOUT) {
350 		td->td_flags &= ~TDF_TIMEOUT;
351 		rval = EWOULDBLOCK;
352 	} else if (td->td_flags & TDF_TIMOFAIL)
353 		td->td_flags &= ~TDF_TIMOFAIL;
354 	else if (callout_stop(&td->td_slpcallout) == 0) {
355 		/*
356 		 * Work around race with cv_timedwait_end similar to that
357 		 * between msleep and endtsleep.
358 		 */
359 		td->td_flags |= TDF_TIMEOUT;
360 		td->td_proc->p_stats->p_ru.ru_nivcsw++;
361 		mi_switch();
362 	}
363 
364 	mtx_unlock_spin(&sched_lock);
365 #ifdef KTRACE
366 	if (KTRPOINT(td->td_proc, KTR_CSW))
367 		ktrcsw(td->td_proc->p_tracep, 0, 0);
368 #endif
369 	PICKUP_GIANT();
370 	mtx_lock(mp);
371 	WITNESS_RESTORE(&mp->mtx_object, mp);
372 
373 	return (rval);
374 }
375 
376 /*
377  * Wait on a condition variable for at most timo/hz seconds, allowing
378  * interruption by signals.  Returns 0 if the thread was resumed by cv_signal
379  * or cv_broadcast, EWOULDBLOCK if the timeout expires, and EINTR or ERESTART if
380  * a signal was caught.
381  */
int
cv_timedwait_sig(struct cv *cvp, struct mtx *mp, int timo)
{
	struct thread *td;
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	p = td->td_proc;
	rval = 0;
#ifdef KTRACE
	/* Record the impending context switch for ktrace. */
	if (td->td_proc && KTRPOINT(td->td_proc, KTR_CSW))
		ktrcsw(td->td_proc->p_tracep, 1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	mtx_lock_spin(&sched_lock);
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		mtx_unlock_spin(&sched_lock);
		return 0;
	}
	CV_WAIT_VALIDATE(cvp, mp);

	/* Drop Giant and the caller's mutex while still holding sched_lock. */
	DROP_GIANT();
	mtx_unlock(mp);

	/* Queue ourselves, arm the timeout, then sleep catching signals. */
	cv_waitq_add(cvp, td);
	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
	sig = cv_switch_catch(td);

	if (td->td_flags & TDF_TIMEOUT) {
		/* cv_timedwait_end fired and woke us: report the timeout. */
		td->td_flags &= ~TDF_TIMEOUT;
		rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL)
		/* The callout fired after we were already woken. */
		td->td_flags &= ~TDF_TIMOFAIL;
	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Work around race with cv_timedwait_end similar to that
		 * between msleep and endtsleep.
		 */
		td->td_flags |= TDF_TIMEOUT;
		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
	}

	mtx_unlock_spin(&sched_lock);
	PICKUP_GIANT();

	/*
	 * A signal caught during the wait overrides EWOULDBLOCK: EINTR if
	 * the signal is in ps_sigintr, ERESTART otherwise.
	 */
	PROC_LOCK(p);
	if (sig == 0)
		sig = CURSIG(p);
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	PROC_UNLOCK(p);

#ifdef KTRACE
	mtx_lock(&Giant);
	if (KTRPOINT(td->td_proc, KTR_CSW))
		ktrcsw(td->td_proc->p_tracep, 0, 0);
	mtx_unlock(&Giant);
#endif
	/* Reacquire the caller's mutex; it is held on return. */
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}
462 
463 /*
464  * Common code for signal and broadcast.  Assumes waitq is not empty.  Must be
465  * called with sched_lock held.
466  */
static __inline void
cv_wakeup(struct cv *cvp)
{
	struct thread *td;

	mtx_assert(&sched_lock, MA_OWNED);
	/* FIFO: wake the head of the queue (the longest waiter). */
	td = TAILQ_FIRST(&cvp->cv_waitq);
	KASSERT(td->td_wchan == cvp, ("%s: bogus wchan", __func__));
	KASSERT(td->td_flags & TDF_CVWAITQ, ("%s: not on waitq", __func__));
	TAILQ_REMOVE(&cvp->cv_waitq, td, td_slpq);
	td->td_flags &= ~TDF_CVWAITQ;
	td->td_wchan = 0;
	if (td->td_proc->p_stat == SSLEEP) {
		/* OPTIMIZED EXPANSION OF setrunnable(td); */
		CTR3(KTR_PROC, "cv_signal: thread %p (pid %d, %s)",
		    td, td->td_proc->p_pid, td->td_proc->p_comm);
		if (td->td_ksegrp->kg_slptime > 1) /* XXXKSE */
			updatepri(td);
		td->td_kse->ke_slptime = 0;
		td->td_ksegrp->kg_slptime = 0;
		td->td_proc->p_stat = SRUN;
		if (td->td_proc->p_sflag & PS_INMEM) {
			setrunqueue(td);
			maybe_resched(td);
		} else {
			/* Swapped out: ask the swapper to bring it back in. */
			td->td_proc->p_sflag |= PS_SWAPINREQ;
			wakeup(&proc0); /* XXXKSE */
		}
		/* END INLINE EXPANSION */
	}
}
498 
499 /*
500  * Signal a condition variable, wakes up one waiting thread.  Will also wakeup
501  * the swapper if the process is not in memory, so that it can bring the
502  * sleeping process in.  Note that this may also result in additional threads
503  * being made runnable.  Should be called with the same mutex as was passed to
504  * cv_wait held.
505  */
506 void
507 cv_signal(struct cv *cvp)
508 {
509 
510 	KASSERT(cvp != NULL, ("%s: cvp NULL", __func__));
511 	mtx_lock_spin(&sched_lock);
512 	if (!TAILQ_EMPTY(&cvp->cv_waitq)) {
513 		CV_SIGNAL_VALIDATE(cvp);
514 		cv_wakeup(cvp);
515 	}
516 	mtx_unlock_spin(&sched_lock);
517 }
518 
519 /*
520  * Broadcast a signal to a condition variable.  Wakes up all waiting threads.
521  * Should be called with the same mutex as was passed to cv_wait held.
522  */
523 void
524 cv_broadcast(struct cv *cvp)
525 {
526 
527 	KASSERT(cvp != NULL, ("%s: cvp NULL", __func__));
528 	mtx_lock_spin(&sched_lock);
529 	CV_SIGNAL_VALIDATE(cvp);
530 	while (!TAILQ_EMPTY(&cvp->cv_waitq))
531 		cv_wakeup(cvp);
532 	mtx_unlock_spin(&sched_lock);
533 }
534 
535 /*
536  * Remove a thread from the wait queue of its condition variable.  This may be
537  * called externally.
538  */
539 void
540 cv_waitq_remove(struct thread *td)
541 {
542 	struct cv *cvp;
543 
544 	mtx_lock_spin(&sched_lock);
545 	if ((cvp = td->td_wchan) != NULL && td->td_flags & TDF_CVWAITQ) {
546 		TAILQ_REMOVE(&cvp->cv_waitq, td, td_slpq);
547 		td->td_flags &= ~TDF_CVWAITQ;
548 		td->td_wchan = NULL;
549 	}
550 	mtx_unlock_spin(&sched_lock);
551 }
552 
553 /*
554  * Timeout function for cv_timedwait.  Put the thread on the runqueue and set
555  * its timeout flag.
556  */
static void
cv_timedwait_end(void *arg)
{
	struct thread *td;

	td = arg;
	CTR3(KTR_PROC, "cv_timedwait_end: thread %p (pid %d, %s)", td, td->td_proc->p_pid,
	    td->td_proc->p_comm);
	mtx_lock_spin(&sched_lock);
	if (td->td_flags & TDF_TIMEOUT) {
		/*
		 * cv_timedwait detected the race with this callout and slept
		 * again with TDF_TIMEOUT set; just make it runnable.
		 */
		td->td_flags &= ~TDF_TIMEOUT;
		setrunqueue(td);
	} else if (td->td_wchan != NULL) {
		/*
		 * Still waiting: wake the thread (or just dequeue it if it
		 * has not gone to sleep yet) and flag the timeout for
		 * cv_timedwait to report as EWOULDBLOCK.
		 */
		if (td->td_proc->p_stat == SSLEEP) /* XXXKSE */
			setrunnable(td);
		else
			cv_waitq_remove(td);
		td->td_flags |= TDF_TIMEOUT;
	} else
		/*
		 * Already woken normally; record that the callout fired so
		 * cv_timedwait does not misinterpret callout_stop's result.
		 */
		td->td_flags |= TDF_TIMOFAIL;
	mtx_unlock_spin(&sched_lock);
}
579