xref: /freebsd/sys/kern/kern_condvar.c (revision 78704ef45793e56c8e064611c05c9bb8a0067e9f)
/*-
 * Copyright (c) 2000 Jake Burkholder <jake@freebsd.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/condvar.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

/*
 * Common sanity checks for cv_wait* functions.
 */
#define	CV_ASSERT(cvp, mp, td) do {					\
	KASSERT((td) != NULL, ("%s: curthread NULL", __func__));	\
	KASSERT(TD_IS_RUNNING(td), ("%s: not TDS_RUNNING", __func__));	\
	KASSERT((cvp) != NULL, ("%s: cvp NULL", __func__));		\
	KASSERT((mp) != NULL, ("%s: mp NULL", __func__));		\
	mtx_assert((mp), MA_OWNED | MA_NOTRECURSED);			\
} while (0)

#ifdef INVARIANTS
#define	CV_WAIT_VALIDATE(cvp, mp) do {					\
	if (TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		/* Only waiter. */					\
		(cvp)->cv_mtx = (mp);					\
	} else {							\
		/*							\
		 * Other waiter; assert that we're using the		\
		 * same mutex.						\
		 */							\
		KASSERT((cvp)->cv_mtx == (mp),				\
		    ("%s: Multiple mutexes", __func__));		\
	}								\
} while (0)

#define	CV_SIGNAL_VALIDATE(cvp) do {					\
	if (!TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		KASSERT(mtx_owned((cvp)->cv_mtx),			\
		    ("%s: Mutex not owned", __func__));			\
	}								\
} while (0)

#else
#define	CV_WAIT_VALIDATE(cvp, mp)
#define	CV_SIGNAL_VALIDATE(cvp)
#endif

static void cv_timedwait_end(void *arg);
static void cv_check_upcall(struct thread *td);

/*
 * Initialize a condition variable.  Must be called before use.
 */
void
cv_init(struct cv *cvp, const char *desc)
{

	TAILQ_INIT(&cvp->cv_waitq);
	cvp->cv_mtx = NULL;
	cvp->cv_description = desc;
}

/*
 * Destroy a condition variable.  The condition variable must be re-initialized
 * in order to be re-used.
 */
void
cv_destroy(struct cv *cvp)
{

	KASSERT(cv_waitq_empty(cvp), ("%s: cv_waitq non-empty", __func__));
}

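/*
 * Example (illustrative sketch, not part of this file): the typical lifecycle
 * of a condition variable paired with the mutex that protects its predicate.
 * The "foo_softc" structure and the "foo" names are hypothetical and exist
 * only to show where cv_init() and cv_destroy() usually sit.
 *
 *	struct foo_softc {
 *		struct mtx	foo_mtx;
 *		struct cv	foo_cv;
 *		int		foo_ready;
 *	};
 *
 *	mtx_init(&sc->foo_mtx, "foo lock", NULL, MTX_DEF);
 *	cv_init(&sc->foo_cv, "foordy");
 *	...
 *	cv_destroy(&sc->foo_cv);
 *	mtx_destroy(&sc->foo_mtx);
 */
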
/*
 * Common code for cv_wait* functions.  All require sched_lock.
 */

/*
 * Decide if we need to queue an upcall.
 * This is copied from msleep(); perhaps it should be a common function.
 */
static void
cv_check_upcall(struct thread *td)
{

	/*
	 * If we are capable of async syscalls and there isn't already
	 * another one ready to return, start a new thread
	 * and queue it as ready to run.  Note that there is danger here
	 * because we need to make sure that we don't sleep allocating
	 * the thread (recursion here might be bad).
	 * Hence the TDF_INMSLEEP flag.
	 */
	if ((td->td_proc->p_flag & P_KSES) && td->td_mailbox &&
	    (td->td_flags & TDF_INMSLEEP) == 0) {
		/*
		 * We don't need to upcall now, just queue it.
		 * The upcall will happen when other in-kernel work
		 * in this KSEGRP has completed.
		 * Don't recurse here!
		 */
		td->td_flags |= TDF_INMSLEEP;
		thread_schedule_upcall(td, td->td_kse);
		td->td_flags &= ~TDF_INMSLEEP;
	}
}

/*
 * Switch context.
 */
static __inline void
cv_switch(struct thread *td)
{

	cv_check_upcall(td);
	TD_SET_SLEEPING(td);
	td->td_proc->p_stats->p_ru.ru_nvcsw++;
	mi_switch();
	CTR3(KTR_PROC, "cv_switch: resume thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
}

/*
 * Switch context, catching signals.
 */
static __inline int
cv_switch_catch(struct thread *td)
{
	struct proc *p;
	int sig;

	/*
	 * We put ourselves on the sleep queue and start our timeout before
	 * calling cursig, as we could stop there, and a wakeup or a SIGCONT (or
	 * both) could occur while we were stopped.  A SIGCONT would cause us to
	 * be marked as TDS_SLP without resuming us, thus we must be ready for
	 * sleep when cursig is called.  If the wakeup happens while we're
	 * stopped, td->td_wchan will be 0 upon return from cursig,
	 * and TD_ON_SLEEPQ() will return false.
	 */
	td->td_flags |= TDF_SINTR;
	mtx_unlock_spin(&sched_lock);
	p = td->td_proc;
	PROC_LOCK(p);
	sig = cursig(td);
	if (thread_suspend_check(1))
		sig = SIGSTOP;
	mtx_lock_spin(&sched_lock);
	PROC_UNLOCK(p);
	if (sig != 0) {
		if (TD_ON_SLEEPQ(td))
			cv_waitq_remove(td);
		TD_SET_RUNNING(td);
	} else if (TD_ON_SLEEPQ(td)) {
		cv_switch(td);
	}
	td->td_flags &= ~TDF_SINTR;

	return sig;
}

/*
 * Add a thread to the wait queue of a condition variable.
 */
static __inline void
cv_waitq_add(struct cv *cvp, struct thread *td)
{

	td->td_flags |= TDF_CVWAITQ;
	TD_SET_ON_SLEEPQ(td);
	td->td_wchan = cvp;
	td->td_wmesg = cvp->cv_description;
	td->td_ksegrp->kg_slptime = 0; /* XXXKSE */
	td->td_base_pri = td->td_priority;
	CTR3(KTR_PROC, "cv_waitq_add: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	TAILQ_INSERT_TAIL(&cvp->cv_waitq, td, td_slpq);
}

/*
 * Wait on a condition variable.  The current thread is placed on the condition
 * variable's wait queue and suspended.  A cv_signal or cv_broadcast on the same
 * condition variable will resume the thread.  The mutex is released before
 * sleeping and will be held on return.  It is recommended that the mutex be
 * held when cv_signal or cv_broadcast are called.
 */
void
cv_wait(struct cv *cvp, struct mtx *mp)
{
	struct thread *td;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold) {
		/*
		 * During autoconfiguration, just give interrupts
		 * a chance, then just return.  Don't run any other
		 * thread or panic below, in case this is the idle
		 * process and already asleep.
		 */
		return;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	cv_switch(td);

	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);
}

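/*
 * Example (illustrative sketch, not part of this file): the canonical way a
 * consumer uses cv_wait().  Because the mutex is dropped while the thread
 * sleeps, the predicate must be re-tested in a loop after cv_wait() returns.
 * The names "sc", "foo_mtx", "foo_cv", "foo_ready" and consume() are
 * hypothetical.
 *
 *	mtx_lock(&sc->foo_mtx);
 *	while (sc->foo_ready == 0)
 *		cv_wait(&sc->foo_cv, &sc->foo_mtx);
 *	consume(sc);
 *	mtx_unlock(&sc->foo_mtx);
 */
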
/*
 * Wait on a condition variable, allowing interruption by signals.  Return 0 if
 * the thread was resumed with cv_signal or cv_broadcast, EINTR or ERESTART if
 * a signal was caught.  If ERESTART is returned the system call should be
 * restarted if possible.
 */
int
cv_wait_sig(struct cv *cvp, struct mtx *mp)
{
	struct thread *td;
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	p = td->td_proc;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * procs or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return 0;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	sig = cv_switch_catch(td);

	mtx_unlock_spin(&sched_lock);

	PROC_LOCK(p);
	if (sig == 0)
		sig = cursig(td);	/* XXXKSE */
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	PROC_UNLOCK(p);
	if (p->p_flag & P_WEXIT)
		rval = EINTR;

#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}

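/*
 * Example (illustrative sketch, not part of this file): an interruptible wait
 * must be prepared to return early when a signal is caught, so a typical
 * caller propagates the error and lets the syscall layer deliver the signal
 * or restart the call.  The names below are hypothetical and "error" is an
 * int declared by the caller.
 *
 *	mtx_lock(&sc->foo_mtx);
 *	while (sc->foo_ready == 0) {
 *		error = cv_wait_sig(&sc->foo_cv, &sc->foo_mtx);
 *		if (error != 0) {
 *			mtx_unlock(&sc->foo_mtx);
 *			return (error);
 *		}
 *	}
 *	mtx_unlock(&sc->foo_mtx);
 */
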
/*
 * Wait on a condition variable for at most timo/hz seconds.  Returns 0 if the
 * thread was resumed by cv_signal or cv_broadcast, EWOULDBLOCK if the timeout
 * expires.
 */
int
cv_timedwait(struct cv *cvp, struct mtx *mp, int timo)
{
	struct thread *td;
	int rval;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return 0;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
	cv_switch(td);

	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL)
		td->td_flags &= ~TDF_TIMOFAIL;
	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Work around race with cv_timedwait_end similar to that
		 * between msleep and endtsleep.
		 * Go back to sleep.
		 */
		TD_SET_SLEEPING(td);
		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		td->td_flags &= ~TDF_TIMOFAIL;
	}

	if (td->td_proc->p_flag & P_WEXIT)
		rval = EWOULDBLOCK;
	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}

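/*
 * Example (illustrative sketch, not part of this file): a bounded wait.  The
 * timeout is given in ticks, so callers typically scale seconds by hz.
 * EWOULDBLOCK only means this particular sleep timed out; the predicate still
 * decides whether the overall wait is finished.  Names are hypothetical.
 *
 *	mtx_lock(&sc->foo_mtx);
 *	while (sc->foo_ready == 0) {
 *		error = cv_timedwait(&sc->foo_cv, &sc->foo_mtx, 5 * hz);
 *		if (error == EWOULDBLOCK)
 *			break;
 *	}
 *	mtx_unlock(&sc->foo_mtx);
 */
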
/*
 * Wait on a condition variable for at most timo/hz seconds, allowing
 * interruption by signals.  Returns 0 if the thread was resumed by cv_signal
 * or cv_broadcast, EWOULDBLOCK if the timeout expires, and EINTR or ERESTART if
 * a signal was caught.
 */
int
cv_timedwait_sig(struct cv *cvp, struct mtx *mp, int timo)
{
	struct thread *td;
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	p = td->td_proc;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return 0;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
	sig = cv_switch_catch(td);

	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL)
		td->td_flags &= ~TDF_TIMOFAIL;
	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Work around race with cv_timedwait_end similar to that
		 * between msleep and endtsleep.
		 * Go back to sleep.
		 */
		TD_SET_SLEEPING(td);
		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		td->td_flags &= ~TDF_TIMOFAIL;
	}
	mtx_unlock_spin(&sched_lock);

	PROC_LOCK(p);
	if (sig == 0)
		sig = cursig(td);
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	PROC_UNLOCK(p);

	if (p->p_flag & P_WEXIT)
		rval = EINTR;

#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}

/*
 * Common code for signal and broadcast.  Assumes waitq is not empty.  Must be
 * called with sched_lock held.
 */
static __inline void
cv_wakeup(struct cv *cvp)
{
	struct thread *td;

	mtx_assert(&sched_lock, MA_OWNED);
	td = TAILQ_FIRST(&cvp->cv_waitq);
	KASSERT(td->td_wchan == cvp, ("%s: bogus wchan", __func__));
	KASSERT(td->td_flags & TDF_CVWAITQ, ("%s: not on waitq", __func__));
	cv_waitq_remove(td);
	TD_CLR_SLEEPING(td);
	setrunnable(td);
}

/*
 * Signal a condition variable, waking up one waiting thread.  This will also
 * wake up the swapper if the process is not in memory, so that it can bring
 * the sleeping process in.  Note that this may also result in additional
 * threads being made runnable.  Should be called with the same mutex as was
 * passed to cv_wait held.
 */
void
cv_signal(struct cv *cvp)
{

	KASSERT(cvp != NULL, ("%s: cvp NULL", __func__));
	mtx_lock_spin(&sched_lock);
	if (!TAILQ_EMPTY(&cvp->cv_waitq)) {
		CV_SIGNAL_VALIDATE(cvp);
		cv_wakeup(cvp);
	}
	mtx_unlock_spin(&sched_lock);
}

/*
 * Broadcast a signal to a condition variable.  Wakes up all waiting threads.
 * Should be called with the same mutex as was passed to cv_wait held.
 */
void
cv_broadcast(struct cv *cvp)
{

	KASSERT(cvp != NULL, ("%s: cvp NULL", __func__));
	mtx_lock_spin(&sched_lock);
	CV_SIGNAL_VALIDATE(cvp);
	while (!TAILQ_EMPTY(&cvp->cv_waitq))
		cv_wakeup(cvp);
	mtx_unlock_spin(&sched_lock);
}

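/*
 * Example (illustrative sketch, not part of this file): the producer side.
 * The state change and the wakeup are performed while holding the same mutex
 * that the waiters pass to cv_wait(), which is what the validation macros
 * above check for.  cv_signal() wakes a single waiter; cv_broadcast() is used
 * when every waiter must re-evaluate the condition.  Names are hypothetical.
 *
 *	mtx_lock(&sc->foo_mtx);
 *	sc->foo_ready = 1;
 *	cv_broadcast(&sc->foo_cv);
 *	mtx_unlock(&sc->foo_mtx);
 */
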
/*
 * Remove a thread from the wait queue of its condition variable.  This may be
 * called externally.
 */
void
cv_waitq_remove(struct thread *td)
{
	struct cv *cvp;

	mtx_assert(&sched_lock, MA_OWNED);
	if ((cvp = td->td_wchan) != NULL && td->td_flags & TDF_CVWAITQ) {
		TAILQ_REMOVE(&cvp->cv_waitq, td, td_slpq);
		td->td_flags &= ~TDF_CVWAITQ;
		TD_CLR_ON_SLEEPQ(td);
	}
}

/*
 * Timeout function for cv_timedwait.  Put the thread on the runqueue and set
 * its timeout flag.
 */
static void
cv_timedwait_end(void *arg)
{
	struct thread *td;

	td = arg;
	CTR3(KTR_PROC, "cv_timedwait_end: thread %p (pid %d, %s)",
	    td, td->td_proc->p_pid, td->td_proc->p_comm);
	mtx_lock_spin(&sched_lock);
	if (TD_ON_SLEEPQ(td)) {
		cv_waitq_remove(td);
		td->td_flags |= TDF_TIMEOUT;
	} else {
		td->td_flags |= TDF_TIMOFAIL;
	}
	TD_CLR_SLEEPING(td);
	setrunnable(td);
	mtx_unlock_spin(&sched_lock);
}

/*
 * For now, only abort interruptible waits.
 * The others will have to either complete on their own or have a timeout.
 */
void
cv_abort(struct thread *td)
{

	CTR3(KTR_PROC, "cv_abort: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	mtx_lock_spin(&sched_lock);
	if ((td->td_flags & (TDF_SINTR|TDF_TIMEOUT)) == TDF_SINTR) {
		if (TD_ON_SLEEPQ(td)) {
			cv_waitq_remove(td);
		}
		TD_CLR_SLEEPING(td);
		setrunnable(td);
	}
	mtx_unlock_spin(&sched_lock);
}