xref: /freebsd/sys/kern/kern_condvar.c (revision a3e8fd0b7f663db7eafff527d5c3ca3bcfa8a537)
/*-
 * Copyright (c) 2000 Jake Burkholder <jake@freebsd.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/condvar.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif

/*
 * Common sanity checks for cv_wait* functions.
 */
#define	CV_ASSERT(cvp, mp, td) do {					\
	KASSERT((td) != NULL, ("%s: curthread NULL", __func__));	\
	KASSERT(TD_IS_RUNNING(td), ("%s: not TDS_RUNNING", __func__));	\
	KASSERT((cvp) != NULL, ("%s: cvp NULL", __func__));		\
	KASSERT((mp) != NULL, ("%s: mp NULL", __func__));		\
	mtx_assert((mp), MA_OWNED | MA_NOTRECURSED);			\
} while (0)

#ifdef INVARIANTS
#define	CV_WAIT_VALIDATE(cvp, mp) do {					\
	if (TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		/* Only waiter. */					\
		(cvp)->cv_mtx = (mp);					\
	} else {							\
		/*							\
		 * Other waiter; assert that we're using the		\
		 * same mutex.						\
		 */							\
		KASSERT((cvp)->cv_mtx == (mp),				\
		    ("%s: Multiple mutexes", __func__));		\
	}								\
} while (0)

#define	CV_SIGNAL_VALIDATE(cvp) do {					\
	if (!TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		KASSERT(mtx_owned((cvp)->cv_mtx),			\
		    ("%s: Mutex not owned", __func__));			\
	}								\
} while (0)

#else
#define	CV_WAIT_VALIDATE(cvp, mp)
#define	CV_SIGNAL_VALIDATE(cvp)
#endif

static void cv_timedwait_end(void *arg);

/*
 * Initialize a condition variable.  Must be called before use.
 */
void
cv_init(struct cv *cvp, const char *desc)
{

	TAILQ_INIT(&cvp->cv_waitq);
	cvp->cv_mtx = NULL;
	cvp->cv_description = desc;
}

/*
 * Destroy a condition variable.  The condition variable must be re-initialized
 * in order to be re-used.
 */
void
cv_destroy(struct cv *cvp)
{

	KASSERT(cv_waitq_empty(cvp), ("%s: cv_waitq non-empty", __func__));
}
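
/*
 * Example (illustrative sketch only, not compiled here; the foo_softc
 * layout and function names are hypothetical, and the exact mtx_init()
 * arguments may differ by kernel version):  a condition variable is
 * typically paired with the mutex that protects the condition, created
 * in attach and torn down in detach once no waiters remain.
 *
 *	struct foo_softc {
 *		struct mtx	sc_mtx;
 *		struct cv	sc_cv;
 *		int		sc_ready;
 *	};
 *
 *	static void
 *	foo_attach(struct foo_softc *sc)
 *	{
 *		mtx_init(&sc->sc_mtx, "foo", NULL, MTX_DEF);
 *		cv_init(&sc->sc_cv, "fooready");
 *		sc->sc_ready = 0;
 *	}
 *
 *	static void
 *	foo_detach(struct foo_softc *sc)
 *	{
 *		cv_destroy(&sc->sc_cv);		<- asserts no waiters remain
 *		mtx_destroy(&sc->sc_mtx);
 *	}
 */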

/*
 * Common code for cv_wait* functions.  All require sched_lock.
 */

/*
 * Switch context.
 */
static __inline void
cv_switch(struct thread *td)
{

	/*
	 * If we are capable of async syscalls and there isn't already
	 * another one ready to return, start a new thread
	 * and queue it as ready to run. Note that there is danger here
	 * because we need to make sure that we don't sleep allocating
	 * the thread (recursion here might be bad).
	 * Hence the TDF_INMSLEEP flag.
	 */
	if ((td->td_flags & (TDF_UNBOUND|TDF_INMSLEEP)) == TDF_UNBOUND) {
		/*
		 * We don't need to upcall now, just queue it.
		 * The upcall will happen when other in-kernel work
		 * in this KSEGRP has completed.
		 * Don't recurse here!
		 */
		td->td_flags |= TDF_INMSLEEP;
		thread_schedule_upcall(td, td->td_kse);
		td->td_flags &= ~TDF_INMSLEEP;
	}
	TD_SET_SLEEPING(td);
	td->td_proc->p_stats->p_ru.ru_nvcsw++;
	mi_switch();
	CTR3(KTR_PROC, "cv_switch: resume thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
}

/*
 * Switch context, catching signals.
 */
static __inline int
cv_switch_catch(struct thread *td)
{
	struct proc *p;
	int sig;

	/*
	 * We put ourselves on the sleep queue and start our timeout before
	 * calling cursig, as we could stop there, and a wakeup or a SIGCONT (or
	 * both) could occur while we were stopped.  A SIGCONT would cause us to
	 * be marked as TDS_SLP without resuming us, thus we must be ready for
	 * sleep when cursig is called.  If the wakeup happens while we're
	 * stopped, td->td_wchan will be 0 upon return from cursig,
	 * and TD_ON_SLEEPQ() will return false.
	 */
	td->td_flags |= TDF_SINTR;
	mtx_unlock_spin(&sched_lock);
	p = td->td_proc;
	PROC_LOCK(p);
	sig = cursig(td);
	if (thread_suspend_check(1))
		sig = SIGSTOP;
	mtx_lock_spin(&sched_lock);
	PROC_UNLOCK(p);
	if (sig != 0) {
		if (TD_ON_SLEEPQ(td))
			cv_waitq_remove(td);
		TD_SET_RUNNING(td);
	} else if (TD_ON_SLEEPQ(td)) {
		cv_switch(td);
	}
	td->td_flags &= ~TDF_SINTR;

	return (sig);
}

/*
 * Add a thread to the wait queue of a condition variable.
 */
static __inline void
cv_waitq_add(struct cv *cvp, struct thread *td)
{

	td->td_flags |= TDF_CVWAITQ;
	TD_SET_ON_SLEEPQ(td);
	td->td_wchan = cvp;
	td->td_wmesg = cvp->cv_description;
	td->td_ksegrp->kg_slptime = 0; /* XXXKSE */
	td->td_base_pri = td->td_priority;
	CTR3(KTR_PROC, "cv_waitq_add: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	TAILQ_INSERT_TAIL(&cvp->cv_waitq, td, td_slpq);
}

/*
 * Wait on a condition variable.  The current thread is placed on the condition
 * variable's wait queue and suspended.  A cv_signal or cv_broadcast on the same
 * condition variable will resume the thread.  The mutex is released before
 * sleeping and will be held on return.  It is recommended that the mutex be
 * held when cv_signal or cv_broadcast are called.
 */
void
cv_wait(struct cv *cvp, struct mtx *mp)
{
	struct thread *td;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold) {
		/*
		 * During autoconfiguration, just give interrupts
		 * a chance, then just return.  Don't run any other
		 * thread or panic below, in case this is the idle
		 * process and already asleep.
		 */
		return;
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	cv_switch(td);

	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);
}
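
/*
 * Example usage (illustrative sketch only, not compiled here; sc, sc_mtx,
 * sc_cv and sc_ready are hypothetical names):  the wait is always wrapped
 * in a loop that re-checks the predicate, because the mutex is dropped
 * while the thread sleeps and the condition may have changed again by the
 * time cv_wait() returns.
 *
 *	mtx_lock(&sc->sc_mtx);
 *	while (sc->sc_ready == 0)
 *		cv_wait(&sc->sc_cv, &sc->sc_mtx);
 *	... consume the now-true condition, sc_mtx still held ...
 *	mtx_unlock(&sc->sc_mtx);
 */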

/*
 * Wait on a condition variable, allowing interruption by signals.  Return 0 if
 * the thread was resumed with cv_signal or cv_broadcast, EINTR or ERESTART if
 * a signal was caught.  If ERESTART is returned the system call should be
 * restarted if possible.
 */
int
cv_wait_sig(struct cv *cvp, struct mtx *mp)
{
	struct thread *td;
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	p = td->td_proc;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * procs or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return (0);
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	sig = cv_switch_catch(td);

	mtx_unlock_spin(&sched_lock);

	PROC_LOCK(p);
	if (sig == 0)
		sig = cursig(td);	/* XXXKSE */
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	PROC_UNLOCK(p);
	if (p->p_flag & P_WEXIT)
		rval = EINTR;

#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}
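
/*
 * Example (illustrative sketch only, hypothetical names):  an interruptible
 * wait propagates EINTR/ERESTART back to the caller instead of looping
 * forever, so a signal can break a thread out of the sleep.
 *
 *	int error = 0;
 *
 *	mtx_lock(&sc->sc_mtx);
 *	while (sc->sc_ready == 0) {
 *		error = cv_wait_sig(&sc->sc_cv, &sc->sc_mtx);
 *		if (error != 0)
 *			break;
 *	}
 *	mtx_unlock(&sc->sc_mtx);
 *	return (error);		<- 0, EINTR or ERESTART
 */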

/*
 * Wait on a condition variable for at most timo/hz seconds.  Returns 0 if the
 * process was resumed by cv_signal or cv_broadcast, EWOULDBLOCK if the timeout
 * expires.
 */
int
cv_timedwait(struct cv *cvp, struct mtx *mp, int timo)
{
	struct thread *td;
	int rval;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return (0);
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
	cv_switch(td);

	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL)
		td->td_flags &= ~TDF_TIMOFAIL;
	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Work around race with cv_timedwait_end similar to that
		 * between msleep and endtsleep.
		 * Go back to sleep.
		 */
		TD_SET_SLEEPING(td);
		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		td->td_flags &= ~TDF_TIMOFAIL;
	}

	if (td->td_proc->p_flag & P_WEXIT)
		rval = EWOULDBLOCK;
	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}
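
/*
 * Example (illustrative sketch only, hypothetical names):  a bounded wait
 * reports expiry as EWOULDBLOCK.  The timo argument is in clock ticks, so
 * a wait of roughly one second is hz ticks.
 *
 *	int error = 0;
 *
 *	mtx_lock(&sc->sc_mtx);
 *	while (sc->sc_ready == 0) {
 *		error = cv_timedwait(&sc->sc_cv, &sc->sc_mtx, hz);
 *		if (error == EWOULDBLOCK)
 *			break;		<- gave up after about one second
 *	}
 *	mtx_unlock(&sc->sc_mtx);
 */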

/*
 * Wait on a condition variable for at most timo/hz seconds, allowing
 * interruption by signals.  Returns 0 if the thread was resumed by cv_signal
 * or cv_broadcast, EWOULDBLOCK if the timeout expires, and EINTR or ERESTART if
 * a signal was caught.
 */
int
cv_timedwait_sig(struct cv *cvp, struct mtx *mp, int timo)
{
	struct thread *td;
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	td = curthread;
	p = td->td_proc;
	rval = 0;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(1, 0);
#endif
	CV_ASSERT(cvp, mp, td);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * thread or panic below, in case this is the idle process and
		 * already asleep.
		 */
		return (0);
	}

	mtx_lock_spin(&sched_lock);

	CV_WAIT_VALIDATE(cvp, mp);

	DROP_GIANT();
	mtx_unlock(mp);

	cv_waitq_add(cvp, td);
	callout_reset(&td->td_slpcallout, timo, cv_timedwait_end, td);
	sig = cv_switch_catch(td);

	if (td->td_flags & TDF_TIMEOUT) {
		td->td_flags &= ~TDF_TIMEOUT;
		rval = EWOULDBLOCK;
	} else if (td->td_flags & TDF_TIMOFAIL)
		td->td_flags &= ~TDF_TIMOFAIL;
	else if (callout_stop(&td->td_slpcallout) == 0) {
		/*
		 * Work around race with cv_timedwait_end similar to that
		 * between msleep and endtsleep.
		 * Go back to sleep.
		 */
		TD_SET_SLEEPING(td);
		td->td_proc->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		td->td_flags &= ~TDF_TIMOFAIL;
	}
	mtx_unlock_spin(&sched_lock);

	PROC_LOCK(p);
	if (sig == 0)
		sig = cursig(td);
	if (sig != 0) {
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
	}
	PROC_UNLOCK(p);

	if (p->p_flag & P_WEXIT)
		rval = EINTR;

#ifdef KTRACE
	if (KTRPOINT(td, KTR_CSW))
		ktrcsw(0, 0);
#endif
	PICKUP_GIANT();
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}

/*
 * Common code for signal and broadcast.  Assumes waitq is not empty.  Must be
 * called with sched_lock held.
 */
static __inline void
cv_wakeup(struct cv *cvp)
{
	struct thread *td;

	mtx_assert(&sched_lock, MA_OWNED);
	td = TAILQ_FIRST(&cvp->cv_waitq);
	KASSERT(td->td_wchan == cvp, ("%s: bogus wchan", __func__));
	KASSERT(td->td_flags & TDF_CVWAITQ, ("%s: not on waitq", __func__));
	cv_waitq_remove(td);
	TD_CLR_SLEEPING(td);
	setrunnable(td);
}

/*
 * Signal a condition variable, waking up one waiting thread.  Will also wake
 * up the swapper if the process is not in memory, so that it can bring the
 * sleeping process in.  Note that this may also result in additional threads
 * being made runnable.  Should be called with the same mutex as was passed to
 * cv_wait held.
 */
void
cv_signal(struct cv *cvp)
{

	KASSERT(cvp != NULL, ("%s: cvp NULL", __func__));
	mtx_lock_spin(&sched_lock);
	if (!TAILQ_EMPTY(&cvp->cv_waitq)) {
		CV_SIGNAL_VALIDATE(cvp);
		cv_wakeup(cvp);
	}
	mtx_unlock_spin(&sched_lock);
}

/*
 * Broadcast a signal to a condition variable.  Wakes up all waiting threads.
 * Should be called with the same mutex as was passed to cv_wait held.
 */
void
cv_broadcast(struct cv *cvp)
{

	KASSERT(cvp != NULL, ("%s: cvp NULL", __func__));
	mtx_lock_spin(&sched_lock);
	CV_SIGNAL_VALIDATE(cvp);
	while (!TAILQ_EMPTY(&cvp->cv_waitq))
		cv_wakeup(cvp);
	mtx_unlock_spin(&sched_lock);
}
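
/*
 * Example (illustrative sketch only, hypothetical names):  the producer side
 * changes the condition and then signals with the same mutex held, so a
 * waiter cannot miss the update between testing the predicate and going to
 * sleep.
 *
 *	mtx_lock(&sc->sc_mtx);
 *	sc->sc_ready = 1;
 *	cv_broadcast(&sc->sc_cv);	<- or cv_signal() to wake one waiter
 *	mtx_unlock(&sc->sc_mtx);
 */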

/*
 * Remove a thread from the wait queue of its condition variable.  This may be
 * called externally.
 */
void
cv_waitq_remove(struct thread *td)
{
	struct cv *cvp;

	mtx_assert(&sched_lock, MA_OWNED);
	if ((cvp = td->td_wchan) != NULL && td->td_flags & TDF_CVWAITQ) {
		TAILQ_REMOVE(&cvp->cv_waitq, td, td_slpq);
		td->td_flags &= ~TDF_CVWAITQ;
		TD_CLR_ON_SLEEPQ(td);
	}
}

/*
 * Timeout function for cv_timedwait.  Put the thread on the runqueue and set
 * its timeout flag.
 */
static void
cv_timedwait_end(void *arg)
{
	struct thread *td;

	td = arg;
	CTR3(KTR_PROC, "cv_timedwait_end: thread %p (pid %d, %s)",
	    td, td->td_proc->p_pid, td->td_proc->p_comm);
	mtx_lock_spin(&sched_lock);
	if (TD_ON_SLEEPQ(td)) {
		cv_waitq_remove(td);
		td->td_flags |= TDF_TIMEOUT;
	} else {
		td->td_flags |= TDF_TIMOFAIL;
	}
	TD_CLR_SLEEPING(td);
	setrunnable(td);
	mtx_unlock_spin(&sched_lock);
}

/*
 * For now only abort interruptible waits.
 * The others will have to either complete on their own or have a timeout.
 */
void
cv_abort(struct thread *td)
{

	CTR3(KTR_PROC, "cv_abort: thread %p (pid %d, %s)", td,
	    td->td_proc->p_pid, td->td_proc->p_comm);
	mtx_lock_spin(&sched_lock);
	if ((td->td_flags & (TDF_SINTR|TDF_TIMEOUT)) == TDF_SINTR) {
		if (TD_ON_SLEEPQ(td)) {
			cv_waitq_remove(td);
		}
		TD_CLR_SLEEPING(td);
		setrunnable(td);
	}
	mtx_unlock_spin(&sched_lock);
}