xref: /freebsd/sys/kern/kern_condvar.c (revision c678bc4f13a340ad88debe321afd0097db2590cb)
1 /*-
2  * Copyright (c) 2000 Jake Burkholder <jake@freebsd.org>.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 #include "opt_ktrace.h"
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/lock.h>
34 #include <sys/mutex.h>
35 #include <sys/proc.h>
36 #include <sys/kernel.h>
37 #include <sys/ktr.h>
38 #include <sys/condvar.h>
39 #include <sys/signalvar.h>
40 #include <sys/resourcevar.h>
41 #ifdef KTRACE
42 #include <sys/uio.h>
43 #include <sys/ktrace.h>
44 #endif
45 
/*
 * Common sanity checks for cv_wait* functions.
 *
 * (p) must be the current process and runnable (SRUN), (cvp) and (mp)
 * must be non-NULL, and (mp) must be held exactly once (not recursed)
 * since it is dropped across the sleep.
 */
#define	CV_ASSERT(cvp, mp, p) do {					\
	KASSERT((p) != NULL, ("%s: curproc NULL", __FUNCTION__));	\
	KASSERT((p)->p_stat == SRUN, ("%s: not SRUN", __FUNCTION__));	\
	KASSERT((cvp) != NULL, ("%s: cvp NULL", __FUNCTION__));		\
	KASSERT((mp) != NULL, ("%s: mp NULL", __FUNCTION__));		\
	mtx_assert((mp), MA_OWNED | MA_NOTRECURSED);			\
} while (0)

#ifdef CV_DEBUG
/*
 * Debug-only check that all concurrent waiters on (cvp) use the same
 * interlock mutex: the first waiter records (mp) in cv_mtx, subsequent
 * waiters must present the same mutex.
 */
#define	CV_WAIT_VALIDATE(cvp, mp) do {					\
	if (TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		/* Only waiter. */					\
		(cvp)->cv_mtx = (mp);					\
	} else {							\
		/*							\
		 * Other waiter; assert that we're using the		\
		 * same mutex.						\
		 */							\
		KASSERT((cvp)->cv_mtx == (mp),				\
		    ("%s: Multiple mutexes", __FUNCTION__));		\
	}								\
} while (0)
/*
 * Debug-only check that the signaller holds the waiters' interlock
 * mutex when there are waiters queued.
 */
#define	CV_SIGNAL_VALIDATE(cvp) do {					\
	if (!TAILQ_EMPTY(&(cvp)->cv_waitq)) {				\
		KASSERT(mtx_owned((cvp)->cv_mtx),			\
		    ("%s: Mutex not owned", __FUNCTION__));		\
	}								\
} while (0)
#else
/* Validation compiles away entirely unless CV_DEBUG is defined. */
#define	CV_WAIT_VALIDATE(cvp, mp)
#define	CV_SIGNAL_VALIDATE(cvp)
#endif
81 
82 static void cv_timedwait_end(void *arg);
83 
84 /*
85  * Initialize a condition variable.  Must be called before use.
86  */
87 void
88 cv_init(struct cv *cvp, const char *desc)
89 {
90 
91 	TAILQ_INIT(&cvp->cv_waitq);
92 	cvp->cv_mtx = NULL;
93 	cvp->cv_description = desc;
94 }
95 
/*
 * Destroy a condition variable.  The condition variable must be re-initialized
 * in order to be re-used.
 *
 * There is no dynamic state to tear down; this only asserts (under
 * INVARIANTS) that no process is still queued on the wait queue.
 */
void
cv_destroy(struct cv *cvp)
{

	KASSERT(cv_waitq_empty(cvp), ("%s: cv_waitq non-empty", __FUNCTION__));
}
106 
/*
 * Common code for cv_wait* functions.  All require sched_lock.
 */

/*
 * Switch context.  Called with sched_lock held; the process has already
 * been queued on a condition variable's wait queue by cv_waitq_add().
 * Does not return until the process has been resumed.
 */
static __inline void
cv_switch(struct proc *p)
{

	/* Mark ourselves asleep and account for a voluntary switch. */
	p->p_stat = SSLEEP;
	p->p_stats->p_ru.ru_nvcsw++;
	mi_switch();
	/* We resume here after cv_signal/cv_broadcast/timeout wakes us. */
	CTR3(KTR_PROC, "cv_switch: resume proc %p (pid %d, %s)", p, p->p_pid,
	    p->p_comm);
}
124 
/*
 * Switch context, catching signals.  Called with sched_lock held and the
 * process already on a cv wait queue.  Returns the signal number caught,
 * or 0 if the process slept and was resumed normally.
 */
static __inline int
cv_switch_catch(struct proc *p)
{
	int sig;

	/*
	 * We put ourselves on the sleep queue and start our timeout before
	 * calling CURSIG, as we could stop there, and a wakeup or a SIGCONT (or
	 * both) could occur while we were stopped.  A SIGCONT would cause us to
	 * be marked as SSLEEP without resuming us, thus we must be ready for
	 * sleep when CURSIG is called.  If the wakeup happens while we're
	 * stopped, p->p_wchan will be 0 upon return from CURSIG.
	 */
	p->p_sflag |= PS_SINTR;
	/* sched_lock is a spin lock; drop it across CURSIG (we may stop). */
	mtx_unlock_spin(&sched_lock);
	sig = CURSIG(p);
	mtx_lock_spin(&sched_lock);
	if (sig != 0) {
		/*
		 * A signal is pending: abort the sleep.  If still on the
		 * wait queue, take ourselves off before going back to SRUN.
		 * NOTE(review): cv_waitq_remove() re-acquires sched_lock,
		 * which we hold here -- relies on sched_lock being
		 * recursable; confirm.
		 */
		if (p->p_wchan != NULL)
			cv_waitq_remove(p);
		p->p_stat = SRUN;
	} else if (p->p_wchan != NULL) {
		/* No signal and no wakeup raced us; actually go to sleep. */
		cv_switch(p);
	}
	p->p_sflag &= ~PS_SINTR;

	return sig;
}
156 
/*
 * Add a process to the wait queue of a condition variable.  Called with
 * sched_lock held (all callers acquire it first).
 */
static __inline void
cv_waitq_add(struct cv *cvp, struct proc *p)
{

	/*
	 * Process may be sitting on a slpque if asleep() was called, remove it
	 * before re-adding.
	 */
	if (p->p_wchan != NULL)
		unsleep(p);

	/* Record what we are sleeping on, and why (for ps/top wmesg). */
	p->p_sflag |= PS_CVWAITQ;
	p->p_wchan = cvp;
	p->p_wmesg = cvp->cv_description;
	p->p_slptime = 0;
	/* Snapshot the current scheduling priority before sleeping. */
	p->p_pri.pri_native = p->p_pri.pri_level;
	CTR3(KTR_PROC, "cv_waitq_add: proc %p (pid %d, %s)", p, p->p_pid,
	    p->p_comm);
	TAILQ_INSERT_TAIL(&cvp->cv_waitq, p, p_slpq);
}
180 
/*
 * Wait on a condition variable.  The current process is placed on the condition
 * variable's wait queue and suspended.  A cv_signal or cv_broadcast on the same
 * condition variable will resume the process.  The mutex is released before
 * sleeping and will be held on return.  It is recommended that the mutex be
 * held when cv_signal or cv_broadcast are called.
 */
void
cv_wait(struct cv *cvp, struct mtx *mp)
{
	struct proc *p;
	WITNESS_SAVE_DECL(mp);

	p = CURPROC;
#ifdef KTRACE
	/* Trace the context-switch-out event if ktrace is watching. */
	if (p && KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 1, 0);
#endif
	CV_ASSERT(cvp, mp, p);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	mtx_lock_spin(&sched_lock);
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * procs or panic below, in case this is the idle process and
		 * already asleep.
		 */
		mtx_unlock_spin(&sched_lock);
		return;
	}
	CV_WAIT_VALIDATE(cvp, mp);

	/*
	 * Drop Giant and the interlock without yielding; holding
	 * sched_lock prevents a wakeup from being lost in between.
	 */
	DROP_GIANT_NOSWITCH();
	mtx_unlock_flags(mp, MTX_NOSWITCH);

	cv_waitq_add(cvp, p);
	cv_switch(p);

	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	/* Trace the context-switch-in event on resume. */
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 0, 0);
#endif
	PICKUP_GIANT();
	/* Re-acquire the interlock before returning, as advertised. */
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);
}
231 
/*
 * Wait on a condition variable, allowing interruption by signals.  Return 0 if
 * the process was resumed with cv_signal or cv_broadcast, EINTR or ERESTART if
 * a signal was caught.  If ERESTART is returned the system call should be
 * restarted if possible.
 */
int
cv_wait_sig(struct cv *cvp, struct mtx *mp)
{
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	p = CURPROC;
	rval = 0;
#ifdef KTRACE
	/* Trace the context-switch-out event if ktrace is watching. */
	if (p && KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 1, 0);
#endif
	CV_ASSERT(cvp, mp, p);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	mtx_lock_spin(&sched_lock);
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * procs or panic below, in case this is the idle process and
		 * already asleep.
		 */
		mtx_unlock_spin(&sched_lock);
		return 0;
	}
	CV_WAIT_VALIDATE(cvp, mp);

	/*
	 * Drop Giant and the interlock without yielding; holding
	 * sched_lock prevents a wakeup from being lost in between.
	 */
	DROP_GIANT_NOSWITCH();
	mtx_unlock_flags(mp, MTX_NOSWITCH);

	cv_waitq_add(cvp, p);
	sig = cv_switch_catch(p);

	mtx_unlock_spin(&sched_lock);
	PICKUP_GIANT();

	/*
	 * If no signal interrupted the sleep, re-check: one may have been
	 * posted while we slept.
	 */
	if (sig == 0)
		sig = CURSIG(p);
	if (sig != 0) {
		/* Signal disposition chooses EINTR vs. restartable ERESTART. */
		PROC_LOCK(p);
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
		PROC_UNLOCK(p);
	}

#ifdef KTRACE
	/* Trace the context-switch-in event on resume. */
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 0, 0);
#endif
	/* Re-acquire the interlock before returning, as advertised. */
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}
298 
/*
 * Wait on a condition variable for at most timo/hz seconds.  Returns 0 if the
 * process was resumed by cv_signal or cv_broadcast, EWOULDBLOCK if the timeout
 * expires.
 */
int
cv_timedwait(struct cv *cvp, struct mtx *mp, int timo)
{
	struct proc *p;
	int rval;
	WITNESS_SAVE_DECL(mp);

	p = CURPROC;
	rval = 0;
#ifdef KTRACE
	/* Trace the context-switch-out event if ktrace is watching. */
	if (p && KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 1, 0);
#endif
	CV_ASSERT(cvp, mp, p);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	mtx_lock_spin(&sched_lock);
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * procs or panic below, in case this is the idle process and
		 * already asleep.
		 */
		mtx_unlock_spin(&sched_lock);
		return 0;
	}
	CV_WAIT_VALIDATE(cvp, mp);

	/*
	 * Drop Giant and the interlock without yielding; holding
	 * sched_lock prevents a wakeup from being lost in between.
	 */
	DROP_GIANT_NOSWITCH();
	mtx_unlock_flags(mp, MTX_NOSWITCH);

	cv_waitq_add(cvp, p);
	/* Arm the per-process sleep callout before switching away. */
	callout_reset(&p->p_slpcallout, timo, cv_timedwait_end, p);
	cv_switch(p);

	if (p->p_sflag & PS_TIMEOUT) {
		/* The callout fired and woke us; report the timeout. */
		p->p_sflag &= ~PS_TIMEOUT;
		rval = EWOULDBLOCK;
	} else
		/* Woken by cv_signal/cv_broadcast; cancel the callout. */
		callout_stop(&p->p_slpcallout);

	mtx_unlock_spin(&sched_lock);
#ifdef KTRACE
	/* Trace the context-switch-in event on resume. */
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 0, 0);
#endif
	PICKUP_GIANT();
	/* Re-acquire the interlock before returning, as advertised. */
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}
358 
/*
 * Wait on a condition variable for at most timo/hz seconds, allowing
 * interruption by signals.  Returns 0 if the process was resumed by cv_signal
 * or cv_broadcast, EWOULDBLOCK if the timeout expires, and EINTR or ERESTART if
 * a signal was caught.
 */
int
cv_timedwait_sig(struct cv *cvp, struct mtx *mp, int timo)
{
	struct proc *p;
	int rval;
	int sig;
	WITNESS_SAVE_DECL(mp);

	p = CURPROC;
	rval = 0;
#ifdef KTRACE
	/* Trace the context-switch-out event if ktrace is watching. */
	if (p && KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 1, 0);
#endif
	CV_ASSERT(cvp, mp, p);
	WITNESS_SLEEP(0, &mp->mtx_object);
	WITNESS_SAVE(&mp->mtx_object, mp);

	mtx_lock_spin(&sched_lock);
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration, just give
		 * interrupts a chance, then just return; don't run any other
		 * procs or panic below, in case this is the idle process and
		 * already asleep.
		 */
		mtx_unlock_spin(&sched_lock);
		return 0;
	}
	CV_WAIT_VALIDATE(cvp, mp);

	/*
	 * Drop Giant and the interlock without yielding; holding
	 * sched_lock prevents a wakeup from being lost in between.
	 */
	DROP_GIANT_NOSWITCH();
	mtx_unlock_flags(mp, MTX_NOSWITCH);

	cv_waitq_add(cvp, p);
	/* Arm the per-process sleep callout before switching away. */
	callout_reset(&p->p_slpcallout, timo, cv_timedwait_end, p);
	sig = cv_switch_catch(p);

	if (p->p_sflag & PS_TIMEOUT) {
		/* The callout fired and woke us; report the timeout. */
		p->p_sflag &= ~PS_TIMEOUT;
		rval = EWOULDBLOCK;
	} else
		/* Woken by signal or cv_signal/broadcast; cancel the callout. */
		callout_stop(&p->p_slpcallout);

	mtx_unlock_spin(&sched_lock);
	PICKUP_GIANT();

	/*
	 * If no signal interrupted the sleep, re-check: one may have been
	 * posted while we slept.  A caught signal overrides EWOULDBLOCK.
	 */
	if (sig == 0)
		sig = CURSIG(p);
	if (sig != 0) {
		/* Signal disposition chooses EINTR vs. restartable ERESTART. */
		PROC_LOCK(p);
		if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
			rval = EINTR;
		else
			rval = ERESTART;
		PROC_UNLOCK(p);
	}

#ifdef KTRACE
	/* Trace the context-switch-in event on resume. */
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p->p_tracep, 0, 0);
#endif
	/* Re-acquire the interlock before returning, as advertised. */
	mtx_lock(mp);
	WITNESS_RESTORE(&mp->mtx_object, mp);

	return (rval);
}
432 
/*
 * Common code for signal and broadcast.  Assumes waitq is not empty.  Must be
 * called with sched_lock held.
 *
 * Dequeues the first (longest-waiting) process and, if it is actually
 * asleep, makes it runnable again.
 */
static __inline void
cv_wakeup(struct cv *cvp)
{
	struct proc *p;

	mtx_assert(&sched_lock, MA_OWNED);
	p = TAILQ_FIRST(&cvp->cv_waitq);
	KASSERT(p->p_wchan == cvp, ("%s: bogus wchan", __FUNCTION__));
	KASSERT(p->p_sflag & PS_CVWAITQ, ("%s: not on waitq", __FUNCTION__));
	TAILQ_REMOVE(&cvp->cv_waitq, p, p_slpq);
	p->p_sflag &= ~PS_CVWAITQ;
	p->p_wchan = 0;
	/*
	 * A process can be on the waitq without being SSLEEP (e.g. stopped
	 * in cv_switch_catch); only reschedule if it is actually asleep.
	 */
	if (p->p_stat == SSLEEP) {
		/* OPTIMIZED EXPANSION OF setrunnable(p); */
		CTR3(KTR_PROC, "cv_signal: proc %p (pid %d, %s)",
		    p, p->p_pid, p->p_comm);
		if (p->p_slptime > 1)
			updatepri(p);
		p->p_slptime = 0;
		p->p_stat = SRUN;
		if (p->p_sflag & PS_INMEM) {
			setrunqueue(p);
			maybe_resched(p);
		} else {
			/* Swapped out: ask the swapper (proc0) to bring it in. */
			p->p_sflag |= PS_SWAPINREQ;
			wakeup(&proc0);
		}
		/* END INLINE EXPANSION */
	}
}
467 
468 /*
469  * Signal a condition variable, wakes up one waiting process.  Will also wakeup
470  * the swapper if the process is not in memory, so that it can bring the
471  * sleeping process in.  Note that this may also result in additional processes
472  * being made runnable.  Should be called with the same mutex as was passed to
473  * cv_wait held.
474  */
475 void
476 cv_signal(struct cv *cvp)
477 {
478 
479 	KASSERT(cvp != NULL, ("%s: cvp NULL", __FUNCTION__));
480 	mtx_lock_spin(&sched_lock);
481 	if (!TAILQ_EMPTY(&cvp->cv_waitq)) {
482 		CV_SIGNAL_VALIDATE(cvp);
483 		cv_wakeup(cvp);
484 	}
485 	mtx_unlock_spin(&sched_lock);
486 }
487 
488 /*
489  * Broadcast a signal to a condition variable.  Wakes up all waiting processes.
490  * Should be called with the same mutex as was passed to cv_wait held.
491  */
492 void
493 cv_broadcast(struct cv *cvp)
494 {
495 
496 	KASSERT(cvp != NULL, ("%s: cvp NULL", __FUNCTION__));
497 	mtx_lock_spin(&sched_lock);
498 	CV_SIGNAL_VALIDATE(cvp);
499 	while (!TAILQ_EMPTY(&cvp->cv_waitq))
500 		cv_wakeup(cvp);
501 	mtx_unlock_spin(&sched_lock);
502 }
503 
504 /*
505  * Remove a process from the wait queue of its condition variable.  This may be
506  * called externally.
507  */
508 void
509 cv_waitq_remove(struct proc *p)
510 {
511 	struct cv *cvp;
512 
513 	mtx_lock_spin(&sched_lock);
514 	if ((cvp = p->p_wchan) != NULL && p->p_sflag & PS_CVWAITQ) {
515 		TAILQ_REMOVE(&cvp->cv_waitq, p, p_slpq);
516 		p->p_sflag &= ~PS_CVWAITQ;
517 		p->p_wchan = NULL;
518 	}
519 	mtx_unlock_spin(&sched_lock);
520 }
521 
/*
 * Timeout function for cv_timedwait.  Put the process on the runqueue and set
 * its timeout flag.  Runs from the armed callout; may race with a
 * cv_signal/cv_broadcast wakeup, hence the p_wchan check.
 */
static void
cv_timedwait_end(void *arg)
{
	struct proc *p;

	p = arg;
	CTR3(KTR_PROC, "cv_timedwait_end: proc %p (pid %d, %s)", p, p->p_pid,
	    p->p_comm);
	mtx_lock_spin(&sched_lock);
	if (p->p_wchan != NULL) {
		/*
		 * Still waiting: if asleep, wake it via setrunnable();
		 * otherwise (e.g. stopped in cv_switch_catch) just pull it
		 * off the waitq.  NOTE(review): cv_waitq_remove() re-acquires
		 * sched_lock, which is held here -- relies on sched_lock
		 * being recursable; confirm.
		 */
		if (p->p_stat == SSLEEP)
			setrunnable(p);
		else
			cv_waitq_remove(p);
		p->p_sflag |= PS_TIMEOUT;
	}
	/* If p_wchan was NULL, a wakeup already happened; do nothing. */
	mtx_unlock_spin(&sched_lock);
}
544