xref: /freebsd/sys/contrib/openzfs/module/os/linux/spl/spl-condvar.c (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
/*
 *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
 *  Copyright (C) 2007 The Regents of the University of California.
 *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
 *  UCRL-CODE-235197
 *
 *  This file is part of the SPL, Solaris Porting Layer.
 *
 *  The SPL is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU General Public License as published by the
 *  Free Software Foundation; either version 2 of the License, or (at your
 *  option) any later version.
 *
 *  The SPL is distributed in the hope that it will be useful, but WITHOUT
 *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
 *
 *  Solaris Porting Layer (SPL) Condition Variables Implementation.
 */

#include <sys/condvar.h>
#include <sys/time.h>
#include <sys/sysmacros.h>
#include <linux/hrtimer.h>
#include <linux/compiler_compat.h>
#include <linux/mod_compat.h>

#include <linux/sched.h>
#include <linux/sched/signal.h>

#define	MAX_HRTIMEOUT_SLACK_US	1000
static unsigned int spl_schedule_hrtimeout_slack_us = 0;

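/*
 * Setter for the spl_schedule_hrtimeout_slack_us module parameter.
 * The value is parsed and rejected if it exceeds MAX_HRTIMEOUT_SLACK_US
 * before param_set_uint() stores it.
 */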
static int
param_set_hrtimeout_slack(const char *buf, zfs_kernel_param_t *kp)
{
	unsigned long val;
	int error;

	error = kstrtoul(buf, 0, &val);
	if (error)
		return (error);

	if (val > MAX_HRTIMEOUT_SLACK_US)
		return (-EINVAL);

	error = param_set_uint(buf, kp);
	if (error < 0)
		return (error);

	return (0);
}

module_param_call(spl_schedule_hrtimeout_slack_us, param_set_hrtimeout_slack,
	param_get_uint, &spl_schedule_hrtimeout_slack_us, 0644);
MODULE_PARM_DESC(spl_schedule_hrtimeout_slack_us,
	"schedule_hrtimeout_range() delta/slack value in us, default(0)");

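/*
 * Initialize a condition variable.  Only unnamed CV_DEFAULT condition
 * variables are supported; the event and destroy wait queues are set up
 * and the reference count starts at one for the caller.
 */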
void
__cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg)
{
	ASSERT(cvp);
	ASSERT(name == NULL);
	ASSERT(type == CV_DEFAULT);
	ASSERT(arg == NULL);

	cvp->cv_magic = CV_MAGIC;
	init_waitqueue_head(&cvp->cv_event);
	init_waitqueue_head(&cvp->cv_destroy);
	atomic_set(&cvp->cv_waiters, 0);
	atomic_set(&cvp->cv_refs, 1);
	cvp->cv_mutex = NULL;
}
EXPORT_SYMBOL(__cv_init);

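/*
 * Helper for __cv_destroy(): returns 1 once all waiters and references
 * are gone and it is safe to finish tearing down the condition
 * variable, otherwise 0.
 */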
static int
cv_destroy_wakeup(kcondvar_t *cvp)
{
	if (!atomic_read(&cvp->cv_waiters) && !atomic_read(&cvp->cv_refs)) {
		ASSERT(cvp->cv_mutex == NULL);
		ASSERT(!waitqueue_active(&cvp->cv_event));
		return (1);
	}

	return (0);
}

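/*
 * Destroy a condition variable.  The magic is flipped to CV_DESTROY so
 * late users trip an assertion, the caller's reference is dropped, and
 * we block until every remaining waiter and reference has gone away.
 */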
void
__cv_destroy(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);

	cvp->cv_magic = CV_DESTROY;
	atomic_dec(&cvp->cv_refs);

	/* Block until all waiters are woken and references dropped. */
	while (cv_destroy_wakeup(cvp) == 0)
		wait_event_timeout(cvp->cv_destroy, cv_destroy_wakeup(cvp), 1);

	ASSERT3P(cvp->cv_mutex, ==, NULL);
	ASSERT3S(atomic_read(&cvp->cv_refs), ==, 0);
	ASSERT3S(atomic_read(&cvp->cv_waiters), ==, 0);
	ASSERT3S(waitqueue_active(&cvp->cv_event), ==, 0);
}
EXPORT_SYMBOL(__cv_destroy);

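/*
 * Common untimed wait path.  The condition variable is bound to the
 * caller's mutex, the task is queued as an exclusive waiter, the mutex
 * is dropped for the duration of the sleep in 'state' (optionally with
 * I/O accounting), and the mutex is reacquired before returning.
 *
 * Typical caller pattern (sketch only, using the public cv_wait()
 * wrapper from sys/condvar.h):
 *
 *	mutex_enter(&lock);
 *	while (!condition)
 *		cv_wait(&cv, &lock);
 *	... condition now holds, 'lock' is held ...
 *	mutex_exit(&lock);
 */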
static void
cv_wait_common(kcondvar_t *cvp, kmutex_t *mp, int state, int io)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));
	atomic_inc(&cvp->cv_refs);

	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * The mutex must be dropped after prepare_to_wait(); this
	 * ensures we're linked into the waiters list and avoids the
	 * race where 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);
	if (io)
		io_schedule();
	else
		schedule();

	/* No more waiters; a different mutex could be used now. */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy.  But this is
		 * just for debug anyway, so make it best-effort.
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	/*
	 * Reacquire the mutex only after we release the cvp; otherwise we
	 * could deadlock with a thread that holds the mutex and is calling
	 * cv_destroy().
	 */
	mutex_enter(mp);
}

void
__cv_wait(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 0);
}
EXPORT_SYMBOL(__cv_wait);

void
__cv_wait_io(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 1);
}
EXPORT_SYMBOL(__cv_wait_io);

int
__cv_wait_io_sig(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 1);

	return (signal_pending(current) ? 0 : 1);
}
EXPORT_SYMBOL(__cv_wait_io_sig);

int
__cv_wait_sig(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);

	return (signal_pending(current) ? 0 : 1);
}
EXPORT_SYMBOL(__cv_wait_sig);

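/*
 * "Idle" wait: all signals are blocked around an interruptible sleep so
 * a pending signal cannot cut the wait short, and the caller's signal
 * mask is restored afterwards.
 */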
void
__cv_wait_idle(kcondvar_t *cvp, kmutex_t *mp)
{
	sigset_t blocked, saved;

	sigfillset(&blocked);
	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);
	(void) sigprocmask(SIG_SETMASK, &saved, NULL);
}
EXPORT_SYMBOL(__cv_wait_idle);

/*
 * 'expire_time' argument is an absolute wall clock time in jiffies.
 * Returns 1 if there was time remaining when the thread was woken, or
 * -1 if the timeout expired (or had already expired on entry).
 */
static clock_t
__cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time,
    int state, int io)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;
	clock_t time_left;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));

	/* XXX - Does not handle jiffies wrap properly */
	time_left = expire_time - jiffies;
	if (time_left <= 0)
		return (-1);

	atomic_inc(&cvp->cv_refs);
	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * The mutex must be dropped after prepare_to_wait(); this
	 * ensures we're linked into the waiters list and avoids the
	 * race where 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);
	if (io)
		time_left = io_schedule_timeout(time_left);
	else
		time_left = schedule_timeout(time_left);

	/* No more waiters; a different mutex could be used now. */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy.  But this is
		 * just for debug anyway, so make it best-effort.
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	/*
	 * Reacquire the mutex only after we release the cvp; otherwise we
	 * could deadlock with a thread that holds the mutex and is calling
	 * cv_destroy().
	 */
	mutex_enter(mp);
	return (time_left > 0 ? 1 : -1);
}

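/*
 * Jiffies-based timed wait wrappers.  The _sig variant sleeps
 * interruptibly and returns 0 when a signal is pending at wakeup; the
 * _idle variant sleeps interruptibly with all signals blocked.
 */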
int
__cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	return (__cv_timedwait_common(cvp, mp, exp_time,
	    TASK_UNINTERRUPTIBLE, 0));
}
EXPORT_SYMBOL(__cv_timedwait);

int
__cv_timedwait_io(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	return (__cv_timedwait_common(cvp, mp, exp_time,
	    TASK_UNINTERRUPTIBLE, 1));
}
EXPORT_SYMBOL(__cv_timedwait_io);

int
__cv_timedwait_sig(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	int rc;

	rc = __cv_timedwait_common(cvp, mp, exp_time, TASK_INTERRUPTIBLE, 0);
	return (signal_pending(current) ? 0 : rc);
}
EXPORT_SYMBOL(__cv_timedwait_sig);

int
__cv_timedwait_idle(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	sigset_t blocked, saved;
	int rc;

	sigfillset(&blocked);
	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
	rc = __cv_timedwait_common(cvp, mp, exp_time,
	    TASK_INTERRUPTIBLE, 0);
	(void) sigprocmask(SIG_SETMASK, &saved, NULL);

	return (rc);
}
EXPORT_SYMBOL(__cv_timedwait_idle);

/*
 * 'expire_time' argument is an absolute clock time in nanoseconds.
 * Returns 1 if the thread was woken before the timeout expired, or
 * -1 if the timeout expired (or had already expired on entry).
 */
static clock_t
__cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time,
    hrtime_t res, int state)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;
	hrtime_t time_left;
	ktime_t ktime_left;
	u64 slack = 0;
	int rc;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));

	time_left = expire_time - gethrtime();
	if (time_left <= 0)
		return (-1);

	atomic_inc(&cvp->cv_refs);
	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * The mutex must be dropped after prepare_to_wait(); this
	 * ensures we're linked into the waiters list and avoids the
	 * race where 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);

	ktime_left = ktime_set(0, time_left);
	slack = MIN(MAX(res, spl_schedule_hrtimeout_slack_us * NSEC_PER_USEC),
	    MAX_HRTIMEOUT_SLACK_US * NSEC_PER_USEC);
	rc = schedule_hrtimeout_range(&ktime_left, slack, HRTIMER_MODE_REL);

	/* No more waiters; a different mutex could be used now. */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy.  But this is
		 * just for debug anyway, so make it best-effort.
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	mutex_enter(mp);
	return (rc == -EINTR ? 1 : -1);
}

/*
 * Compatibility wrapper for the cv_timedwait_hires() Illumos interface.
 */
static int
cv_timedwait_hires_common(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
    hrtime_t res, int flag, int state)
{
	if (!(flag & CALLOUT_FLAG_ABSOLUTE))
		tim += gethrtime();

	return (__cv_timedwait_hires(cvp, mp, tim, res, state));
}

int
cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res,
    int flag)
{
	return (cv_timedwait_hires_common(cvp, mp, tim, res, flag,
	    TASK_UNINTERRUPTIBLE));
}
EXPORT_SYMBOL(cv_timedwait_hires);

int
cv_timedwait_sig_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
    hrtime_t res, int flag)
{
	int rc;

	rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag,
	    TASK_INTERRUPTIBLE);
	return (signal_pending(current) ? 0 : rc);
}
EXPORT_SYMBOL(cv_timedwait_sig_hires);

int
cv_timedwait_idle_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
    hrtime_t res, int flag)
{
	sigset_t blocked, saved;
	int rc;

	sigfillset(&blocked);
	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
	rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag,
	    TASK_INTERRUPTIBLE);
	(void) sigprocmask(SIG_SETMASK, &saved, NULL);

	return (rc);
}
EXPORT_SYMBOL(cv_timedwait_idle_hires);

void
__cv_signal(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	atomic_inc(&cvp->cv_refs);

	/*
	 * All waiters are added with WQ_FLAG_EXCLUSIVE, so only one
	 * waiter will be set runnable with each call to wake_up().
	 * Additionally, wake_up() holds a spinlock associated with
	 * the wait queue to ensure we don't race waking up processes.
	 */
	if (atomic_read(&cvp->cv_waiters) > 0)
		wake_up(&cvp->cv_event);

	atomic_dec(&cvp->cv_refs);
}
EXPORT_SYMBOL(__cv_signal);

void
__cv_broadcast(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	atomic_inc(&cvp->cv_refs);

	/*
	 * wake_up_all() will wake up all waiters, even those that
	 * have the WQ_FLAG_EXCLUSIVE flag set.
	 */
	if (atomic_read(&cvp->cv_waiters) > 0)
		wake_up_all(&cvp->cv_event);

	atomic_dec(&cvp->cv_refs);
}
EXPORT_SYMBOL(__cv_broadcast);
465