xref: /freebsd/sys/contrib/openzfs/module/os/linux/spl/spl-condvar.c (revision 61145dc2b94f12f6a47344fb9aac702321880e43)
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
 *  Copyright (C) 2007 The Regents of the University of California.
 *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
 *  UCRL-CODE-235197
 *
 *  This file is part of the SPL, Solaris Porting Layer.
 *
 *  The SPL is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU General Public License as published by the
 *  Free Software Foundation; either version 2 of the License, or (at your
 *  option) any later version.
 *
 *  The SPL is distributed in the hope that it will be useful, but WITHOUT
 *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
 *
 *  Solaris Porting Layer (SPL) Condition Variables Implementation.
 */

#include <sys/condvar.h>
#include <sys/time.h>
#include <sys/sysmacros.h>
#include <linux/hrtimer.h>
#include <linux/compiler_compat.h>
#include <linux/mod_compat.h>

#include <linux/sched.h>
#include <linux/sched/signal.h>

#define	MAX_HRTIMEOUT_SLACK_US	1000
static unsigned int spl_schedule_hrtimeout_slack_us = 0;

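/*
 * Setter for the spl_schedule_hrtimeout_slack_us module parameter: reject
 * values above MAX_HRTIMEOUT_SLACK_US, then store the value via
 * param_set_uint().
 */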
static int
param_set_hrtimeout_slack(const char *buf, zfs_kernel_param_t *kp)
{
	unsigned long val;
	int error;

	error = kstrtoul(buf, 0, &val);
	if (error)
		return (error);

	if (val > MAX_HRTIMEOUT_SLACK_US)
		return (-EINVAL);

	error = param_set_uint(buf, kp);
	if (error < 0)
		return (error);

	return (0);
}

module_param_call(spl_schedule_hrtimeout_slack_us, param_set_hrtimeout_slack,
	param_get_uint, &spl_schedule_hrtimeout_slack_us, 0644);
MODULE_PARM_DESC(spl_schedule_hrtimeout_slack_us,
	"schedule_hrtimeout_range() delta/slack value in us, default(0)");

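/*
 * Initialize a condition variable.  Only CV_DEFAULT condition variables
 * are supported; the name, type, and arg parameters are asserted to their
 * expected values and otherwise unused.
 */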
void
__cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg)
{
	ASSERT(cvp);
	ASSERT(name == NULL);
	ASSERT(type == CV_DEFAULT);
	ASSERT(arg == NULL);

	cvp->cv_magic = CV_MAGIC;
	init_waitqueue_head(&cvp->cv_event);
	init_waitqueue_head(&cvp->cv_destroy);
	atomic_set(&cvp->cv_waiters, 0);
	atomic_set(&cvp->cv_refs, 1);
	cvp->cv_mutex = NULL;
}
EXPORT_SYMBOL(__cv_init);

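/*
 * Returns non-zero once the condition variable has no remaining waiters
 * or references and may therefore be torn down safely.
 */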
static int
cv_destroy_wakeup(kcondvar_t *cvp)
{
	if (!atomic_read(&cvp->cv_waiters) && !atomic_read(&cvp->cv_refs)) {
		ASSERT(cvp->cv_mutex == NULL);
		ASSERT(!waitqueue_active(&cvp->cv_event));
		return (1);
	}

	return (0);
}

void
__cv_destroy(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);

	cvp->cv_magic = CV_DESTROY;
	atomic_dec(&cvp->cv_refs);

	/* Block until all waiters are woken and references dropped. */
	while (cv_destroy_wakeup(cvp) == 0)
		wait_event_timeout(cvp->cv_destroy, cv_destroy_wakeup(cvp), 1);

	ASSERT3P(cvp->cv_mutex, ==, NULL);
	ASSERT3S(atomic_read(&cvp->cv_refs), ==, 0);
	ASSERT3S(atomic_read(&cvp->cv_waiters), ==, 0);
	ASSERT3S(waitqueue_active(&cvp->cv_event), ==, 0);
}
EXPORT_SYMBOL(__cv_destroy);

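/*
 * Common wait path: register as an exclusive waiter on the event wait
 * queue, drop the caller's mutex, sleep (optionally as an I/O wait), and
 * reacquire the mutex before returning.
 */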
static void
cv_wait_common(kcondvar_t *cvp, kmutex_t *mp, int state, int io)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));
	atomic_inc(&cvp->cv_refs);

	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * The mutex must be dropped after prepare_to_wait(); this ensures
	 * we're linked in to the waiters list and avoids the race where
	 * 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);
	if (io)
		io_schedule();
	else
		schedule();

	/* No more waiters; a different mutex could be used. */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy. But this is
		 * just for debug anyway, so make it best-effort.
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	/*
	 * Acquire the mutex only after the cvp reference has been dropped;
	 * otherwise we could deadlock with a thread that holds the mutex
	 * and calls cv_destroy().
	 */
	mutex_enter(mp);
}

void
__cv_wait(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 0);
}
EXPORT_SYMBOL(__cv_wait);

void
__cv_wait_io(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 1);
}
EXPORT_SYMBOL(__cv_wait_io);

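/*
 * Signal-aware waits: return 0 if a signal is pending when the wait
 * completes and non-zero otherwise, mirroring the illumos cv_wait_sig()
 * convention.
 */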
int
__cv_wait_io_sig(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 1);

	return (signal_pending(current) ? 0 : 1);
}
EXPORT_SYMBOL(__cv_wait_io_sig);

int
__cv_wait_sig(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);

	return (signal_pending(current) ? 0 : 1);
}
EXPORT_SYMBOL(__cv_wait_sig);

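/*
 * "Idle" wait: block all signals around an interruptible sleep so the
 * wait cannot be interrupted, while the task avoids the uninterruptible
 * (D) state (and, presumably, its effect on the load average).
 */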
void
__cv_wait_idle(kcondvar_t *cvp, kmutex_t *mp)
{
	sigset_t blocked, saved;

	sigfillset(&blocked);
	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);
	(void) sigprocmask(SIG_SETMASK, &saved, NULL);
}
EXPORT_SYMBOL(__cv_wait_idle);

/*
 * 'expire_time' argument is an absolute wall clock time in jiffies.
 * Returns a positive value if the thread was woken before the timeout
 * expired, or -1 on timeout.
 */
static clock_t
__cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time,
    int state, int io)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;
	clock_t time_left;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));

	/* XXX - Does not handle jiffie wrap properly */
	time_left = expire_time - jiffies;
	if (time_left <= 0)
		return (-1);

	atomic_inc(&cvp->cv_refs);
	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * The mutex must be dropped after prepare_to_wait(); this ensures
	 * we're linked in to the waiters list and avoids the race where
	 * 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);
	if (io)
		time_left = io_schedule_timeout(time_left);
	else
		time_left = schedule_timeout(time_left);

	/* No more waiters; a different mutex could be used. */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy. But this is
		 * just for debug anyway, so make it best-effort.
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	/*
	 * Acquire the mutex only after the cvp reference has been dropped;
	 * otherwise we could deadlock with a thread that holds the mutex
	 * and calls cv_destroy().
	 */
	mutex_enter(mp);
	return (time_left > 0 ? 1 : -1);
}

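/*
 * Jiffies-based timed wait wrappers: uninterruptible, I/O-accounted,
 * signal-aware, and idle variants built on __cv_timedwait_common().
 */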
int
__cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	return (__cv_timedwait_common(cvp, mp, exp_time,
	    TASK_UNINTERRUPTIBLE, 0));
}
EXPORT_SYMBOL(__cv_timedwait);

int
__cv_timedwait_io(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	return (__cv_timedwait_common(cvp, mp, exp_time,
	    TASK_UNINTERRUPTIBLE, 1));
}
EXPORT_SYMBOL(__cv_timedwait_io);

int
__cv_timedwait_sig(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	int rc;

	rc = __cv_timedwait_common(cvp, mp, exp_time, TASK_INTERRUPTIBLE, 0);
	return (signal_pending(current) ? 0 : rc);
}
EXPORT_SYMBOL(__cv_timedwait_sig);

int
__cv_timedwait_idle(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	sigset_t blocked, saved;
	int rc;

	sigfillset(&blocked);
	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
	rc = __cv_timedwait_common(cvp, mp, exp_time,
	    TASK_INTERRUPTIBLE, 0);
	(void) sigprocmask(SIG_SETMASK, &saved, NULL);

	return (rc);
}
EXPORT_SYMBOL(__cv_timedwait_idle);

/*
 * 'expire_time' argument is an absolute clock time in nanoseconds.
 * Returns a positive value if the thread was woken before 'expire_time',
 * or -1 if the timeout expired.
 */
static clock_t
__cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time,
    hrtime_t res, int state)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;
	hrtime_t time_left;
	ktime_t ktime_left;
	u64 slack = 0;
	int rc;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));

	time_left = expire_time - gethrtime();
	if (time_left <= 0)
		return (-1);

	atomic_inc(&cvp->cv_refs);
	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * The mutex must be dropped after prepare_to_wait(); this ensures
	 * we're linked in to the waiters list and avoids the race where
	 * 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);

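	/*
	 * Use the larger of the caller-supplied resolution and the
	 * spl_schedule_hrtimeout_slack_us tunable as the hrtimer slack,
	 * capped at MAX_HRTIMEOUT_SLACK_US.
	 */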
	ktime_left = ktime_set(0, time_left);
	slack = MIN(MAX(res, spl_schedule_hrtimeout_slack_us * NSEC_PER_USEC),
	    MAX_HRTIMEOUT_SLACK_US * NSEC_PER_USEC);
	rc = schedule_hrtimeout_range(&ktime_left, slack, HRTIMER_MODE_REL);

	/* No more waiters; a different mutex could be used. */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy. But this is
		 * just for debug anyway, so make it best-effort.
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	mutex_enter(mp);
	return (rc == -EINTR ? 1 : -1);
}

/*
 * Compatibility wrapper for the cv_timedwait_hires() Illumos interface.
 */
static int
cv_timedwait_hires_common(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
    hrtime_t res, int flag, int state)
{
	if (!(flag & CALLOUT_FLAG_ABSOLUTE))
		tim += gethrtime();

	return (__cv_timedwait_hires(cvp, mp, tim, res, state));
}

int
cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res,
    int flag)
{
	return (cv_timedwait_hires_common(cvp, mp, tim, res, flag,
	    TASK_UNINTERRUPTIBLE));
}
EXPORT_SYMBOL(cv_timedwait_hires);

int
cv_timedwait_sig_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
    hrtime_t res, int flag)
{
	int rc;

	rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag,
	    TASK_INTERRUPTIBLE);
	return (signal_pending(current) ? 0 : rc);
}
EXPORT_SYMBOL(cv_timedwait_sig_hires);

int
cv_timedwait_idle_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
    hrtime_t res, int flag)
{
	sigset_t blocked, saved;
	int rc;

	sigfillset(&blocked);
	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
	rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag,
	    TASK_INTERRUPTIBLE);
	(void) sigprocmask(SIG_SETMASK, &saved, NULL);

	return (rc);
}
EXPORT_SYMBOL(cv_timedwait_idle_hires);

void
__cv_signal(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	atomic_inc(&cvp->cv_refs);

	/*
	 * All waiters are added with WQ_FLAG_EXCLUSIVE, so only one
	 * waiter will be set runnable with each call to wake_up().
	 * Additionally, wake_up() holds a spin lock associated with
	 * the wait queue to ensure we don't race waking up processes.
	 */
	if (atomic_read(&cvp->cv_waiters) > 0)
		wake_up(&cvp->cv_event);

	atomic_dec(&cvp->cv_refs);
}
EXPORT_SYMBOL(__cv_signal);

void
__cv_broadcast(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	atomic_inc(&cvp->cv_refs);

	/*
	 * wake_up_all() will wake up all waiters, even those that
	 * have the WQ_FLAG_EXCLUSIVE flag set.
	 */
	if (atomic_read(&cvp->cv_waiters) > 0)
		wake_up_all(&cvp->cv_event);

	atomic_dec(&cvp->cv_refs);
}
EXPORT_SYMBOL(__cv_broadcast);