xref: /freebsd/sys/contrib/openzfs/module/os/linux/spl/spl-condvar.c (revision 8833aad7befe90716c7526ce6858344ba635582f)
/*
 *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
 *  Copyright (C) 2007 The Regents of the University of California.
 *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
 *  UCRL-CODE-235197
 *
 *  This file is part of the SPL, Solaris Porting Layer.
 *  For details, see <http://zfsonlinux.org/>.
 *
 *  The SPL is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU General Public License as published by the
 *  Free Software Foundation; either version 2 of the License, or (at your
 *  option) any later version.
 *
 *  The SPL is distributed in the hope that it will be useful, but WITHOUT
 *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
 *
 *  Solaris Porting Layer (SPL) Condition Variables Implementation.
 */

#include <sys/condvar.h>
#include <sys/time.h>
#include <sys/sysmacros.h>
#include <linux/hrtimer.h>
#include <linux/compiler_compat.h>
#include <linux/mod_compat.h>

#include <linux/sched.h>

#ifdef HAVE_SCHED_SIGNAL_HEADER
#include <linux/sched/signal.h>
#endif

#define	MAX_HRTIMEOUT_SLACK_US	1000
unsigned int spl_schedule_hrtimeout_slack_us = 0;

static int
param_set_hrtimeout_slack(const char *buf, zfs_kernel_param_t *kp)
{
	unsigned long val;
	int error;

	error = kstrtoul(buf, 0, &val);
	if (error)
		return (error);

	if (val > MAX_HRTIMEOUT_SLACK_US)
		return (-EINVAL);

	error = param_set_uint(buf, kp);
	if (error < 0)
		return (error);

	return (0);
}

module_param_call(spl_schedule_hrtimeout_slack_us, param_set_hrtimeout_slack,
	param_get_uint, &spl_schedule_hrtimeout_slack_us, 0644);
MODULE_PARM_DESC(spl_schedule_hrtimeout_slack_us,
	"schedule_hrtimeout_range() delta/slack value in us, default(0)");

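/*
 * Illustrative note (an assumption, not taken from this file): since the
 * parameter above is registered with permissions 0644, it can normally be
 * set at module load time or adjusted later through sysfs, along the lines
 * of:
 *
 *	modprobe spl spl_schedule_hrtimeout_slack_us=250
 *	echo 250 > /sys/module/spl/parameters/spl_schedule_hrtimeout_slack_us
 *
 * param_set_hrtimeout_slack() above rejects values larger than
 * MAX_HRTIMEOUT_SLACK_US (1000 us) with EINVAL.
 */
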
void
__cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg)
{
	ASSERT(cvp);
	ASSERT(name == NULL);
	ASSERT(type == CV_DEFAULT);
	ASSERT(arg == NULL);

	cvp->cv_magic = CV_MAGIC;
	init_waitqueue_head(&cvp->cv_event);
	init_waitqueue_head(&cvp->cv_destroy);
	atomic_set(&cvp->cv_waiters, 0);
	atomic_set(&cvp->cv_refs, 1);
	cvp->cv_mutex = NULL;
}
EXPORT_SYMBOL(__cv_init);
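
/*
 * Usage sketch (illustrative, assuming the cv_init() wrapper from
 * sys/condvar.h resolves to __cv_init()): callers are expected to pass a
 * NULL name, CV_DEFAULT type, and NULL arg, matching the ASSERTs above.
 *
 *	kcondvar_t cv;
 *	kmutex_t lock;
 *
 *	mutex_init(&lock, NULL, MUTEX_DEFAULT, NULL);
 *	cv_init(&cv, NULL, CV_DEFAULT, NULL);
 */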

static int
cv_destroy_wakeup(kcondvar_t *cvp)
{
	if (!atomic_read(&cvp->cv_waiters) && !atomic_read(&cvp->cv_refs)) {
		ASSERT(cvp->cv_mutex == NULL);
		ASSERT(!waitqueue_active(&cvp->cv_event));
		return (1);
	}

	return (0);
}

void
__cv_destroy(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);

	cvp->cv_magic = CV_DESTROY;
	atomic_dec(&cvp->cv_refs);

	/* Block until all waiters are woken and references dropped. */
	while (cv_destroy_wakeup(cvp) == 0)
		wait_event_timeout(cvp->cv_destroy, cv_destroy_wakeup(cvp), 1);

	ASSERT3P(cvp->cv_mutex, ==, NULL);
	ASSERT3S(atomic_read(&cvp->cv_refs), ==, 0);
	ASSERT3S(atomic_read(&cvp->cv_waiters), ==, 0);
	ASSERT3S(waitqueue_active(&cvp->cv_event), ==, 0);
}
EXPORT_SYMBOL(__cv_destroy);
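
/*
 * Usage sketch (illustrative teardown order, not taken from this file):
 * wake any remaining waiters while the shared state is still valid, drop
 * the mutex, and only then destroy the condition variable; __cv_destroy()
 * above blocks until every waiter has dropped its reference.
 *
 *	mutex_enter(&lock);
 *	shutting_down = B_TRUE;
 *	cv_broadcast(&cv);
 *	mutex_exit(&lock);
 *
 *	cv_destroy(&cv);
 *	mutex_destroy(&lock);
 */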

static void
cv_wait_common(kcondvar_t *cvp, kmutex_t *mp, int state, int io)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));
	atomic_inc(&cvp->cv_refs);

	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * The mutex should be dropped after prepare_to_wait(); this
	 * ensures we're linked onto the waiters list and avoids the
	 * race where 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);
	if (io)
		io_schedule();
	else
		schedule();

	/* No more waiters, so a different mutex could be used */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy.  But this is
		 * just for debug anyway, so make it best-effort.
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	/*
	 * Reacquire the mutex after we release the cvp; otherwise we could
	 * deadlock with a thread holding the mutex and calling cv_destroy().
	 */
	mutex_enter(mp);
}

void
__cv_wait(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 0);
}
EXPORT_SYMBOL(__cv_wait);
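
/*
 * Usage sketch (illustrative, assuming the cv_wait() wrapper): the predicate
 * must be rechecked in a loop, since a waiter may be woken by cv_broadcast()
 * before the condition it cares about is actually true.
 *
 *	mutex_enter(&lock);
 *	while (!condition)
 *		cv_wait(&cv, &lock);
 *	... condition now holds, mutex still held ...
 *	mutex_exit(&lock);
 */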

void
__cv_wait_io(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 1);
}
EXPORT_SYMBOL(__cv_wait_io);

int
__cv_wait_io_sig(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 1);

	return (signal_pending(current) ? 0 : 1);
}
EXPORT_SYMBOL(__cv_wait_io_sig);

int
__cv_wait_sig(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);

	return (signal_pending(current) ? 0 : 1);
}
EXPORT_SYMBOL(__cv_wait_sig);
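
/*
 * Usage sketch (illustrative, assuming the cv_wait_sig() wrapper): a return
 * value of 0 means the sleep was interrupted by a pending signal, so callers
 * typically give up and report the interruption instead of looping forever.
 *
 *	mutex_enter(&lock);
 *	while (!condition) {
 *		if (cv_wait_sig(&cv, &lock) == 0) {
 *			mutex_exit(&lock);
 *			return (EINTR);
 *		}
 *	}
 *	mutex_exit(&lock);
 */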

void
__cv_wait_idle(kcondvar_t *cvp, kmutex_t *mp)
{
	sigset_t blocked, saved;

	sigfillset(&blocked);
	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);
	(void) sigprocmask(SIG_SETMASK, &saved, NULL);
}
EXPORT_SYMBOL(__cv_wait_idle);

#if defined(HAVE_IO_SCHEDULE_TIMEOUT)
#define	spl_io_schedule_timeout(t)	io_schedule_timeout(t)
#else

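/*
 * When the kernel does not export io_schedule_timeout(), emulate it below:
 * arm a one-shot timer whose callback wakes the sleeping task, call
 * io_schedule(), and report however much of the timeout remains.
 */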
struct spl_task_timer {
	struct timer_list timer;
	struct task_struct *task;
};

static void
__cv_wakeup(spl_timer_list_t t)
{
	struct timer_list *tmr = (struct timer_list *)t;
	struct spl_task_timer *task_timer = from_timer(task_timer, tmr, timer);

	wake_up_process(task_timer->task);
}

static long
spl_io_schedule_timeout(long time_left)
{
	long expire_time = jiffies + time_left;
	struct spl_task_timer task_timer;
	struct timer_list *timer = &task_timer.timer;

	task_timer.task = current;

	timer_setup(timer, __cv_wakeup, 0);

	timer->expires = expire_time;
	add_timer(timer);

	io_schedule();

	del_timer_sync(timer);

	time_left = expire_time - jiffies;

	return (time_left < 0 ? 0 : time_left);
}
#endif

/*
 * The 'expire_time' argument is an absolute wall clock time in jiffies.
 * The return value is 1 if the thread was woken before the timeout
 * expired, or -1 if the timeout occurred.
 */
static clock_t
__cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time,
    int state, int io)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;
	clock_t time_left;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));

	/* XXX - Does not handle jiffies wrap properly */
	time_left = expire_time - jiffies;
	if (time_left <= 0)
		return (-1);

	atomic_inc(&cvp->cv_refs);
	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * The mutex should be dropped after prepare_to_wait(); this
	 * ensures we're linked onto the waiters list and avoids the
	 * race where 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);
	if (io)
		time_left = spl_io_schedule_timeout(time_left);
	else
		time_left = schedule_timeout(time_left);

	/* No more waiters, so a different mutex could be used */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy.  But this is
		 * just for debug anyway, so make it best-effort.
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	/*
	 * Reacquire the mutex after we release the cvp; otherwise we could
	 * deadlock with a thread holding the mutex and calling cv_destroy().
	 */
	mutex_enter(mp);
	return (time_left > 0 ? 1 : -1);
}

int
__cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	return (__cv_timedwait_common(cvp, mp, exp_time,
	    TASK_UNINTERRUPTIBLE, 0));
}
EXPORT_SYMBOL(__cv_timedwait);
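
/*
 * Usage sketch (illustrative, assuming the cv_timedwait() wrapper and the
 * ddi_get_lbolt()/MSEC_TO_TICK() helpers provided elsewhere by the SPL):
 * the expiration is an absolute time in jiffies, so a relative timeout is
 * expressed against the current lbolt value; a -1 return indicates the
 * timeout expired before the cv was signaled.
 *
 *	mutex_enter(&lock);
 *	while (!condition) {
 *		if (cv_timedwait(&cv, &lock,
 *		    ddi_get_lbolt() + MSEC_TO_TICK(100)) == -1)
 *			break;
 *	}
 *	mutex_exit(&lock);
 */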

int
__cv_timedwait_io(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	return (__cv_timedwait_common(cvp, mp, exp_time,
	    TASK_UNINTERRUPTIBLE, 1));
}
EXPORT_SYMBOL(__cv_timedwait_io);

int
__cv_timedwait_sig(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	int rc;

	rc = __cv_timedwait_common(cvp, mp, exp_time, TASK_INTERRUPTIBLE, 0);
	return (signal_pending(current) ? 0 : rc);
}
EXPORT_SYMBOL(__cv_timedwait_sig);

int
__cv_timedwait_idle(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	sigset_t blocked, saved;
	int rc;

	sigfillset(&blocked);
	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
	rc = __cv_timedwait_common(cvp, mp, exp_time,
	    TASK_INTERRUPTIBLE, 0);
	(void) sigprocmask(SIG_SETMASK, &saved, NULL);

	return (rc);
}
EXPORT_SYMBOL(__cv_timedwait_idle);

/*
 * The 'expire_time' argument is an absolute clock time in nanoseconds.
 * The return value is 1 if the thread was woken before the timeout
 * expired, or -1 if the timeout occurred.
 */
static clock_t
__cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time,
    hrtime_t res, int state)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;
	hrtime_t time_left;
	ktime_t ktime_left;
	u64 slack = 0;
	int rc;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));

	time_left = expire_time - gethrtime();
	if (time_left <= 0)
		return (-1);

	atomic_inc(&cvp->cv_refs);
	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * The mutex should be dropped after prepare_to_wait(); this
	 * ensures we're linked onto the waiters list and avoids the
	 * race where 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);

	ktime_left = ktime_set(0, time_left);
	slack = MIN(MAX(res, spl_schedule_hrtimeout_slack_us * NSEC_PER_USEC),
	    MAX_HRTIMEOUT_SLACK_US * NSEC_PER_USEC);
	rc = schedule_hrtimeout_range(&ktime_left, slack, HRTIMER_MODE_REL);

	/* No more waiters, so a different mutex could be used */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy.  But this is
		 * just for debug anyway, so make it best-effort.
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	mutex_enter(mp);
	return (rc == -EINTR ? 1 : -1);
}

/*
 * Compatibility wrapper for the cv_timedwait_hires() Illumos interface.
 */
static int
cv_timedwait_hires_common(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
    hrtime_t res, int flag, int state)
{
	if (!(flag & CALLOUT_FLAG_ABSOLUTE))
		tim += gethrtime();

	return (__cv_timedwait_hires(cvp, mp, tim, res, state));
}

int
cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res,
    int flag)
{
	return (cv_timedwait_hires_common(cvp, mp, tim, res, flag,
	    TASK_UNINTERRUPTIBLE));
}
EXPORT_SYMBOL(cv_timedwait_hires);
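
/*
 * Usage sketch (illustrative, assuming the MSEC2NSEC() helper from
 * sys/time.h): without CALLOUT_FLAG_ABSOLUTE the 'tim' argument is relative,
 * so the call below sleeps for at most 10ms with roughly 1ms of timer slack,
 * returning -1 once the timeout expires.
 *
 *	mutex_enter(&lock);
 *	while (!condition) {
 *		if (cv_timedwait_hires(&cv, &lock, MSEC2NSEC(10),
 *		    MSEC2NSEC(1), 0) == -1)
 *			break;
 *	}
 *	mutex_exit(&lock);
 */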

int
cv_timedwait_sig_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
    hrtime_t res, int flag)
{
	int rc;

	rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag,
	    TASK_INTERRUPTIBLE);
	return (signal_pending(current) ? 0 : rc);
}
EXPORT_SYMBOL(cv_timedwait_sig_hires);

int
cv_timedwait_idle_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
    hrtime_t res, int flag)
{
	sigset_t blocked, saved;
	int rc;

	sigfillset(&blocked);
	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
	rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag,
	    TASK_INTERRUPTIBLE);
	(void) sigprocmask(SIG_SETMASK, &saved, NULL);

	return (rc);
}
EXPORT_SYMBOL(cv_timedwait_idle_hires);

void
__cv_signal(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	atomic_inc(&cvp->cv_refs);

	/*
	 * All waiters are added with WQ_FLAG_EXCLUSIVE, so only one
	 * waiter will be set runnable with each call to wake_up().
	 * Additionally, wake_up() holds a spin_lock associated with
	 * the wait queue to ensure we don't race waking up processes.
	 */
	if (atomic_read(&cvp->cv_waiters) > 0)
		wake_up(&cvp->cv_event);

	atomic_dec(&cvp->cv_refs);
}
EXPORT_SYMBOL(__cv_signal);
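
/*
 * Usage sketch (illustrative producer side, assuming the cv_signal()
 * wrapper): update the shared state while holding the same mutex the
 * waiters use, then wake exactly one of them.
 *
 *	mutex_enter(&lock);
 *	condition = B_TRUE;
 *	cv_signal(&cv);
 *	mutex_exit(&lock);
 */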

void
__cv_broadcast(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	atomic_inc(&cvp->cv_refs);

	/*
	 * wake_up_all() will wake up all waiters, even those which
	 * have the WQ_FLAG_EXCLUSIVE flag set.
	 */
	if (atomic_read(&cvp->cv_waiters) > 0)
		wake_up_all(&cvp->cv_event);

	atomic_dec(&cvp->cv_refs);
}
EXPORT_SYMBOL(__cv_broadcast);