/*
 * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
 * Copyright (C) 2007 The Regents of the University of California.
 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
 * UCRL-CODE-235197
 *
 * This file is part of the SPL, Solaris Porting Layer.
 *
 * The SPL is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2 of the License, or (at your
 * option) any later version.
 *
 * The SPL is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with the SPL. If not, see <http://www.gnu.org/licenses/>.
 *
 * Solaris Porting Layer (SPL) Condition Variables Implementation.
 */

#include <sys/condvar.h>
#include <sys/time.h>
#include <sys/sysmacros.h>
#include <linux/hrtimer.h>
#include <linux/compiler_compat.h>
#include <linux/mod_compat.h>

#include <linux/sched.h>
#include <linux/sched/signal.h>

#define	MAX_HRTIMEOUT_SLACK_US	1000
static unsigned int spl_schedule_hrtimeout_slack_us = 0;

/*
 * Setter for the spl_schedule_hrtimeout_slack_us module parameter.
 * Values larger than MAX_HRTIMEOUT_SLACK_US are rejected before the
 * string is handed off to the generic unsigned int setter.
 */
static int
param_set_hrtimeout_slack(const char *buf, zfs_kernel_param_t *kp)
{
	unsigned long val;
	int error;

	error = kstrtoul(buf, 0, &val);
	if (error)
		return (error);

	if (val > MAX_HRTIMEOUT_SLACK_US)
		return (-EINVAL);

	error = param_set_uint(buf, kp);
	if (error < 0)
		return (error);

	return (0);
}

module_param_call(spl_schedule_hrtimeout_slack_us, param_set_hrtimeout_slack,
	param_get_uint, &spl_schedule_hrtimeout_slack_us, 0644);
MODULE_PARM_DESC(spl_schedule_hrtimeout_slack_us,
	"schedule_hrtimeout_range() delta/slack value in us, default(0)");

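/*
 * Initialize a condition variable. Only CV_DEFAULT condition variables are
 * supported, and the 'name' and 'arg' arguments are unused; the ASSERTs
 * below enforce this. Typical caller sketch (cv_init()/cv_destroy() are the
 * public wrappers; the variable names are illustrative only):
 *
 *	cv_init(&cv, NULL, CV_DEFAULT, NULL);
 *	...
 *	cv_destroy(&cv);
 */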
void
__cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg)
{
	ASSERT(cvp);
	ASSERT(name == NULL);
	ASSERT(type == CV_DEFAULT);
	ASSERT(arg == NULL);

	cvp->cv_magic = CV_MAGIC;
	init_waitqueue_head(&cvp->cv_event);
	init_waitqueue_head(&cvp->cv_destroy);
	atomic_set(&cvp->cv_waiters, 0);
	atomic_set(&cvp->cv_refs, 1);
	cvp->cv_mutex = NULL;
}
EXPORT_SYMBOL(__cv_init);

static int
cv_destroy_wakeup(kcondvar_t *cvp)
{
	if (!atomic_read(&cvp->cv_waiters) && !atomic_read(&cvp->cv_refs)) {
		ASSERT(cvp->cv_mutex == NULL);
		ASSERT(!waitqueue_active(&cvp->cv_event));
		return (1);
	}

	return (0);
}

void
__cv_destroy(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);

	cvp->cv_magic = CV_DESTROY;
	atomic_dec(&cvp->cv_refs);

	/* Block until all waiters are woken and references dropped. */
	while (cv_destroy_wakeup(cvp) == 0)
		wait_event_timeout(cvp->cv_destroy, cv_destroy_wakeup(cvp), 1);

	ASSERT3P(cvp->cv_mutex, ==, NULL);
	ASSERT3S(atomic_read(&cvp->cv_refs), ==, 0);
	ASSERT3S(atomic_read(&cvp->cv_waiters), ==, 0);
	ASSERT3S(waitqueue_active(&cvp->cv_event), ==, 0);
}
EXPORT_SYMBOL(__cv_destroy);

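/*
 * Common wait path for the cv_wait() family. The caller must hold 'mp'.
 * The thread is queued on the condvar as an exclusive waiter, 'mp' is
 * dropped, the thread sleeps in the given task state (accounted as I/O
 * wait when 'io' is set), and 'mp' is reacquired before returning.
 */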
static void
cv_wait_common(kcondvar_t *cvp, kmutex_t *mp, int state, int io)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));
	atomic_inc(&cvp->cv_refs);

	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * The mutex must be dropped after prepare_to_wait(); this ensures
	 * we are linked onto the waiters list and avoids the race where
	 * 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);
	if (io)
		io_schedule();
	else
		schedule();

	/* With no more waiters a different mutex could be used. */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy. But this is
		 * just for debugging anyway, so make it best-effort.
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	/*
	 * Reacquire the mutex only after we have released the cvp;
	 * otherwise we could deadlock with a thread holding the mutex
	 * and calling cv_destroy().
	 */
	mutex_enter(mp);
}

void
__cv_wait(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 0);
}
EXPORT_SYMBOL(__cv_wait);

void
__cv_wait_io(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 1);
}
EXPORT_SYMBOL(__cv_wait_io);

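/*
 * The *_sig variants sleep interruptibly: they return 0 when a signal is
 * pending on return and 1 otherwise.
 */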
int
__cv_wait_io_sig(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 1);

	return (signal_pending(current) ? 0 : 1);
}
EXPORT_SYMBOL(__cv_wait_io_sig);

int
__cv_wait_sig(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);

	return (signal_pending(current) ? 0 : 1);
}
EXPORT_SYMBOL(__cv_wait_sig);

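/*
 * An "idle" wait blocks every signal around an interruptible sleep. The
 * task can still be woken by cv_signal()/cv_broadcast(), but it does not
 * count toward the load average the way an uninterruptible sleep would.
 */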
void
__cv_wait_idle(kcondvar_t *cvp, kmutex_t *mp)
{
	sigset_t blocked, saved;

	sigfillset(&blocked);
	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);
	(void) sigprocmask(SIG_SETMASK, &saved, NULL);
}
EXPORT_SYMBOL(__cv_wait_idle);

/*
 * The 'expire_time' argument is an absolute wall clock time in jiffies.
 * The return value is 1 if the wait was satisfied with time remaining,
 * or -1 if the timeout expired.
 */
static clock_t
__cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time,
    int state, int io)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;
	clock_t time_left;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));

	/* XXX - Does not handle jiffie wrap properly */
	time_left = expire_time - jiffies;
	if (time_left <= 0)
		return (-1);

	atomic_inc(&cvp->cv_refs);
	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * The mutex must be dropped after prepare_to_wait(); this ensures
	 * we are linked onto the waiters list and avoids the race where
	 * 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);
	if (io)
		time_left = io_schedule_timeout(time_left);
	else
		time_left = schedule_timeout(time_left);

	/* With no more waiters a different mutex could be used. */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy. But this is
		 * just for debugging anyway, so make it best-effort.
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	/*
	 * Reacquire the mutex only after we have released the cvp;
	 * otherwise we could deadlock with a thread holding the mutex
	 * and calling cv_destroy().
	 */
	mutex_enter(mp);
	return (time_left > 0 ? 1 : -1);
}

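/*
 * Typical caller pattern (illustrative sketch only, using the public
 * cv_timedwait() wrapper plus ddi_get_lbolt() and SEC_TO_TICK() from the
 * SPL headers): wait up to one second for a condition, rechecking the
 * predicate after every wakeup.
 *
 *	mutex_enter(&lock);
 *	while (!condition) {
 *		if (cv_timedwait(&cv, &lock,
 *		    ddi_get_lbolt() + SEC_TO_TICK(1)) == -1)
 *			break;		(timed out)
 *	}
 *	mutex_exit(&lock);
 */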
int
__cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	return (__cv_timedwait_common(cvp, mp, exp_time,
	    TASK_UNINTERRUPTIBLE, 0));
}
EXPORT_SYMBOL(__cv_timedwait);

int
__cv_timedwait_io(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	return (__cv_timedwait_common(cvp, mp, exp_time,
	    TASK_UNINTERRUPTIBLE, 1));
}
EXPORT_SYMBOL(__cv_timedwait_io);

int
__cv_timedwait_sig(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	int rc;

	rc = __cv_timedwait_common(cvp, mp, exp_time, TASK_INTERRUPTIBLE, 0);
	return (signal_pending(current) ? 0 : rc);
}
EXPORT_SYMBOL(__cv_timedwait_sig);

int
__cv_timedwait_idle(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	sigset_t blocked, saved;
	int rc;

	sigfillset(&blocked);
	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
	rc = __cv_timedwait_common(cvp, mp, exp_time,
	    TASK_INTERRUPTIBLE, 0);
	(void) sigprocmask(SIG_SETMASK, &saved, NULL);

	return (rc);
}
EXPORT_SYMBOL(__cv_timedwait_idle);

/*
 * The 'expire_time' argument is an absolute clock time in nanoseconds.
 * The return value is 1 if the wait was satisfied before the timeout,
 * or -1 if the timeout expired.
 */
static clock_t
__cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time,
    hrtime_t res, int state)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;
	hrtime_t time_left;
	ktime_t ktime_left;
	u64 slack = 0;
	int rc;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));

	time_left = expire_time - gethrtime();
	if (time_left <= 0)
		return (-1);

	atomic_inc(&cvp->cv_refs);
	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * The mutex must be dropped after prepare_to_wait(); this ensures
	 * we are linked onto the waiters list and avoids the race where
	 * 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);

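	/*
	 * Use the larger of the caller-supplied resolution and the module
	 * tunable as the hrtimer slack, capped at MAX_HRTIMEOUT_SLACK_US
	 * (converted to nanoseconds).
	 */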
	ktime_left = ktime_set(0, time_left);
	slack = MIN(MAX(res, spl_schedule_hrtimeout_slack_us * NSEC_PER_USEC),
	    MAX_HRTIMEOUT_SLACK_US * NSEC_PER_USEC);
	rc = schedule_hrtimeout_range(&ktime_left, slack, HRTIMER_MODE_REL);

	/* With no more waiters a different mutex could be used. */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy. But this is
		 * just for debugging anyway, so make it best-effort.
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	mutex_enter(mp);
	return (rc == -EINTR ? 1 : -1);
}

/*
 * Compatibility wrapper for the cv_timedwait_hires() Illumos interface.
 */
static int
cv_timedwait_hires_common(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
    hrtime_t res, int flag, int state)
{
	if (!(flag & CALLOUT_FLAG_ABSOLUTE))
		tim += gethrtime();

	return (__cv_timedwait_hires(cvp, mp, tim, res, state));
}

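/*
 * Illustrative sketch only: a relative 100 ms high-resolution wait with
 * roughly millisecond slack (MSEC2NSEC() as provided by the SPL headers;
 * a flag of 0 means 'tim' is relative to the current time).
 *
 *	(void) cv_timedwait_hires(&cv, &lock, MSEC2NSEC(100),
 *	    MSEC2NSEC(1), 0);
 */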
int
cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res,
    int flag)
{
	return (cv_timedwait_hires_common(cvp, mp, tim, res, flag,
	    TASK_UNINTERRUPTIBLE));
}
EXPORT_SYMBOL(cv_timedwait_hires);

int
cv_timedwait_sig_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
    hrtime_t res, int flag)
{
	int rc;

	rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag,
	    TASK_INTERRUPTIBLE);
	return (signal_pending(current) ? 0 : rc);
}
EXPORT_SYMBOL(cv_timedwait_sig_hires);

int
cv_timedwait_idle_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
    hrtime_t res, int flag)
{
	sigset_t blocked, saved;
	int rc;

	sigfillset(&blocked);
	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
	rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag,
	    TASK_INTERRUPTIBLE);
	(void) sigprocmask(SIG_SETMASK, &saved, NULL);

	return (rc);
}
EXPORT_SYMBOL(cv_timedwait_idle_hires);

void
__cv_signal(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	atomic_inc(&cvp->cv_refs);

	/*
	 * All waiters are added with WQ_FLAG_EXCLUSIVE so only one
	 * waiter will be set runnable with each call to wake_up().
	 * Additionally wake_up() holds a spin_lock associated with
	 * the wait queue to ensure we don't race waking up processes.
	 */
	if (atomic_read(&cvp->cv_waiters) > 0)
		wake_up(&cvp->cv_event);

	atomic_dec(&cvp->cv_refs);
}
EXPORT_SYMBOL(__cv_signal);

void
__cv_broadcast(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	atomic_inc(&cvp->cv_refs);

	/*
	 * wake_up_all() will wake up all waiters, even those which
	 * have the WQ_FLAG_EXCLUSIVE flag set.
	 */
	if (atomic_read(&cvp->cv_waiters) > 0)
		wake_up_all(&cvp->cv_event);

	atomic_dec(&cvp->cv_refs);
}
EXPORT_SYMBOL(__cv_broadcast);