// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
 * Copyright (C) 2007 The Regents of the University of California.
 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
 * UCRL-CODE-235197
 *
 * This file is part of the SPL, Solaris Porting Layer.
 *
 * The SPL is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2 of the License, or (at your
 * option) any later version.
 *
 * The SPL is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with the SPL.  If not, see <http://www.gnu.org/licenses/>.
 *
 * Solaris Porting Layer (SPL) Condition Variables Implementation.
 */
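
/*
 * Minimal usage sketch of this interface (the lock, cv, and ready names
 * below are illustrative only and are not defined in this file):
 *
 *	Waiting side:
 *		mutex_enter(&lock);
 *		while (!ready)
 *			cv_wait(&cv, &lock);
 *		mutex_exit(&lock);
 *
 *	Signaling side:
 *		mutex_enter(&lock);
 *		ready = B_TRUE;
 *		cv_signal(&cv);
 *		mutex_exit(&lock);
 */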

#include <sys/condvar.h>
#include <sys/time.h>
#include <sys/sysmacros.h>
#include <linux/hrtimer.h>
#include <linux/compiler_compat.h>
#include <linux/mod_compat.h>

#include <linux/sched.h>
#include <linux/sched/signal.h>

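/*
 * Upper bound, in microseconds, for the spl_schedule_hrtimeout_slack_us
 * tunable below; it also caps the slack ultimately passed to
 * schedule_hrtimeout_range() in the high-resolution timed waits.
 */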
#define MAX_HRTIMEOUT_SLACK_US 1000
static unsigned int spl_schedule_hrtimeout_slack_us = 0;

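/*
 * Setter for the spl_schedule_hrtimeout_slack_us module parameter.  It
 * rejects values larger than MAX_HRTIMEOUT_SLACK_US before handing the
 * string off to the generic uint setter.
 */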
static int
param_set_hrtimeout_slack(const char *buf, zfs_kernel_param_t *kp)
{
	unsigned long val;
	int error;

	error = kstrtoul(buf, 0, &val);
	if (error)
		return (error);

	if (val > MAX_HRTIMEOUT_SLACK_US)
		return (-EINVAL);

	error = param_set_uint(buf, kp);
	if (error < 0)
		return (error);

	return (0);
}

module_param_call(spl_schedule_hrtimeout_slack_us, param_set_hrtimeout_slack,
	param_get_uint, &spl_schedule_hrtimeout_slack_us, 0644);
MODULE_PARM_DESC(spl_schedule_hrtimeout_slack_us,
	"schedule_hrtimeout_range() delta/slack value in us, default(0)");

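/*
 * Initialize a condition variable.  Only the default type is supported,
 * so the name, type, and arg arguments are expected to be NULL,
 * CV_DEFAULT, and NULL respectively (as the ASSERTs below require).
 */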
void
__cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg)
{
	ASSERT(cvp);
	ASSERT(name == NULL);
	ASSERT(type == CV_DEFAULT);
	ASSERT(arg == NULL);

	cvp->cv_magic = CV_MAGIC;
	init_waitqueue_head(&cvp->cv_event);
	init_waitqueue_head(&cvp->cv_destroy);
	atomic_set(&cvp->cv_waiters, 0);
	atomic_set(&cvp->cv_refs, 1);
	cvp->cv_mutex = NULL;
}
EXPORT_SYMBOL(__cv_init);

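/*
 * Helper for __cv_destroy(): returns 1 once the last waiter and the last
 * reference are gone and the condition variable may safely be destroyed,
 * otherwise 0.
 */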
static int
cv_destroy_wakeup(kcondvar_t *cvp)
{
	if (!atomic_read(&cvp->cv_waiters) && !atomic_read(&cvp->cv_refs)) {
		ASSERT(cvp->cv_mutex == NULL);
		ASSERT(!waitqueue_active(&cvp->cv_event));
		return (1);
	}

	return (0);
}

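/*
 * Destroy a condition variable.  The magic value is cleared first so any
 * late use is caught, then the caller blocks until every waiter has been
 * woken and every outstanding reference has been dropped.
 */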
void
__cv_destroy(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);

	cvp->cv_magic = CV_DESTROY;
	atomic_dec(&cvp->cv_refs);

	/* Block until all waiters are woken and references dropped. */
	while (cv_destroy_wakeup(cvp) == 0)
		wait_event_timeout(cvp->cv_destroy, cv_destroy_wakeup(cvp), 1);

	ASSERT3P(cvp->cv_mutex, ==, NULL);
	ASSERT3S(atomic_read(&cvp->cv_refs), ==, 0);
	ASSERT3S(atomic_read(&cvp->cv_waiters), ==, 0);
	ASSERT3S(waitqueue_active(&cvp->cv_event), ==, 0);
}
EXPORT_SYMBOL(__cv_destroy);

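/*
 * Common untimed wait path.  The caller must hold 'mp'.  The mutex is
 * dropped only after the task has been queued on the wait queue, the task
 * then sleeps in the given state (accounted as I/O wait when 'io' is set),
 * and the mutex is reacquired before returning.
 */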
static void
cv_wait_common(kcondvar_t *cvp, kmutex_t *mp, int state, int io)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));
	atomic_inc(&cvp->cv_refs);

	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * The mutex must be dropped after prepare_to_wait(); this ensures
	 * we're linked in to the waiters list and avoids the race where
	 * 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);
	if (io)
		io_schedule();
	else
		schedule();

	/* No more waiters, so a different mutex could be used next. */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy.  But this is
		 * just for debug anyway, so make it best-effort.
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	/*
	 * Reacquire the mutex only after releasing the cvp, otherwise we
	 * could deadlock with a thread holding the mutex and calling
	 * cv_destroy().
	 */
	mutex_enter(mp);
}

void
__cv_wait(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 0);
}
EXPORT_SYMBOL(__cv_wait);

void
__cv_wait_io(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 1);
}
EXPORT_SYMBOL(__cv_wait_io);

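/*
 * Interruptible waits: the sleep uses TASK_INTERRUPTIBLE, and both
 * __cv_wait_io_sig() and __cv_wait_sig() return 0 if a signal is pending
 * when the wait completes and 1 otherwise.
 */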
int
__cv_wait_io_sig(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 1);

	return (signal_pending(current) ? 0 : 1);
}
EXPORT_SYMBOL(__cv_wait_io_sig);

int
__cv_wait_sig(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);

	return (signal_pending(current) ? 0 : 1);
}
EXPORT_SYMBOL(__cv_wait_sig);

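/*
 * "Idle" wait: all signals are blocked around an interruptible sleep and
 * the original signal mask is restored afterwards, so the wait cannot be
 * cut short by a signal.
 */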
void
__cv_wait_idle(kcondvar_t *cvp, kmutex_t *mp)
{
	sigset_t blocked, saved;

	sigfillset(&blocked);
	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);
	(void) sigprocmask(SIG_SETMASK, &saved, NULL);
}
EXPORT_SYMBOL(__cv_wait_idle);

/*
 * The 'expire_time' argument is an absolute wall clock time in jiffies.
 * The return value is 1 if the task was woken before 'expire_time', or
 * -1 if the timeout expired (or had already passed on entry).
 */
static clock_t
__cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time,
    int state, int io)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;
	clock_t time_left;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));

	/* XXX - Does not handle jiffy wrap properly */
	time_left = expire_time - jiffies;
	if (time_left <= 0)
		return (-1);

	atomic_inc(&cvp->cv_refs);
	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * The mutex must be dropped after prepare_to_wait(); this ensures
	 * we're linked in to the waiters list and avoids the race where
	 * 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);
	if (io)
		time_left = io_schedule_timeout(time_left);
	else
		time_left = schedule_timeout(time_left);

	/* No more waiters, so a different mutex could be used next. */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy.  But this is
		 * just for debug anyway, so make it best-effort.
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	/*
	 * Reacquire the mutex only after releasing the cvp, otherwise we
	 * could deadlock with a thread holding the mutex and calling
	 * cv_destroy().
	 */
	mutex_enter(mp);
	return (time_left > 0 ? 1 : -1);
}

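/*
 * Wrappers around __cv_timedwait_common() selecting the task state and
 * I/O accounting.  They return 1 if woken before 'exp_time', -1 on
 * timeout, and the _sig variant additionally returns 0 when a signal
 * is pending.
 */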
int
__cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	return (__cv_timedwait_common(cvp, mp, exp_time,
	    TASK_UNINTERRUPTIBLE, 0));
}
EXPORT_SYMBOL(__cv_timedwait);

int
__cv_timedwait_io(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	return (__cv_timedwait_common(cvp, mp, exp_time,
	    TASK_UNINTERRUPTIBLE, 1));
}
EXPORT_SYMBOL(__cv_timedwait_io);

int
__cv_timedwait_sig(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	int rc;

	rc = __cv_timedwait_common(cvp, mp, exp_time, TASK_INTERRUPTIBLE, 0);
	return (signal_pending(current) ? 0 : rc);
}
EXPORT_SYMBOL(__cv_timedwait_sig);

int
__cv_timedwait_idle(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	sigset_t blocked, saved;
	int rc;

	sigfillset(&blocked);
	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
	rc = __cv_timedwait_common(cvp, mp, exp_time,
	    TASK_INTERRUPTIBLE, 0);
	(void) sigprocmask(SIG_SETMASK, &saved, NULL);

	return (rc);
}
EXPORT_SYMBOL(__cv_timedwait_idle);

/*
 * The 'expire_time' argument is an absolute clock time in nanoseconds.
 * The return value is 1 if the task was woken before 'expire_time', or
 * -1 if the timeout expired (or had already passed on entry).
 */
static clock_t
__cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time,
    hrtime_t res, int state)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;
	hrtime_t time_left;
	ktime_t ktime_left;
	u64 slack = 0;
	int rc;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));

	time_left = expire_time - gethrtime();
	if (time_left <= 0)
		return (-1);

	atomic_inc(&cvp->cv_refs);
	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * The mutex must be dropped after prepare_to_wait(); this ensures
	 * we're linked in to the waiters list and avoids the race where
	 * 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);

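	/*
	 * Sleep on a high-resolution timer.  The slack handed to
	 * schedule_hrtimeout_range() is the larger of the caller-supplied
	 * resolution and the spl_schedule_hrtimeout_slack_us tunable,
	 * clamped to MAX_HRTIMEOUT_SLACK_US.
	 */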
	ktime_left = ktime_set(0, time_left);
	slack = MIN(MAX(res, spl_schedule_hrtimeout_slack_us * NSEC_PER_USEC),
	    MAX_HRTIMEOUT_SLACK_US * NSEC_PER_USEC);
	rc = schedule_hrtimeout_range(&ktime_left, slack, HRTIMER_MODE_REL);

	/* No more waiters, so a different mutex could be used next. */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy.  But this is
		 * just for debug anyway, so make it best-effort.
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	mutex_enter(mp);
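	/*
	 * schedule_hrtimeout_range() returns -EINTR when the task was woken
	 * (or interrupted by a signal) before the timer expired and 0 on
	 * timeout, so translate that into the cv_timedwait() convention of
	 * 1 (woken) or -1 (timed out).
	 */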
	return (rc == -EINTR ? 1 : -1);
}

/*
 * Compatibility wrapper for the cv_timedwait_hires() Illumos interface.
 */
static int
cv_timedwait_hires_common(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
    hrtime_t res, int flag, int state)
{
	if (!(flag & CALLOUT_FLAG_ABSOLUTE))
		tim += gethrtime();

	return (__cv_timedwait_hires(cvp, mp, tim, res, state));
}

int
cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res,
    int flag)
{
	return (cv_timedwait_hires_common(cvp, mp, tim, res, flag,
	    TASK_UNINTERRUPTIBLE));
}
EXPORT_SYMBOL(cv_timedwait_hires);

int
cv_timedwait_sig_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
    hrtime_t res, int flag)
{
	int rc;

	rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag,
	    TASK_INTERRUPTIBLE);
	return (signal_pending(current) ? 0 : rc);
}
EXPORT_SYMBOL(cv_timedwait_sig_hires);

int
cv_timedwait_idle_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
    hrtime_t res, int flag)
{
	sigset_t blocked, saved;
	int rc;

	sigfillset(&blocked);
	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
	rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag,
	    TASK_INTERRUPTIBLE);
	(void) sigprocmask(SIG_SETMASK, &saved, NULL);

	return (rc);
}
EXPORT_SYMBOL(cv_timedwait_idle_hires);

void
__cv_signal(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	atomic_inc(&cvp->cv_refs);

	/*
	 * All waiters are added with WQ_FLAG_EXCLUSIVE, so only one
	 * waiter will be set runnable with each call to wake_up().
	 * Additionally, wake_up() holds a spinlock associated with
	 * the wait queue to ensure we don't race waking up processes.
	 */
	if (atomic_read(&cvp->cv_waiters) > 0)
		wake_up(&cvp->cv_event);

	atomic_dec(&cvp->cv_refs);
}
EXPORT_SYMBOL(__cv_signal);

void
__cv_broadcast(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	atomic_inc(&cvp->cv_refs);

	/*
	 * wake_up_all() will wake up all waiters, even those which
	 * have the WQ_FLAG_EXCLUSIVE flag set.
	 */
	if (atomic_read(&cvp->cv_waiters) > 0)
		wake_up_all(&cvp->cv_event);

	atomic_dec(&cvp->cv_refs);
}
EXPORT_SYMBOL(__cv_broadcast);