// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
 * Copyright (C) 2007 The Regents of the University of California.
 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
 * UCRL-CODE-235197
 *
 * This file is part of the SPL, Solaris Porting Layer.
 *
 * The SPL is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2 of the License, or (at your
 * option) any later version.
 *
 * The SPL is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with the SPL. If not, see <http://www.gnu.org/licenses/>.
 *
 * Solaris Porting Layer (SPL) Condition Variables Implementation.
 */

#include <sys/condvar.h>
#include <sys/time.h>
#include <sys/sysmacros.h>
#include <linux/hrtimer.h>
#include <linux/compiler_compat.h>
#include <linux/mod_compat.h>

#include <linux/sched.h>
#include <linux/sched/signal.h>

#define	MAX_HRTIMEOUT_SLACK_US	1000
static unsigned int spl_schedule_hrtimeout_slack_us = 0;

static int
param_set_hrtimeout_slack(const char *buf, zfs_kernel_param_t *kp)
{
	unsigned long val;
	int error;

	error = kstrtoul(buf, 0, &val);
	if (error)
		return (error);

	if (val > MAX_HRTIMEOUT_SLACK_US)
		return (-EINVAL);

	error = param_set_uint(buf, kp);
	if (error < 0)
		return (error);

	return (0);
}

module_param_call(spl_schedule_hrtimeout_slack_us, param_set_hrtimeout_slack,
	param_get_uint, &spl_schedule_hrtimeout_slack_us, 0644);
MODULE_PARM_DESC(spl_schedule_hrtimeout_slack_us,
	"schedule_hrtimeout_range() delta/slack value in us, default(0)");

void
__cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg)
{
	ASSERT(cvp);
	ASSERT0P(name);
	ASSERT(type == CV_DEFAULT);
	ASSERT0P(arg);

	cvp->cv_magic = CV_MAGIC;
	init_waitqueue_head(&cvp->cv_event);
	init_waitqueue_head(&cvp->cv_destroy);
	atomic_set(&cvp->cv_waiters, 0);
	atomic_set(&cvp->cv_refs, 1);
	cvp->cv_mutex = NULL;
}
EXPORT_SYMBOL(__cv_init);

static int
cv_destroy_wakeup(kcondvar_t *cvp)
{
	if (!atomic_read(&cvp->cv_waiters) && !atomic_read(&cvp->cv_refs)) {
		ASSERT0P(cvp->cv_mutex);
		ASSERT(!waitqueue_active(&cvp->cv_event));
		return (1);
	}

	return (0);
}

void
__cv_destroy(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);

	cvp->cv_magic = CV_DESTROY;
	atomic_dec(&cvp->cv_refs);

	/* Block until all waiters are woken and references dropped. */
	while (cv_destroy_wakeup(cvp) == 0)
		wait_event_timeout(cvp->cv_destroy, cv_destroy_wakeup(cvp), 1);

	ASSERT0P(cvp->cv_mutex);
	ASSERT3S(atomic_read(&cvp->cv_refs), ==, 0);
	ASSERT3S(atomic_read(&cvp->cv_waiters), ==, 0);
	ASSERT3S(waitqueue_active(&cvp->cv_event), ==, 0);
}
EXPORT_SYMBOL(__cv_destroy);

static void
cv_wait_common(kcondvar_t *cvp, kmutex_t *mp, int state, int io)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));
	atomic_inc(&cvp->cv_refs);

	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * The mutex must be dropped after prepare_to_wait(); this
	 * ensures we're linked onto the waiters list and avoids the
	 * race where 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);
	if (io)
		io_schedule();
	else
		schedule();

	/* No more waiters, so a different mutex could now be used */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy. But this is
		 * just for debug anyway, so make it best-effort.
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	/*
	 * Reacquire the mutex only after we release the cvp, otherwise we
	 * could deadlock with a thread holding the mutex and calling
	 * cv_destroy().
	 */
	mutex_enter(mp);
}

void
__cv_wait(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 0);
}
EXPORT_SYMBOL(__cv_wait);

void
__cv_wait_io(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 1);
}
EXPORT_SYMBOL(__cv_wait_io);

int
__cv_wait_io_sig(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 1);

	return (signal_pending(current) ? 0 : 1);
}
EXPORT_SYMBOL(__cv_wait_io_sig);

int
__cv_wait_sig(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);

	return (signal_pending(current) ? 0 : 1);
}
EXPORT_SYMBOL(__cv_wait_sig);

void
__cv_wait_idle(kcondvar_t *cvp, kmutex_t *mp)
{
	sigset_t blocked, saved;

	sigfillset(&blocked);
	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);
	(void) sigprocmask(SIG_SETMASK, &saved, NULL);
}
EXPORT_SYMBOL(__cv_wait_idle);

/*
 * 'expire_time' argument is an absolute wall clock time in jiffies.
 * Return value is 1 if we were woken before 'expire_time', or -1 if the
 * timeout expired.
 */
213 */ 214 static clock_t 215 __cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time, 216 int state, int io) 217 { 218 DEFINE_WAIT(wait); 219 kmutex_t *m; 220 clock_t time_left; 221 222 ASSERT(cvp); 223 ASSERT(mp); 224 ASSERT(cvp->cv_magic == CV_MAGIC); 225 ASSERT(mutex_owned(mp)); 226 227 /* XXX - Does not handle jiffie wrap properly */ 228 time_left = expire_time - jiffies; 229 if (time_left <= 0) 230 return (-1); 231 232 atomic_inc(&cvp->cv_refs); 233 m = READ_ONCE(cvp->cv_mutex); 234 if (!m) 235 m = xchg(&cvp->cv_mutex, mp); 236 /* Ensure the same mutex is used by all callers */ 237 ASSERT(m == NULL || m == mp); 238 239 prepare_to_wait_exclusive(&cvp->cv_event, &wait, state); 240 atomic_inc(&cvp->cv_waiters); 241 242 /* 243 * Mutex should be dropped after prepare_to_wait() this 244 * ensures we're linked in to the waiters list and avoids the 245 * race where 'cvp->cv_waiters > 0' but the list is empty. 246 */ 247 mutex_exit(mp); 248 if (io) 249 time_left = io_schedule_timeout(time_left); 250 else 251 time_left = schedule_timeout(time_left); 252 253 /* No more waiters a different mutex could be used */ 254 if (atomic_dec_and_test(&cvp->cv_waiters)) { 255 /* 256 * This is set without any lock, so it's racy. But this is 257 * just for debug anyway, so make it best-effort 258 */ 259 cvp->cv_mutex = NULL; 260 wake_up(&cvp->cv_destroy); 261 } 262 263 finish_wait(&cvp->cv_event, &wait); 264 atomic_dec(&cvp->cv_refs); 265 266 /* 267 * Hold mutex after we release the cvp, otherwise we could dead lock 268 * with a thread holding the mutex and call cv_destroy. 269 */ 270 mutex_enter(mp); 271 return (time_left > 0 ? 1 : -1); 272 } 273 274 int 275 __cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time) 276 { 277 return (__cv_timedwait_common(cvp, mp, exp_time, 278 TASK_UNINTERRUPTIBLE, 0)); 279 } 280 EXPORT_SYMBOL(__cv_timedwait); 281 282 int 283 __cv_timedwait_io(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time) 284 { 285 return (__cv_timedwait_common(cvp, mp, exp_time, 286 TASK_UNINTERRUPTIBLE, 1)); 287 } 288 EXPORT_SYMBOL(__cv_timedwait_io); 289 290 int 291 __cv_timedwait_sig(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time) 292 { 293 int rc; 294 295 rc = __cv_timedwait_common(cvp, mp, exp_time, TASK_INTERRUPTIBLE, 0); 296 return (signal_pending(current) ? 0 : rc); 297 } 298 EXPORT_SYMBOL(__cv_timedwait_sig); 299 300 int 301 __cv_timedwait_idle(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time) 302 { 303 sigset_t blocked, saved; 304 int rc; 305 306 sigfillset(&blocked); 307 (void) sigprocmask(SIG_BLOCK, &blocked, &saved); 308 rc = __cv_timedwait_common(cvp, mp, exp_time, 309 TASK_INTERRUPTIBLE, 0); 310 (void) sigprocmask(SIG_SETMASK, &saved, NULL); 311 312 return (rc); 313 } 314 EXPORT_SYMBOL(__cv_timedwait_idle); 315 /* 316 * 'expire_time' argument is an absolute clock time in nanoseconds. 317 * Return value is time left (expire_time - now) or -1 if timeout occurred. 
318 */ 319 static clock_t 320 __cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time, 321 hrtime_t res, int state) 322 { 323 DEFINE_WAIT(wait); 324 kmutex_t *m; 325 hrtime_t time_left; 326 ktime_t ktime_left; 327 u64 slack = 0; 328 int rc; 329 330 ASSERT(cvp); 331 ASSERT(mp); 332 ASSERT(cvp->cv_magic == CV_MAGIC); 333 ASSERT(mutex_owned(mp)); 334 335 time_left = expire_time - gethrtime(); 336 if (time_left <= 0) 337 return (-1); 338 339 atomic_inc(&cvp->cv_refs); 340 m = READ_ONCE(cvp->cv_mutex); 341 if (!m) 342 m = xchg(&cvp->cv_mutex, mp); 343 /* Ensure the same mutex is used by all callers */ 344 ASSERT(m == NULL || m == mp); 345 346 prepare_to_wait_exclusive(&cvp->cv_event, &wait, state); 347 atomic_inc(&cvp->cv_waiters); 348 349 /* 350 * Mutex should be dropped after prepare_to_wait() this 351 * ensures we're linked in to the waiters list and avoids the 352 * race where 'cvp->cv_waiters > 0' but the list is empty. 353 */ 354 mutex_exit(mp); 355 356 ktime_left = ktime_set(0, time_left); 357 slack = MIN(MAX(res, spl_schedule_hrtimeout_slack_us * NSEC_PER_USEC), 358 MAX_HRTIMEOUT_SLACK_US * NSEC_PER_USEC); 359 rc = schedule_hrtimeout_range(&ktime_left, slack, HRTIMER_MODE_REL); 360 361 /* No more waiters a different mutex could be used */ 362 if (atomic_dec_and_test(&cvp->cv_waiters)) { 363 /* 364 * This is set without any lock, so it's racy. But this is 365 * just for debug anyway, so make it best-effort 366 */ 367 cvp->cv_mutex = NULL; 368 wake_up(&cvp->cv_destroy); 369 } 370 371 finish_wait(&cvp->cv_event, &wait); 372 atomic_dec(&cvp->cv_refs); 373 374 mutex_enter(mp); 375 return (rc == -EINTR ? 1 : -1); 376 } 377 378 /* 379 * Compatibility wrapper for the cv_timedwait_hires() Illumos interface. 380 */ 381 static int 382 cv_timedwait_hires_common(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, 383 hrtime_t res, int flag, int state) 384 { 385 if (!(flag & CALLOUT_FLAG_ABSOLUTE)) 386 tim += gethrtime(); 387 388 return (__cv_timedwait_hires(cvp, mp, tim, res, state)); 389 } 390 391 int 392 cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res, 393 int flag) 394 { 395 return (cv_timedwait_hires_common(cvp, mp, tim, res, flag, 396 TASK_UNINTERRUPTIBLE)); 397 } 398 EXPORT_SYMBOL(cv_timedwait_hires); 399 400 int 401 cv_timedwait_sig_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, 402 hrtime_t res, int flag) 403 { 404 int rc; 405 406 rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag, 407 TASK_INTERRUPTIBLE); 408 return (signal_pending(current) ? 0 : rc); 409 } 410 EXPORT_SYMBOL(cv_timedwait_sig_hires); 411 412 int 413 cv_timedwait_idle_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, 414 hrtime_t res, int flag) 415 { 416 sigset_t blocked, saved; 417 int rc; 418 419 sigfillset(&blocked); 420 (void) sigprocmask(SIG_BLOCK, &blocked, &saved); 421 rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag, 422 TASK_INTERRUPTIBLE); 423 (void) sigprocmask(SIG_SETMASK, &saved, NULL); 424 425 return (rc); 426 } 427 EXPORT_SYMBOL(cv_timedwait_idle_hires); 428 429 void 430 __cv_signal(kcondvar_t *cvp) 431 { 432 ASSERT(cvp); 433 ASSERT(cvp->cv_magic == CV_MAGIC); 434 atomic_inc(&cvp->cv_refs); 435 436 /* 437 * All waiters are added with WQ_FLAG_EXCLUSIVE so only one 438 * waiter will be set runnable with each call to wake_up(). 439 * Additionally wake_up() holds a spin_lock associated with 440 * the wait queue to ensure we don't race waking up processes. 
441 */ 442 if (atomic_read(&cvp->cv_waiters) > 0) 443 wake_up(&cvp->cv_event); 444 445 atomic_dec(&cvp->cv_refs); 446 } 447 EXPORT_SYMBOL(__cv_signal); 448 449 void 450 __cv_broadcast(kcondvar_t *cvp) 451 { 452 ASSERT(cvp); 453 ASSERT(cvp->cv_magic == CV_MAGIC); 454 atomic_inc(&cvp->cv_refs); 455 456 /* 457 * Wake_up_all() will wake up all waiters even those which 458 * have the WQ_FLAG_EXCLUSIVE flag set. 459 */ 460 if (atomic_read(&cvp->cv_waiters) > 0) 461 wake_up_all(&cvp->cv_event); 462 463 atomic_dec(&cvp->cv_refs); 464 } 465 EXPORT_SYMBOL(__cv_broadcast); 466