/*
 * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
 * Copyright (C) 2007 The Regents of the University of California.
 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
 * UCRL-CODE-235197
 *
 * This file is part of the SPL, Solaris Porting Layer.
 *
 * The SPL is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2 of the License, or (at your
 * option) any later version.
 *
 * The SPL is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with the SPL.  If not, see <http://www.gnu.org/licenses/>.
 *
 * Solaris Porting Layer (SPL) Condition Variables Implementation.
 */

#include <sys/condvar.h>
#include <sys/time.h>
#include <sys/sysmacros.h>
#include <linux/hrtimer.h>
#include <linux/compiler_compat.h>
#include <linux/mod_compat.h>

#include <linux/sched.h>
#include <linux/sched/signal.h>

#define	MAX_HRTIMEOUT_SLACK_US	1000
static unsigned int spl_schedule_hrtimeout_slack_us = 0;

static int
param_set_hrtimeout_slack(const char *buf, zfs_kernel_param_t *kp)
{
	unsigned long val;
	int error;

	error = kstrtoul(buf, 0, &val);
	if (error)
		return (error);

	if (val > MAX_HRTIMEOUT_SLACK_US)
		return (-EINVAL);

	error = param_set_uint(buf, kp);
	if (error < 0)
		return (error);

	return (0);
}

module_param_call(spl_schedule_hrtimeout_slack_us, param_set_hrtimeout_slack,
	param_get_uint, &spl_schedule_hrtimeout_slack_us, 0644);
MODULE_PARM_DESC(spl_schedule_hrtimeout_slack_us,
	"schedule_hrtimeout_range() delta/slack value in us, default(0)");

void
__cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg)
{
	ASSERT(cvp);
	ASSERT(name == NULL);
	ASSERT(type == CV_DEFAULT);
	ASSERT(arg == NULL);

	cvp->cv_magic = CV_MAGIC;
	init_waitqueue_head(&cvp->cv_event);
	init_waitqueue_head(&cvp->cv_destroy);
	atomic_set(&cvp->cv_waiters, 0);
	atomic_set(&cvp->cv_refs, 1);
	cvp->cv_mutex = NULL;
}
EXPORT_SYMBOL(__cv_init);

static int
cv_destroy_wakeup(kcondvar_t *cvp)
{
	if (!atomic_read(&cvp->cv_waiters) && !atomic_read(&cvp->cv_refs)) {
		ASSERT(cvp->cv_mutex == NULL);
		ASSERT(!waitqueue_active(&cvp->cv_event));
		return (1);
	}

	return (0);
}

void
__cv_destroy(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);

	cvp->cv_magic = CV_DESTROY;
	atomic_dec(&cvp->cv_refs);

	/* Block until all waiters are woken and references dropped. */
	while (cv_destroy_wakeup(cvp) == 0)
		wait_event_timeout(cvp->cv_destroy, cv_destroy_wakeup(cvp), 1);

	ASSERT3P(cvp->cv_mutex, ==, NULL);
	ASSERT3S(atomic_read(&cvp->cv_refs), ==, 0);
	ASSERT3S(atomic_read(&cvp->cv_waiters), ==, 0);
	ASSERT3S(waitqueue_active(&cvp->cv_event), ==, 0);
}
EXPORT_SYMBOL(__cv_destroy);

static void
cv_wait_common(kcondvar_t *cvp, kmutex_t *mp, int state, int io)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));
	atomic_inc(&cvp->cv_refs);

	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * The mutex should be dropped after prepare_to_wait(); this
	 * ensures we're linked into the waiters list and avoids the
	 * race where 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);
	if (io)
		io_schedule();
	else
		schedule();

	/* No more waiters; a different mutex could be used */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy.  But this is
		 * just for debug anyway, so make it best-effort.
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	/*
	 * Hold the mutex after we release the cvp, otherwise we could
	 * deadlock with a thread holding the mutex and calling cv_destroy().
	 */
	mutex_enter(mp);
}

void
__cv_wait(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 0);
}
EXPORT_SYMBOL(__cv_wait);

void
__cv_wait_io(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 1);
}
EXPORT_SYMBOL(__cv_wait_io);

int
__cv_wait_io_sig(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 1);

	return (signal_pending(current) ? 0 : 1);
}
EXPORT_SYMBOL(__cv_wait_io_sig);

int
__cv_wait_sig(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);

	return (signal_pending(current) ? 0 : 1);
}
EXPORT_SYMBOL(__cv_wait_sig);

void
__cv_wait_idle(kcondvar_t *cvp, kmutex_t *mp)
{
	sigset_t blocked, saved;

	sigfillset(&blocked);
	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);
	(void) sigprocmask(SIG_SETMASK, &saved, NULL);
}
EXPORT_SYMBOL(__cv_wait_idle);

/*
 * The 'expire_time' argument is an absolute wall clock time in jiffies.
 * Return value is 1 if the caller was woken before 'expire_time', or -1
 * if the timeout expired.
 */
212 */ 213 static clock_t 214 __cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time, 215 int state, int io) 216 { 217 DEFINE_WAIT(wait); 218 kmutex_t *m; 219 clock_t time_left; 220 221 ASSERT(cvp); 222 ASSERT(mp); 223 ASSERT(cvp->cv_magic == CV_MAGIC); 224 ASSERT(mutex_owned(mp)); 225 226 /* XXX - Does not handle jiffie wrap properly */ 227 time_left = expire_time - jiffies; 228 if (time_left <= 0) 229 return (-1); 230 231 atomic_inc(&cvp->cv_refs); 232 m = READ_ONCE(cvp->cv_mutex); 233 if (!m) 234 m = xchg(&cvp->cv_mutex, mp); 235 /* Ensure the same mutex is used by all callers */ 236 ASSERT(m == NULL || m == mp); 237 238 prepare_to_wait_exclusive(&cvp->cv_event, &wait, state); 239 atomic_inc(&cvp->cv_waiters); 240 241 /* 242 * Mutex should be dropped after prepare_to_wait() this 243 * ensures we're linked in to the waiters list and avoids the 244 * race where 'cvp->cv_waiters > 0' but the list is empty. 245 */ 246 mutex_exit(mp); 247 if (io) 248 time_left = io_schedule_timeout(time_left); 249 else 250 time_left = schedule_timeout(time_left); 251 252 /* No more waiters a different mutex could be used */ 253 if (atomic_dec_and_test(&cvp->cv_waiters)) { 254 /* 255 * This is set without any lock, so it's racy. But this is 256 * just for debug anyway, so make it best-effort 257 */ 258 cvp->cv_mutex = NULL; 259 wake_up(&cvp->cv_destroy); 260 } 261 262 finish_wait(&cvp->cv_event, &wait); 263 atomic_dec(&cvp->cv_refs); 264 265 /* 266 * Hold mutex after we release the cvp, otherwise we could dead lock 267 * with a thread holding the mutex and call cv_destroy. 268 */ 269 mutex_enter(mp); 270 return (time_left > 0 ? 1 : -1); 271 } 272 273 int 274 __cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time) 275 { 276 return (__cv_timedwait_common(cvp, mp, exp_time, 277 TASK_UNINTERRUPTIBLE, 0)); 278 } 279 EXPORT_SYMBOL(__cv_timedwait); 280 281 int 282 __cv_timedwait_io(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time) 283 { 284 return (__cv_timedwait_common(cvp, mp, exp_time, 285 TASK_UNINTERRUPTIBLE, 1)); 286 } 287 EXPORT_SYMBOL(__cv_timedwait_io); 288 289 int 290 __cv_timedwait_sig(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time) 291 { 292 int rc; 293 294 rc = __cv_timedwait_common(cvp, mp, exp_time, TASK_INTERRUPTIBLE, 0); 295 return (signal_pending(current) ? 0 : rc); 296 } 297 EXPORT_SYMBOL(__cv_timedwait_sig); 298 299 int 300 __cv_timedwait_idle(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time) 301 { 302 sigset_t blocked, saved; 303 int rc; 304 305 sigfillset(&blocked); 306 (void) sigprocmask(SIG_BLOCK, &blocked, &saved); 307 rc = __cv_timedwait_common(cvp, mp, exp_time, 308 TASK_INTERRUPTIBLE, 0); 309 (void) sigprocmask(SIG_SETMASK, &saved, NULL); 310 311 return (rc); 312 } 313 EXPORT_SYMBOL(__cv_timedwait_idle); 314 /* 315 * 'expire_time' argument is an absolute clock time in nanoseconds. 316 * Return value is time left (expire_time - now) or -1 if timeout occurred. 
317 */ 318 static clock_t 319 __cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time, 320 hrtime_t res, int state) 321 { 322 DEFINE_WAIT(wait); 323 kmutex_t *m; 324 hrtime_t time_left; 325 ktime_t ktime_left; 326 u64 slack = 0; 327 int rc; 328 329 ASSERT(cvp); 330 ASSERT(mp); 331 ASSERT(cvp->cv_magic == CV_MAGIC); 332 ASSERT(mutex_owned(mp)); 333 334 time_left = expire_time - gethrtime(); 335 if (time_left <= 0) 336 return (-1); 337 338 atomic_inc(&cvp->cv_refs); 339 m = READ_ONCE(cvp->cv_mutex); 340 if (!m) 341 m = xchg(&cvp->cv_mutex, mp); 342 /* Ensure the same mutex is used by all callers */ 343 ASSERT(m == NULL || m == mp); 344 345 prepare_to_wait_exclusive(&cvp->cv_event, &wait, state); 346 atomic_inc(&cvp->cv_waiters); 347 348 /* 349 * Mutex should be dropped after prepare_to_wait() this 350 * ensures we're linked in to the waiters list and avoids the 351 * race where 'cvp->cv_waiters > 0' but the list is empty. 352 */ 353 mutex_exit(mp); 354 355 ktime_left = ktime_set(0, time_left); 356 slack = MIN(MAX(res, spl_schedule_hrtimeout_slack_us * NSEC_PER_USEC), 357 MAX_HRTIMEOUT_SLACK_US * NSEC_PER_USEC); 358 rc = schedule_hrtimeout_range(&ktime_left, slack, HRTIMER_MODE_REL); 359 360 /* No more waiters a different mutex could be used */ 361 if (atomic_dec_and_test(&cvp->cv_waiters)) { 362 /* 363 * This is set without any lock, so it's racy. But this is 364 * just for debug anyway, so make it best-effort 365 */ 366 cvp->cv_mutex = NULL; 367 wake_up(&cvp->cv_destroy); 368 } 369 370 finish_wait(&cvp->cv_event, &wait); 371 atomic_dec(&cvp->cv_refs); 372 373 mutex_enter(mp); 374 return (rc == -EINTR ? 1 : -1); 375 } 376 377 /* 378 * Compatibility wrapper for the cv_timedwait_hires() Illumos interface. 379 */ 380 static int 381 cv_timedwait_hires_common(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, 382 hrtime_t res, int flag, int state) 383 { 384 if (!(flag & CALLOUT_FLAG_ABSOLUTE)) 385 tim += gethrtime(); 386 387 return (__cv_timedwait_hires(cvp, mp, tim, res, state)); 388 } 389 390 int 391 cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res, 392 int flag) 393 { 394 return (cv_timedwait_hires_common(cvp, mp, tim, res, flag, 395 TASK_UNINTERRUPTIBLE)); 396 } 397 EXPORT_SYMBOL(cv_timedwait_hires); 398 399 int 400 cv_timedwait_sig_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, 401 hrtime_t res, int flag) 402 { 403 int rc; 404 405 rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag, 406 TASK_INTERRUPTIBLE); 407 return (signal_pending(current) ? 0 : rc); 408 } 409 EXPORT_SYMBOL(cv_timedwait_sig_hires); 410 411 int 412 cv_timedwait_idle_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, 413 hrtime_t res, int flag) 414 { 415 sigset_t blocked, saved; 416 int rc; 417 418 sigfillset(&blocked); 419 (void) sigprocmask(SIG_BLOCK, &blocked, &saved); 420 rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag, 421 TASK_INTERRUPTIBLE); 422 (void) sigprocmask(SIG_SETMASK, &saved, NULL); 423 424 return (rc); 425 } 426 EXPORT_SYMBOL(cv_timedwait_idle_hires); 427 428 void 429 __cv_signal(kcondvar_t *cvp) 430 { 431 ASSERT(cvp); 432 ASSERT(cvp->cv_magic == CV_MAGIC); 433 atomic_inc(&cvp->cv_refs); 434 435 /* 436 * All waiters are added with WQ_FLAG_EXCLUSIVE so only one 437 * waiter will be set runnable with each call to wake_up(). 438 * Additionally wake_up() holds a spin_lock associated with 439 * the wait queue to ensure we don't race waking up processes. 
440 */ 441 if (atomic_read(&cvp->cv_waiters) > 0) 442 wake_up(&cvp->cv_event); 443 444 atomic_dec(&cvp->cv_refs); 445 } 446 EXPORT_SYMBOL(__cv_signal); 447 448 void 449 __cv_broadcast(kcondvar_t *cvp) 450 { 451 ASSERT(cvp); 452 ASSERT(cvp->cv_magic == CV_MAGIC); 453 atomic_inc(&cvp->cv_refs); 454 455 /* 456 * Wake_up_all() will wake up all waiters even those which 457 * have the WQ_FLAG_EXCLUSIVE flag set. 458 */ 459 if (atomic_read(&cvp->cv_waiters) > 0) 460 wake_up_all(&cvp->cv_event); 461 462 atomic_dec(&cvp->cv_refs); 463 } 464 EXPORT_SYMBOL(__cv_broadcast); 465