/*
 * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
 * Copyright (C) 2007 The Regents of the University of California.
 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
 * UCRL-CODE-235197
 *
 * This file is part of the SPL, Solaris Porting Layer.
 *
 * The SPL is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2 of the License, or (at your
 * option) any later version.
 *
 * The SPL is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with the SPL.  If not, see <http://www.gnu.org/licenses/>.
 *
 * Solaris Porting Layer (SPL) Condition Variables Implementation.
 */

#include <sys/condvar.h>
#include <sys/time.h>
#include <sys/sysmacros.h>
#include <linux/hrtimer.h>
#include <linux/compiler_compat.h>
#include <linux/mod_compat.h>

#include <linux/sched.h>

#ifdef HAVE_SCHED_SIGNAL_HEADER
#include <linux/sched/signal.h>
#endif

#define	MAX_HRTIMEOUT_SLACK_US	1000
static unsigned int spl_schedule_hrtimeout_slack_us = 0;

static int
param_set_hrtimeout_slack(const char *buf, zfs_kernel_param_t *kp)
{
	unsigned long val;
	int error;

	error = kstrtoul(buf, 0, &val);
	if (error)
		return (error);

	if (val > MAX_HRTIMEOUT_SLACK_US)
		return (-EINVAL);

	error = param_set_uint(buf, kp);
	if (error < 0)
		return (error);

	return (0);
}

module_param_call(spl_schedule_hrtimeout_slack_us, param_set_hrtimeout_slack,
	param_get_uint, &spl_schedule_hrtimeout_slack_us, 0644);
MODULE_PARM_DESC(spl_schedule_hrtimeout_slack_us,
	"schedule_hrtimeout_range() delta/slack value in us, default(0)");

void
__cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg)
{
	ASSERT(cvp);
	ASSERT(name == NULL);
	ASSERT(type == CV_DEFAULT);
	ASSERT(arg == NULL);

	cvp->cv_magic = CV_MAGIC;
	init_waitqueue_head(&cvp->cv_event);
	init_waitqueue_head(&cvp->cv_destroy);
	atomic_set(&cvp->cv_waiters, 0);
	atomic_set(&cvp->cv_refs, 1);
	cvp->cv_mutex = NULL;
}
EXPORT_SYMBOL(__cv_init);
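
/*
 * Illustrative only: a minimal sketch of how a consumer typically pairs
 * these primitives with a kmutex_t.  The 'example_*' names are hypothetical
 * and not part of this file:
 *
 *	kmutex_t example_lock;
 *	kcondvar_t example_cv;
 *	boolean_t example_ready = B_FALSE;
 *
 *	mutex_init(&example_lock, NULL, MUTEX_DEFAULT, NULL);
 *	cv_init(&example_cv, NULL, CV_DEFAULT, NULL);
 *
 *	// Waiter: always re-check the predicate in a loop.
 *	mutex_enter(&example_lock);
 *	while (!example_ready)
 *		cv_wait(&example_cv, &example_lock);
 *	mutex_exit(&example_lock);
 *
 *	// Signaler: update the predicate under the same mutex.
 *	mutex_enter(&example_lock);
 *	example_ready = B_TRUE;
 *	cv_signal(&example_cv);
 *	mutex_exit(&example_lock);
 *
 *	cv_destroy(&example_cv);
 *	mutex_destroy(&example_lock);
 */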

static int
cv_destroy_wakeup(kcondvar_t *cvp)
{
	if (!atomic_read(&cvp->cv_waiters) && !atomic_read(&cvp->cv_refs)) {
		ASSERT(cvp->cv_mutex == NULL);
		ASSERT(!waitqueue_active(&cvp->cv_event));
		return (1);
	}

	return (0);
}

void
__cv_destroy(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);

	cvp->cv_magic = CV_DESTROY;
	atomic_dec(&cvp->cv_refs);

	/* Block until all waiters are woken and references dropped. */
	while (cv_destroy_wakeup(cvp) == 0)
		wait_event_timeout(cvp->cv_destroy, cv_destroy_wakeup(cvp), 1);

	ASSERT3P(cvp->cv_mutex, ==, NULL);
	ASSERT3S(atomic_read(&cvp->cv_refs), ==, 0);
	ASSERT3S(atomic_read(&cvp->cv_waiters), ==, 0);
	ASSERT3S(waitqueue_active(&cvp->cv_event), ==, 0);
}
EXPORT_SYMBOL(__cv_destroy);

static void
cv_wait_common(kcondvar_t *cvp, kmutex_t *mp, int state, int io)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));
	atomic_inc(&cvp->cv_refs);

	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * The mutex is dropped only after prepare_to_wait(); this ensures
	 * we are linked onto the waiters list and avoids the race where
	 * 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);
	if (io)
		io_schedule();
	else
		schedule();

	/* No more waiters; a different mutex could now be used. */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy.  But this is
		 * just for debug anyway, so make it best-effort.
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	/*
	 * Reacquire the mutex only after we release our reference on the
	 * cvp; otherwise we could deadlock with a thread that holds the
	 * mutex and calls cv_destroy().
	 */
	mutex_enter(mp);
}

void
__cv_wait(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 0);
}
EXPORT_SYMBOL(__cv_wait);

void
__cv_wait_io(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 1);
}
EXPORT_SYMBOL(__cv_wait_io);

int
__cv_wait_io_sig(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 1);

	return (signal_pending(current) ? 0 : 1);
}
EXPORT_SYMBOL(__cv_wait_io_sig);

int
__cv_wait_sig(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);

	return (signal_pending(current) ? 0 : 1);
}
EXPORT_SYMBOL(__cv_wait_sig);
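
/*
 * Illustrative only: the *_sig variants above return 0 when the wait was
 * interrupted by a pending signal and nonzero otherwise, so callers are
 * expected to back out on 0, e.g. (hypothetical 'example_*' names):
 *
 *	mutex_enter(&example_lock);
 *	while (!example_ready) {
 *		if (cv_wait_sig(&example_cv, &example_lock) == 0) {
 *			mutex_exit(&example_lock);
 *			return (EINTR);
 *		}
 *	}
 *	mutex_exit(&example_lock);
 */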

void
__cv_wait_idle(kcondvar_t *cvp, kmutex_t *mp)
{
	sigset_t blocked, saved;

	sigfillset(&blocked);
	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);
	(void) sigprocmask(SIG_SETMASK, &saved, NULL);
}
EXPORT_SYMBOL(__cv_wait_idle);

#if defined(HAVE_IO_SCHEDULE_TIMEOUT)
#define	spl_io_schedule_timeout(t)	io_schedule_timeout(t)
#else

struct spl_task_timer {
	struct timer_list timer;
	struct task_struct *task;
};

static void
__cv_wakeup(spl_timer_list_t t)
{
	struct timer_list *tmr = (struct timer_list *)t;
	struct spl_task_timer *task_timer = from_timer(task_timer, tmr, timer);

	wake_up_process(task_timer->task);
}

static long
spl_io_schedule_timeout(long time_left)
{
	long expire_time = jiffies + time_left;
	struct spl_task_timer task_timer;
	struct timer_list *timer = &task_timer.timer;

	task_timer.task = current;

	timer_setup(timer, __cv_wakeup, 0);

	timer->expires = expire_time;
	add_timer(timer);

	io_schedule();

	del_timer_sync(timer);

	time_left = expire_time - jiffies;

	return (time_left < 0 ? 0 : time_left);
}
#endif

/*
 * The 'expire_time' argument is an absolute wall clock time in jiffies.
 * Returns 1 if woken before 'expire_time' was reached, or -1 if the
 * timeout expired.
 */
static clock_t
__cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time,
    int state, int io)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;
	clock_t time_left;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));

	/* XXX - Does not handle jiffies wrap properly */
	time_left = expire_time - jiffies;
	if (time_left <= 0)
		return (-1);

	atomic_inc(&cvp->cv_refs);
	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * The mutex is dropped only after prepare_to_wait(); this ensures
	 * we are linked onto the waiters list and avoids the race where
	 * 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);
	if (io)
		time_left = spl_io_schedule_timeout(time_left);
	else
		time_left = schedule_timeout(time_left);

	/* No more waiters; a different mutex could now be used. */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy.  But this is
		 * just for debug anyway, so make it best-effort.
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	/*
	 * Reacquire the mutex only after we release our reference on the
	 * cvp; otherwise we could deadlock with a thread that holds the
	 * mutex and calls cv_destroy().
	 */
	mutex_enter(mp);
	return (time_left > 0 ? 1 : -1);
}
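
/*
 * Illustrative only: 'exp_time' for the cv_timedwait() family below is an
 * absolute tick value, so callers typically derive it from the current
 * lbolt.  Hypothetical 'example_*' names, one-second timeout; a negative
 * return indicates the timeout expired:
 *
 *	clock_t deadline = ddi_get_lbolt() + hz;
 *
 *	mutex_enter(&example_lock);
 *	while (!example_ready) {
 *		if (cv_timedwait(&example_cv, &example_lock, deadline) < 0)
 *			break;
 *	}
 *	mutex_exit(&example_lock);
 */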

int
__cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	return (__cv_timedwait_common(cvp, mp, exp_time,
	    TASK_UNINTERRUPTIBLE, 0));
}
EXPORT_SYMBOL(__cv_timedwait);

int
__cv_timedwait_io(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	return (__cv_timedwait_common(cvp, mp, exp_time,
	    TASK_UNINTERRUPTIBLE, 1));
}
EXPORT_SYMBOL(__cv_timedwait_io);

int
__cv_timedwait_sig(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	int rc;

	rc = __cv_timedwait_common(cvp, mp, exp_time, TASK_INTERRUPTIBLE, 0);
	return (signal_pending(current) ? 0 : rc);
}
EXPORT_SYMBOL(__cv_timedwait_sig);

int
__cv_timedwait_idle(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	sigset_t blocked, saved;
	int rc;

	sigfillset(&blocked);
	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
	rc = __cv_timedwait_common(cvp, mp, exp_time,
	    TASK_INTERRUPTIBLE, 0);
	(void) sigprocmask(SIG_SETMASK, &saved, NULL);

	return (rc);
}
EXPORT_SYMBOL(__cv_timedwait_idle);

/*
 * The 'expire_time' argument is an absolute clock time in nanoseconds.
 * Returns 1 if woken before 'expire_time' was reached, or -1 if the
 * timeout expired.
 */
static clock_t
__cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time,
    hrtime_t res, int state)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;
	hrtime_t time_left;
	ktime_t ktime_left;
	u64 slack = 0;
	int rc;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));

	time_left = expire_time - gethrtime();
	if (time_left <= 0)
		return (-1);

	atomic_inc(&cvp->cv_refs);
	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * The mutex is dropped only after prepare_to_wait(); this ensures
	 * we are linked onto the waiters list and avoids the race where
	 * 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);

	ktime_left = ktime_set(0, time_left);
	slack = MIN(MAX(res, spl_schedule_hrtimeout_slack_us * NSEC_PER_USEC),
	    MAX_HRTIMEOUT_SLACK_US * NSEC_PER_USEC);
	rc = schedule_hrtimeout_range(&ktime_left, slack, HRTIMER_MODE_REL);

	/* No more waiters; a different mutex could now be used. */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy.  But this is
		 * just for debug anyway, so make it best-effort.
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	mutex_enter(mp);
	return (rc == -EINTR ? 1 : -1);
}
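
/*
 * Illustrative only: with a zero 'flag' the cv_timedwait_hires() wrappers
 * below treat 'tim' as a relative nanosecond timeout, for example a 100 ms
 * wait with 1 ms of requested resolution (hypothetical 'example_*' names):
 *
 *	mutex_enter(&example_lock);
 *	while (!example_ready) {
 *		if (cv_timedwait_hires(&example_cv, &example_lock,
 *		    MSEC2NSEC(100), MSEC2NSEC(1), 0) < 0)
 *			break;
 *	}
 *	mutex_exit(&example_lock);
 *
 * Passing CALLOUT_FLAG_ABSOLUTE instead makes 'tim' an absolute
 * gethrtime() deadline.
 */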
424 */ 425 static int 426 cv_timedwait_hires_common(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, 427 hrtime_t res, int flag, int state) 428 { 429 if (!(flag & CALLOUT_FLAG_ABSOLUTE)) 430 tim += gethrtime(); 431 432 return (__cv_timedwait_hires(cvp, mp, tim, res, state)); 433 } 434 435 int 436 cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res, 437 int flag) 438 { 439 return (cv_timedwait_hires_common(cvp, mp, tim, res, flag, 440 TASK_UNINTERRUPTIBLE)); 441 } 442 EXPORT_SYMBOL(cv_timedwait_hires); 443 444 int 445 cv_timedwait_sig_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, 446 hrtime_t res, int flag) 447 { 448 int rc; 449 450 rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag, 451 TASK_INTERRUPTIBLE); 452 return (signal_pending(current) ? 0 : rc); 453 } 454 EXPORT_SYMBOL(cv_timedwait_sig_hires); 455 456 int 457 cv_timedwait_idle_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, 458 hrtime_t res, int flag) 459 { 460 sigset_t blocked, saved; 461 int rc; 462 463 sigfillset(&blocked); 464 (void) sigprocmask(SIG_BLOCK, &blocked, &saved); 465 rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag, 466 TASK_INTERRUPTIBLE); 467 (void) sigprocmask(SIG_SETMASK, &saved, NULL); 468 469 return (rc); 470 } 471 EXPORT_SYMBOL(cv_timedwait_idle_hires); 472 473 void 474 __cv_signal(kcondvar_t *cvp) 475 { 476 ASSERT(cvp); 477 ASSERT(cvp->cv_magic == CV_MAGIC); 478 atomic_inc(&cvp->cv_refs); 479 480 /* 481 * All waiters are added with WQ_FLAG_EXCLUSIVE so only one 482 * waiter will be set runnable with each call to wake_up(). 483 * Additionally wake_up() holds a spin_lock associated with 484 * the wait queue to ensure we don't race waking up processes. 485 */ 486 if (atomic_read(&cvp->cv_waiters) > 0) 487 wake_up(&cvp->cv_event); 488 489 atomic_dec(&cvp->cv_refs); 490 } 491 EXPORT_SYMBOL(__cv_signal); 492 493 void 494 __cv_broadcast(kcondvar_t *cvp) 495 { 496 ASSERT(cvp); 497 ASSERT(cvp->cv_magic == CV_MAGIC); 498 atomic_inc(&cvp->cv_refs); 499 500 /* 501 * Wake_up_all() will wake up all waiters even those which 502 * have the WQ_FLAG_EXCLUSIVE flag set. 503 */ 504 if (atomic_read(&cvp->cv_waiters) > 0) 505 wake_up_all(&cvp->cv_event); 506 507 atomic_dec(&cvp->cv_refs); 508 } 509 EXPORT_SYMBOL(__cv_broadcast); 510