/*
 * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
 * Copyright (C) 2007 The Regents of the University of California.
 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 * Written by Brian Behlendorf <behlendorf1@llnl.gov>.
 * UCRL-CODE-235197
 *
 * This file is part of the SPL, Solaris Porting Layer.
 * For details, see <http://zfsonlinux.org/>.
 *
 * The SPL is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2 of the License, or (at your
 * option) any later version.
 *
 * The SPL is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with the SPL.  If not, see <http://www.gnu.org/licenses/>.
 *
 * Solaris Porting Layer (SPL) Condition Variables Implementation.
 */

#include <sys/condvar.h>
#include <sys/time.h>
#include <sys/sysmacros.h>
#include <linux/hrtimer.h>
#include <linux/compiler_compat.h>
#include <linux/mod_compat.h>

#include <linux/sched.h>

#ifdef HAVE_SCHED_SIGNAL_HEADER
#include <linux/sched/signal.h>
#endif

#define	MAX_HRTIMEOUT_SLACK_US	1000
unsigned int spl_schedule_hrtimeout_slack_us = 0;

static int
param_set_hrtimeout_slack(const char *buf, zfs_kernel_param_t *kp)
{
	unsigned long val;
	int error;

	error = kstrtoul(buf, 0, &val);
	if (error)
		return (error);

	if (val > MAX_HRTIMEOUT_SLACK_US)
		return (-EINVAL);

	error = param_set_uint(buf, kp);
	if (error < 0)
		return (error);

	return (0);
}

module_param_call(spl_schedule_hrtimeout_slack_us, param_set_hrtimeout_slack,
	param_get_uint, &spl_schedule_hrtimeout_slack_us, 0644);
MODULE_PARM_DESC(spl_schedule_hrtimeout_slack_us,
	"schedule_hrtimeout_range() delta/slack value in us, default(0)");

void
__cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg)
{
	ASSERT(cvp);
	ASSERT(name == NULL);
	ASSERT(type == CV_DEFAULT);
	ASSERT(arg == NULL);

	cvp->cv_magic = CV_MAGIC;
	init_waitqueue_head(&cvp->cv_event);
	init_waitqueue_head(&cvp->cv_destroy);
	atomic_set(&cvp->cv_waiters, 0);
	atomic_set(&cvp->cv_refs, 1);
	cvp->cv_mutex = NULL;
}
EXPORT_SYMBOL(__cv_init);

static int
cv_destroy_wakeup(kcondvar_t *cvp)
{
	if (!atomic_read(&cvp->cv_waiters) && !atomic_read(&cvp->cv_refs)) {
		ASSERT(cvp->cv_mutex == NULL);
		ASSERT(!waitqueue_active(&cvp->cv_event));
		return (1);
	}

	return (0);
}

void
__cv_destroy(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);

	cvp->cv_magic = CV_DESTROY;
	atomic_dec(&cvp->cv_refs);

	/* Block until all waiters are woken and references dropped. */
	while (cv_destroy_wakeup(cvp) == 0)
		wait_event_timeout(cvp->cv_destroy, cv_destroy_wakeup(cvp), 1);

	ASSERT3P(cvp->cv_mutex, ==, NULL);
	ASSERT3S(atomic_read(&cvp->cv_refs), ==, 0);
	ASSERT3S(atomic_read(&cvp->cv_waiters), ==, 0);
	ASSERT3S(waitqueue_active(&cvp->cv_event), ==, 0);
}
EXPORT_SYMBOL(__cv_destroy);
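
/*
 * Illustrative usage sketch (assumed, not part of this file): the classic
 * condition-variable pattern these primitives implement.  The names
 * my_lock, my_cv, and my_ready are hypothetical.
 *
 *	kmutex_t my_lock;
 *	kcondvar_t my_cv;
 *	boolean_t my_ready;
 *
 *	cv_init(&my_cv, NULL, CV_DEFAULT, NULL);
 *
 *	mutex_enter(&my_lock);
 *	while (!my_ready)
 *		cv_wait(&my_cv, &my_lock);
 *	mutex_exit(&my_lock);
 *
 *	cv_destroy(&my_cv);
 *
 * cv_wait() is re-issued after every wakeup so the predicate is re-checked,
 * and cv_destroy() blocks until all waiters have drained.  mutex_init() for
 * my_lock and the producer that sets my_ready are omitted.
 */
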
static void
cv_wait_common(kcondvar_t *cvp, kmutex_t *mp, int state, int io)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));
	atomic_inc(&cvp->cv_refs);

	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * The mutex must be dropped after prepare_to_wait(); this ensures
	 * we are linked onto the waiters list and avoids the race where
	 * 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);
	if (io)
		io_schedule();
	else
		schedule();

	/* No more waiters; a different mutex could be used now */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy.  But this is
		 * just for debug anyway, so make it best-effort.
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	/*
	 * Reacquire the mutex only after the cvp reference is released;
	 * otherwise we could deadlock with a thread that holds the mutex
	 * and calls cv_destroy().
	 */
	mutex_enter(mp);
}

void
__cv_wait(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 0);
}
EXPORT_SYMBOL(__cv_wait);

void
__cv_wait_io(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_UNINTERRUPTIBLE, 1);
}
EXPORT_SYMBOL(__cv_wait_io);

int
__cv_wait_io_sig(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 1);

	return (signal_pending(current) ? 0 : 1);
}
EXPORT_SYMBOL(__cv_wait_io_sig);

int
__cv_wait_sig(kcondvar_t *cvp, kmutex_t *mp)
{
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);

	return (signal_pending(current) ? 0 : 1);
}
EXPORT_SYMBOL(__cv_wait_sig);
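
/*
 * Illustrative caller-side sketch (hypothetical my_* names): the _sig
 * variants return 0 when a signal is pending and 1 otherwise, so a caller
 * that must honor signals typically checks the return value and backs out.
 *
 *	int error = 0;
 *
 *	mutex_enter(&my_lock);
 *	while (!my_ready && error == 0) {
 *		if (cv_wait_sig(&my_cv, &my_lock) == 0)
 *			error = EINTR;
 *	}
 *	mutex_exit(&my_lock);
 */
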
void
__cv_wait_idle(kcondvar_t *cvp, kmutex_t *mp)
{
	sigset_t blocked, saved;

	sigfillset(&blocked);
	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
	cv_wait_common(cvp, mp, TASK_INTERRUPTIBLE, 0);
	(void) sigprocmask(SIG_SETMASK, &saved, NULL);
}
EXPORT_SYMBOL(__cv_wait_idle);

#if defined(HAVE_IO_SCHEDULE_TIMEOUT)
#define	spl_io_schedule_timeout(t)	io_schedule_timeout(t)
#else

struct spl_task_timer {
	struct timer_list timer;
	struct task_struct *task;
};

static void
__cv_wakeup(spl_timer_list_t t)
{
	struct timer_list *tmr = (struct timer_list *)t;
	struct spl_task_timer *task_timer = from_timer(task_timer, tmr, timer);

	wake_up_process(task_timer->task);
}

static long
spl_io_schedule_timeout(long time_left)
{
	long expire_time = jiffies + time_left;
	struct spl_task_timer task_timer;
	struct timer_list *timer = &task_timer.timer;

	task_timer.task = current;

	timer_setup(timer, __cv_wakeup, 0);

	timer->expires = expire_time;
	add_timer(timer);

	io_schedule();

	del_timer_sync(timer);

	time_left = expire_time - jiffies;

	return (time_left < 0 ? 0 : time_left);
}
#endif

/*
 * 'expire_time' argument is an absolute wall clock time in jiffies.
 * Return value is time left (expire_time - now) or -1 if timeout occurred.
 */
static clock_t
__cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time,
    int state, int io)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;
	clock_t time_left;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));

	/* XXX - Does not handle jiffie wrap properly */
	time_left = expire_time - jiffies;
	if (time_left <= 0)
		return (-1);

	atomic_inc(&cvp->cv_refs);
	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * The mutex must be dropped after prepare_to_wait(); this ensures
	 * we are linked onto the waiters list and avoids the race where
	 * 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);
	if (io)
		time_left = spl_io_schedule_timeout(time_left);
	else
		time_left = schedule_timeout(time_left);

	/* No more waiters; a different mutex could be used now */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy.  But this is
		 * just for debug anyway, so make it best-effort.
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	/*
	 * Reacquire the mutex only after the cvp reference is released;
	 * otherwise we could deadlock with a thread that holds the mutex
	 * and calls cv_destroy().
	 */
	mutex_enter(mp);
	return (time_left > 0 ? time_left : -1);
}
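
/*
 * Illustrative caller-side sketch (hypothetical my_* names): cv_timedwait()
 * takes an absolute expiration time in jiffies and returns the remaining
 * ticks, or -1 once the deadline has passed.  A roughly one-second bound
 * might look like this:
 *
 *	clock_t deadline = ddi_get_lbolt() + HZ;
 *
 *	mutex_enter(&my_lock);
 *	while (!my_ready) {
 *		if (cv_timedwait(&my_cv, &my_lock, deadline) == -1)
 *			break;
 *	}
 *	mutex_exit(&my_lock);
 */
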
int
__cv_timedwait(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	return (__cv_timedwait_common(cvp, mp, exp_time,
	    TASK_UNINTERRUPTIBLE, 0));
}
EXPORT_SYMBOL(__cv_timedwait);

int
__cv_timedwait_io(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	return (__cv_timedwait_common(cvp, mp, exp_time,
	    TASK_UNINTERRUPTIBLE, 1));
}
EXPORT_SYMBOL(__cv_timedwait_io);

int
__cv_timedwait_sig(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	int rc;

	rc = __cv_timedwait_common(cvp, mp, exp_time, TASK_INTERRUPTIBLE, 0);
	return (signal_pending(current) ? 0 : rc);
}
EXPORT_SYMBOL(__cv_timedwait_sig);

int
__cv_timedwait_idle(kcondvar_t *cvp, kmutex_t *mp, clock_t exp_time)
{
	sigset_t blocked, saved;
	int rc;

	sigfillset(&blocked);
	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
	rc = __cv_timedwait_common(cvp, mp, exp_time,
	    TASK_INTERRUPTIBLE, 0);
	(void) sigprocmask(SIG_SETMASK, &saved, NULL);

	return (rc);
}
EXPORT_SYMBOL(__cv_timedwait_idle);

/*
 * 'expire_time' argument is an absolute clock time in nanoseconds.
 * Return value is 1 if the thread was woken before the timeout expired,
 * or -1 if the timeout occurred.
 */
static clock_t
__cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time,
    hrtime_t res, int state)
{
	DEFINE_WAIT(wait);
	kmutex_t *m;
	hrtime_t time_left;
	ktime_t ktime_left;
	u64 slack = 0;
	int rc;

	ASSERT(cvp);
	ASSERT(mp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	ASSERT(mutex_owned(mp));

	time_left = expire_time - gethrtime();
	if (time_left <= 0)
		return (-1);

	atomic_inc(&cvp->cv_refs);
	m = READ_ONCE(cvp->cv_mutex);
	if (!m)
		m = xchg(&cvp->cv_mutex, mp);
	/* Ensure the same mutex is used by all callers */
	ASSERT(m == NULL || m == mp);

	prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
	atomic_inc(&cvp->cv_waiters);

	/*
	 * The mutex must be dropped after prepare_to_wait(); this ensures
	 * we are linked onto the waiters list and avoids the race where
	 * 'cvp->cv_waiters > 0' but the list is empty.
	 */
	mutex_exit(mp);

	ktime_left = ktime_set(0, time_left);
	slack = MIN(MAX(res, spl_schedule_hrtimeout_slack_us * NSEC_PER_USEC),
	    MAX_HRTIMEOUT_SLACK_US * NSEC_PER_USEC);
	rc = schedule_hrtimeout_range(&ktime_left, slack, HRTIMER_MODE_REL);

	/* No more waiters; a different mutex could be used now */
	if (atomic_dec_and_test(&cvp->cv_waiters)) {
		/*
		 * This is set without any lock, so it's racy.  But this is
		 * just for debug anyway, so make it best-effort.
		 */
		cvp->cv_mutex = NULL;
		wake_up(&cvp->cv_destroy);
	}

	finish_wait(&cvp->cv_event, &wait);
	atomic_dec(&cvp->cv_refs);

	mutex_enter(mp);
	return (rc == -EINTR ? 1 : -1);
}
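
/*
 * Worked example of the slack computation above (assumed values, not
 * normative): with a caller-supplied resolution res = 0 and the module
 * parameter spl_schedule_hrtimeout_slack_us set to 50, the slack handed to
 * schedule_hrtimeout_range() is
 *
 *	MIN(MAX(0, 50 * NSEC_PER_USEC), 1000 * NSEC_PER_USEC) = 50000 ns
 *
 * i.e. the larger of the caller's resolution and the tunable, capped at
 * MAX_HRTIMEOUT_SLACK_US (1000 us).
 */
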
/*
 * Compatibility wrapper for the cv_timedwait_hires() Illumos interface.
 */
static int
cv_timedwait_hires_common(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
    hrtime_t res, int flag, int state)
{
	if (!(flag & CALLOUT_FLAG_ABSOLUTE))
		tim += gethrtime();

	return (__cv_timedwait_hires(cvp, mp, tim, res, state));
}

int
cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res,
    int flag)
{
	return (cv_timedwait_hires_common(cvp, mp, tim, res, flag,
	    TASK_UNINTERRUPTIBLE));
}
EXPORT_SYMBOL(cv_timedwait_hires);

int
cv_timedwait_sig_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
    hrtime_t res, int flag)
{
	int rc;

	rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag,
	    TASK_INTERRUPTIBLE);
	return (signal_pending(current) ? 0 : rc);
}
EXPORT_SYMBOL(cv_timedwait_sig_hires);

int
cv_timedwait_idle_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
    hrtime_t res, int flag)
{
	sigset_t blocked, saved;
	int rc;

	sigfillset(&blocked);
	(void) sigprocmask(SIG_BLOCK, &blocked, &saved);
	rc = cv_timedwait_hires_common(cvp, mp, tim, res, flag,
	    TASK_INTERRUPTIBLE);
	(void) sigprocmask(SIG_SETMASK, &saved, NULL);

	return (rc);
}
EXPORT_SYMBOL(cv_timedwait_idle_hires);

void
__cv_signal(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	atomic_inc(&cvp->cv_refs);

	/*
	 * All waiters are added with WQ_FLAG_EXCLUSIVE, so only one
	 * waiter will be set runnable with each call to wake_up().
	 * Additionally, wake_up() holds a spinlock associated with
	 * the wait queue to ensure we don't race waking up processes.
	 */
	if (atomic_read(&cvp->cv_waiters) > 0)
		wake_up(&cvp->cv_event);

	atomic_dec(&cvp->cv_refs);
}
EXPORT_SYMBOL(__cv_signal);

void
__cv_broadcast(kcondvar_t *cvp)
{
	ASSERT(cvp);
	ASSERT(cvp->cv_magic == CV_MAGIC);
	atomic_inc(&cvp->cv_refs);

	/*
	 * wake_up_all() will wake up all waiters, even those which
	 * have the WQ_FLAG_EXCLUSIVE flag set.
	 */
	if (atomic_read(&cvp->cv_waiters) > 0)
		wake_up_all(&cvp->cv_event);

	atomic_dec(&cvp->cv_refs);
}
EXPORT_SYMBOL(__cv_broadcast);
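
/*
 * Illustrative producer-side sketch (hypothetical my_* names): cv_signal()
 * wakes at most one exclusive waiter, while cv_broadcast() wakes them all.
 * The shared state should be updated under the same mutex the waiters use.
 *
 *	mutex_enter(&my_lock);
 *	my_ready = B_TRUE;
 *	cv_broadcast(&my_cv);
 *	mutex_exit(&my_lock);
 *
 * cv_signal() would be used instead when exactly one waiter should run.
 */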