1 /* 2 * Generic waiting primitives. 3 * 4 * (C) 2004 Nadia Yvette Chambers, Oracle 5 */ 6 #include <linux/init.h> 7 #include <linux/export.h> 8 #include <linux/sched.h> 9 #include <linux/mm.h> 10 #include <linux/wait.h> 11 #include <linux/hash.h> 12 13 void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *key) 14 { 15 spin_lock_init(&q->lock); 16 lockdep_set_class_and_name(&q->lock, key, name); 17 INIT_LIST_HEAD(&q->task_list); 18 } 19 20 EXPORT_SYMBOL(__init_waitqueue_head); 21 22 void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) 23 { 24 unsigned long flags; 25 26 wait->flags &= ~WQ_FLAG_EXCLUSIVE; 27 spin_lock_irqsave(&q->lock, flags); 28 __add_wait_queue(q, wait); 29 spin_unlock_irqrestore(&q->lock, flags); 30 } 31 EXPORT_SYMBOL(add_wait_queue); 32 33 void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait) 34 { 35 unsigned long flags; 36 37 wait->flags |= WQ_FLAG_EXCLUSIVE; 38 spin_lock_irqsave(&q->lock, flags); 39 __add_wait_queue_tail(q, wait); 40 spin_unlock_irqrestore(&q->lock, flags); 41 } 42 EXPORT_SYMBOL(add_wait_queue_exclusive); 43 44 void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) 45 { 46 unsigned long flags; 47 48 spin_lock_irqsave(&q->lock, flags); 49 __remove_wait_queue(q, wait); 50 spin_unlock_irqrestore(&q->lock, flags); 51 } 52 EXPORT_SYMBOL(remove_wait_queue); 53 54 55 /* 56 * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just 57 * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve 58 * number) then we wake all the non-exclusive tasks and one exclusive task. 59 * 60 * There are circumstances in which we can try to wake a task which has already 61 * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns 62 * zero in this (rare) case, and we handle it by continuing to scan the queue. 63 */ 64 static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, 65 int nr_exclusive, int wake_flags, void *key) 66 { 67 wait_queue_t *curr, *next; 68 69 list_for_each_entry_safe(curr, next, &q->task_list, task_list) { 70 unsigned flags = curr->flags; 71 72 if (curr->func(curr, mode, wake_flags, key) && 73 (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) 74 break; 75 } 76 } 77 78 /** 79 * __wake_up - wake up threads blocked on a waitqueue. 80 * @q: the waitqueue 81 * @mode: which threads 82 * @nr_exclusive: how many wake-one or wake-many threads to wake up 83 * @key: is directly passed to the wakeup function 84 * 85 * It may be assumed that this function implies a write memory barrier before 86 * changing the task state if and only if any tasks are woken up. 87 */ 88 void __wake_up(wait_queue_head_t *q, unsigned int mode, 89 int nr_exclusive, void *key) 90 { 91 unsigned long flags; 92 93 spin_lock_irqsave(&q->lock, flags); 94 __wake_up_common(q, mode, nr_exclusive, 0, key); 95 spin_unlock_irqrestore(&q->lock, flags); 96 } 97 EXPORT_SYMBOL(__wake_up); 98 99 /* 100 * Same as __wake_up but called with the spinlock in wait_queue_head_t held. 101 */ 102 void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr) 103 { 104 __wake_up_common(q, mode, nr, 0, NULL); 105 } 106 EXPORT_SYMBOL_GPL(__wake_up_locked); 107 108 void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key) 109 { 110 __wake_up_common(q, mode, 1, 0, key); 111 } 112 EXPORT_SYMBOL_GPL(__wake_up_locked_key); 113 114 /** 115 * __wake_up_sync_key - wake up threads blocked on a waitqueue. 116 * @q: the waitqueue 117 * @mode: which threads 118 * @nr_exclusive: how many wake-one or wake-many threads to wake up 119 * @key: opaque value to be passed to wakeup targets 120 * 121 * The sync wakeup differs that the waker knows that it will schedule 122 * away soon, so while the target thread will be woken up, it will not 123 * be migrated to another CPU - ie. the two threads are 'synchronized' 124 * with each other. This can prevent needless bouncing between CPUs. 125 * 126 * On UP it can prevent extra preemption. 127 * 128 * It may be assumed that this function implies a write memory barrier before 129 * changing the task state if and only if any tasks are woken up. 130 */ 131 void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, 132 int nr_exclusive, void *key) 133 { 134 unsigned long flags; 135 int wake_flags = 1; /* XXX WF_SYNC */ 136 137 if (unlikely(!q)) 138 return; 139 140 if (unlikely(nr_exclusive != 1)) 141 wake_flags = 0; 142 143 spin_lock_irqsave(&q->lock, flags); 144 __wake_up_common(q, mode, nr_exclusive, wake_flags, key); 145 spin_unlock_irqrestore(&q->lock, flags); 146 } 147 EXPORT_SYMBOL_GPL(__wake_up_sync_key); 148 149 /* 150 * __wake_up_sync - see __wake_up_sync_key() 151 */ 152 void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive) 153 { 154 __wake_up_sync_key(q, mode, nr_exclusive, NULL); 155 } 156 EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */ 157 158 /* 159 * Note: we use "set_current_state()" _after_ the wait-queue add, 160 * because we need a memory barrier there on SMP, so that any 161 * wake-function that tests for the wait-queue being active 162 * will be guaranteed to see waitqueue addition _or_ subsequent 163 * tests in this thread will see the wakeup having taken place. 164 * 165 * The spin_unlock() itself is semi-permeable and only protects 166 * one way (it only protects stuff inside the critical region and 167 * stops them from bleeding out - it would still allow subsequent 168 * loads to move into the critical region). 169 */ 170 void 171 prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state) 172 { 173 unsigned long flags; 174 175 wait->flags &= ~WQ_FLAG_EXCLUSIVE; 176 spin_lock_irqsave(&q->lock, flags); 177 if (list_empty(&wait->task_list)) 178 __add_wait_queue(q, wait); 179 set_current_state(state); 180 spin_unlock_irqrestore(&q->lock, flags); 181 } 182 EXPORT_SYMBOL(prepare_to_wait); 183 184 void 185 prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state) 186 { 187 unsigned long flags; 188 189 wait->flags |= WQ_FLAG_EXCLUSIVE; 190 spin_lock_irqsave(&q->lock, flags); 191 if (list_empty(&wait->task_list)) 192 __add_wait_queue_tail(q, wait); 193 set_current_state(state); 194 spin_unlock_irqrestore(&q->lock, flags); 195 } 196 EXPORT_SYMBOL(prepare_to_wait_exclusive); 197 198 long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state) 199 { 200 unsigned long flags; 201 202 if (signal_pending_state(state, current)) 203 return -ERESTARTSYS; 204 205 wait->private = current; 206 wait->func = autoremove_wake_function; 207 208 spin_lock_irqsave(&q->lock, flags); 209 if (list_empty(&wait->task_list)) { 210 if (wait->flags & WQ_FLAG_EXCLUSIVE) 211 __add_wait_queue_tail(q, wait); 212 else 213 __add_wait_queue(q, wait); 214 } 215 set_current_state(state); 216 spin_unlock_irqrestore(&q->lock, flags); 217 218 return 0; 219 } 220 EXPORT_SYMBOL(prepare_to_wait_event); 221 222 /** 223 * finish_wait - clean up after waiting in a queue 224 * @q: waitqueue waited on 225 * @wait: wait descriptor 226 * 227 * Sets current thread back to running state and removes 228 * the wait descriptor from the given waitqueue if still 229 * queued. 230 */ 231 void finish_wait(wait_queue_head_t *q, wait_queue_t *wait) 232 { 233 unsigned long flags; 234 235 __set_current_state(TASK_RUNNING); 236 /* 237 * We can check for list emptiness outside the lock 238 * IFF: 239 * - we use the "careful" check that verifies both 240 * the next and prev pointers, so that there cannot 241 * be any half-pending updates in progress on other 242 * CPU's that we haven't seen yet (and that might 243 * still change the stack area. 244 * and 245 * - all other users take the lock (ie we can only 246 * have _one_ other CPU that looks at or modifies 247 * the list). 248 */ 249 if (!list_empty_careful(&wait->task_list)) { 250 spin_lock_irqsave(&q->lock, flags); 251 list_del_init(&wait->task_list); 252 spin_unlock_irqrestore(&q->lock, flags); 253 } 254 } 255 EXPORT_SYMBOL(finish_wait); 256 257 /** 258 * abort_exclusive_wait - abort exclusive waiting in a queue 259 * @q: waitqueue waited on 260 * @wait: wait descriptor 261 * @mode: runstate of the waiter to be woken 262 * @key: key to identify a wait bit queue or %NULL 263 * 264 * Sets current thread back to running state and removes 265 * the wait descriptor from the given waitqueue if still 266 * queued. 267 * 268 * Wakes up the next waiter if the caller is concurrently 269 * woken up through the queue. 270 * 271 * This prevents waiter starvation where an exclusive waiter 272 * aborts and is woken up concurrently and no one wakes up 273 * the next waiter. 274 */ 275 void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, 276 unsigned int mode, void *key) 277 { 278 unsigned long flags; 279 280 __set_current_state(TASK_RUNNING); 281 spin_lock_irqsave(&q->lock, flags); 282 if (!list_empty(&wait->task_list)) 283 list_del_init(&wait->task_list); 284 else if (waitqueue_active(q)) 285 __wake_up_locked_key(q, mode, key); 286 spin_unlock_irqrestore(&q->lock, flags); 287 } 288 EXPORT_SYMBOL(abort_exclusive_wait); 289 290 int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key) 291 { 292 int ret = default_wake_function(wait, mode, sync, key); 293 294 if (ret) 295 list_del_init(&wait->task_list); 296 return ret; 297 } 298 EXPORT_SYMBOL(autoremove_wake_function); 299 300 int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *arg) 301 { 302 struct wait_bit_key *key = arg; 303 struct wait_bit_queue *wait_bit 304 = container_of(wait, struct wait_bit_queue, wait); 305 306 if (wait_bit->key.flags != key->flags || 307 wait_bit->key.bit_nr != key->bit_nr || 308 test_bit(key->bit_nr, key->flags)) 309 return 0; 310 else 311 return autoremove_wake_function(wait, mode, sync, key); 312 } 313 EXPORT_SYMBOL(wake_bit_function); 314 315 /* 316 * To allow interruptible waiting and asynchronous (i.e. nonblocking) 317 * waiting, the actions of __wait_on_bit() and __wait_on_bit_lock() are 318 * permitted return codes. Nonzero return codes halt waiting and return. 319 */ 320 int __sched 321 __wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q, 322 wait_bit_action_f *action, unsigned mode) 323 { 324 int ret = 0; 325 326 do { 327 prepare_to_wait(wq, &q->wait, mode); 328 if (test_bit(q->key.bit_nr, q->key.flags)) 329 ret = (*action)(&q->key); 330 } while (test_bit(q->key.bit_nr, q->key.flags) && !ret); 331 finish_wait(wq, &q->wait); 332 return ret; 333 } 334 EXPORT_SYMBOL(__wait_on_bit); 335 336 int __sched out_of_line_wait_on_bit(void *word, int bit, 337 wait_bit_action_f *action, unsigned mode) 338 { 339 wait_queue_head_t *wq = bit_waitqueue(word, bit); 340 DEFINE_WAIT_BIT(wait, word, bit); 341 342 return __wait_on_bit(wq, &wait, action, mode); 343 } 344 EXPORT_SYMBOL(out_of_line_wait_on_bit); 345 346 int __sched 347 __wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q, 348 wait_bit_action_f *action, unsigned mode) 349 { 350 do { 351 int ret; 352 353 prepare_to_wait_exclusive(wq, &q->wait, mode); 354 if (!test_bit(q->key.bit_nr, q->key.flags)) 355 continue; 356 ret = action(&q->key); 357 if (!ret) 358 continue; 359 abort_exclusive_wait(wq, &q->wait, mode, &q->key); 360 return ret; 361 } while (test_and_set_bit(q->key.bit_nr, q->key.flags)); 362 finish_wait(wq, &q->wait); 363 return 0; 364 } 365 EXPORT_SYMBOL(__wait_on_bit_lock); 366 367 int __sched out_of_line_wait_on_bit_lock(void *word, int bit, 368 wait_bit_action_f *action, unsigned mode) 369 { 370 wait_queue_head_t *wq = bit_waitqueue(word, bit); 371 DEFINE_WAIT_BIT(wait, word, bit); 372 373 return __wait_on_bit_lock(wq, &wait, action, mode); 374 } 375 EXPORT_SYMBOL(out_of_line_wait_on_bit_lock); 376 377 void __wake_up_bit(wait_queue_head_t *wq, void *word, int bit) 378 { 379 struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit); 380 if (waitqueue_active(wq)) 381 __wake_up(wq, TASK_NORMAL, 1, &key); 382 } 383 EXPORT_SYMBOL(__wake_up_bit); 384 385 /** 386 * wake_up_bit - wake up a waiter on a bit 387 * @word: the word being waited on, a kernel virtual address 388 * @bit: the bit of the word being waited on 389 * 390 * There is a standard hashed waitqueue table for generic use. This 391 * is the part of the hashtable's accessor API that wakes up waiters 392 * on a bit. For instance, if one were to have waiters on a bitflag, 393 * one would call wake_up_bit() after clearing the bit. 394 * 395 * In order for this to function properly, as it uses waitqueue_active() 396 * internally, some kind of memory barrier must be done prior to calling 397 * this. Typically, this will be smp_mb__after_atomic(), but in some 398 * cases where bitflags are manipulated non-atomically under a lock, one 399 * may need to use a less regular barrier, such fs/inode.c's smp_mb(), 400 * because spin_unlock() does not guarantee a memory barrier. 401 */ 402 void wake_up_bit(void *word, int bit) 403 { 404 __wake_up_bit(bit_waitqueue(word, bit), word, bit); 405 } 406 EXPORT_SYMBOL(wake_up_bit); 407 408 wait_queue_head_t *bit_waitqueue(void *word, int bit) 409 { 410 const int shift = BITS_PER_LONG == 32 ? 5 : 6; 411 const struct zone *zone = page_zone(virt_to_page(word)); 412 unsigned long val = (unsigned long)word << shift | bit; 413 414 return &zone->wait_table[hash_long(val, zone->wait_table_bits)]; 415 } 416 EXPORT_SYMBOL(bit_waitqueue); 417 418 /* 419 * Manipulate the atomic_t address to produce a better bit waitqueue table hash 420 * index (we're keying off bit -1, but that would produce a horrible hash 421 * value). 422 */ 423 static inline wait_queue_head_t *atomic_t_waitqueue(atomic_t *p) 424 { 425 if (BITS_PER_LONG == 64) { 426 unsigned long q = (unsigned long)p; 427 return bit_waitqueue((void *)(q & ~1), q & 1); 428 } 429 return bit_waitqueue(p, 0); 430 } 431 432 static int wake_atomic_t_function(wait_queue_t *wait, unsigned mode, int sync, 433 void *arg) 434 { 435 struct wait_bit_key *key = arg; 436 struct wait_bit_queue *wait_bit 437 = container_of(wait, struct wait_bit_queue, wait); 438 atomic_t *val = key->flags; 439 440 if (wait_bit->key.flags != key->flags || 441 wait_bit->key.bit_nr != key->bit_nr || 442 atomic_read(val) != 0) 443 return 0; 444 return autoremove_wake_function(wait, mode, sync, key); 445 } 446 447 /* 448 * To allow interruptible waiting and asynchronous (i.e. nonblocking) waiting, 449 * the actions of __wait_on_atomic_t() are permitted return codes. Nonzero 450 * return codes halt waiting and return. 451 */ 452 static __sched 453 int __wait_on_atomic_t(wait_queue_head_t *wq, struct wait_bit_queue *q, 454 int (*action)(atomic_t *), unsigned mode) 455 { 456 atomic_t *val; 457 int ret = 0; 458 459 do { 460 prepare_to_wait(wq, &q->wait, mode); 461 val = q->key.flags; 462 if (atomic_read(val) == 0) 463 break; 464 ret = (*action)(val); 465 } while (!ret && atomic_read(val) != 0); 466 finish_wait(wq, &q->wait); 467 return ret; 468 } 469 470 #define DEFINE_WAIT_ATOMIC_T(name, p) \ 471 struct wait_bit_queue name = { \ 472 .key = __WAIT_ATOMIC_T_KEY_INITIALIZER(p), \ 473 .wait = { \ 474 .private = current, \ 475 .func = wake_atomic_t_function, \ 476 .task_list = \ 477 LIST_HEAD_INIT((name).wait.task_list), \ 478 }, \ 479 } 480 481 __sched int out_of_line_wait_on_atomic_t(atomic_t *p, int (*action)(atomic_t *), 482 unsigned mode) 483 { 484 wait_queue_head_t *wq = atomic_t_waitqueue(p); 485 DEFINE_WAIT_ATOMIC_T(wait, p); 486 487 return __wait_on_atomic_t(wq, &wait, action, mode); 488 } 489 EXPORT_SYMBOL(out_of_line_wait_on_atomic_t); 490 491 /** 492 * wake_up_atomic_t - Wake up a waiter on a atomic_t 493 * @p: The atomic_t being waited on, a kernel virtual address 494 * 495 * Wake up anyone waiting for the atomic_t to go to zero. 496 * 497 * Abuse the bit-waker function and its waitqueue hash table set (the atomic_t 498 * check is done by the waiter's wake function, not the by the waker itself). 499 */ 500 void wake_up_atomic_t(atomic_t *p) 501 { 502 __wake_up_bit(atomic_t_waitqueue(p), p, WAIT_ATOMIC_T_BIT_NR); 503 } 504 EXPORT_SYMBOL(wake_up_atomic_t); 505 506 __sched int bit_wait(struct wait_bit_key *word) 507 { 508 if (signal_pending_state(current->state, current)) 509 return 1; 510 schedule(); 511 return 0; 512 } 513 EXPORT_SYMBOL(bit_wait); 514 515 __sched int bit_wait_io(struct wait_bit_key *word) 516 { 517 if (signal_pending_state(current->state, current)) 518 return 1; 519 io_schedule(); 520 return 0; 521 } 522 EXPORT_SYMBOL(bit_wait_io); 523