#include <linux/wait.h>
#include <linux/backing-dev.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/writeback.h>
#include <linux/device.h>
#include <trace/events/writeback.h>

static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);

void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
{
}
EXPORT_SYMBOL(default_unplug_io_fn);

struct backing_dev_info default_backing_dev_info = {
	.name		= "default",
	.ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
	.state		= 0,
	.capabilities	= BDI_CAP_MAP_COPY,
	.unplug_io_fn	= default_unplug_io_fn,
};
EXPORT_SYMBOL_GPL(default_backing_dev_info);

struct backing_dev_info noop_backing_dev_info = {
	.name		= "noop",
};
EXPORT_SYMBOL_GPL(noop_backing_dev_info);

static struct class *bdi_class;

/*
 * bdi_lock protects updates to bdi_list and bdi_pending_list, as well as
 * reader side protection for bdi_pending_list. bdi_list has RCU reader side
 * locking.
 */
DEFINE_SPINLOCK(bdi_lock);
LIST_HEAD(bdi_list);
LIST_HEAD(bdi_pending_list);

static struct task_struct *sync_supers_tsk;
static struct timer_list sync_supers_timer;

static int bdi_sync_supers(void *);
static void sync_supers_timer_fn(unsigned long);

#ifdef CONFIG_DEBUG_FS
#include <linux/debugfs.h>
#include <linux/seq_file.h>

static struct dentry *bdi_debug_root;

static void bdi_debug_init(void)
{
	bdi_debug_root = debugfs_create_dir("bdi", NULL);
}

static int bdi_debug_stats_show(struct seq_file *m, void *v)
{
	struct backing_dev_info *bdi = m->private;
	struct bdi_writeback *wb = &bdi->wb;
	unsigned long background_thresh;
	unsigned long dirty_thresh;
	unsigned long bdi_thresh;
	unsigned long nr_dirty, nr_io, nr_more_io, nr_wb;
	struct inode *inode;

	nr_wb = nr_dirty = nr_io = nr_more_io = 0;
	spin_lock(&inode_lock);
	list_for_each_entry(inode, &wb->b_dirty, i_list)
		nr_dirty++;
	list_for_each_entry(inode, &wb->b_io, i_list)
		nr_io++;
	list_for_each_entry(inode, &wb->b_more_io, i_list)
		nr_more_io++;
	spin_unlock(&inode_lock);

	global_dirty_limits(&background_thresh, &dirty_thresh);
	bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh);

#define K(x) ((x) << (PAGE_SHIFT - 10))
	seq_printf(m,
		   "BdiWriteback:     %8lu kB\n"
		   "BdiReclaimable:   %8lu kB\n"
		   "BdiDirtyThresh:   %8lu kB\n"
		   "DirtyThresh:      %8lu kB\n"
		   "BackgroundThresh: %8lu kB\n"
		   "b_dirty:          %8lu\n"
		   "b_io:             %8lu\n"
		   "b_more_io:        %8lu\n"
		   "bdi_list:         %8u\n"
		   "state:            %8lx\n",
		   (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)),
		   (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)),
		   K(bdi_thresh), K(dirty_thresh),
		   K(background_thresh), nr_dirty, nr_io, nr_more_io,
		   !list_empty(&bdi->bdi_list), bdi->state);
#undef K

	return 0;
}

static int bdi_debug_stats_open(struct inode *inode, struct file *file)
{
	return single_open(file, bdi_debug_stats_show, inode->i_private);
}

static const struct file_operations bdi_debug_stats_fops = {
	.open		= bdi_debug_stats_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static void bdi_debug_register(struct backing_dev_info *bdi, const char *name)
{
	bdi->debug_dir = debugfs_create_dir(name, bdi_debug_root);
	bdi->debug_stats = debugfs_create_file("stats", 0444, bdi->debug_dir,
					       bdi, &bdi_debug_stats_fops);
}

static void bdi_debug_unregister(struct backing_dev_info *bdi)
{
	debugfs_remove(bdi->debug_stats);
	debugfs_remove(bdi->debug_dir);
}
#else
static inline void bdi_debug_init(void)
{
}
static inline void bdi_debug_register(struct backing_dev_info *bdi,
				      const char *name)
{
}
static inline void bdi_debug_unregister(struct backing_dev_info *bdi)
{
}
#endif

static ssize_t read_ahead_kb_store(struct device *dev,
				   struct device_attribute *attr,
				   const char *buf, size_t count)
{
	struct backing_dev_info *bdi = dev_get_drvdata(dev);
	char *end;
	unsigned long read_ahead_kb;
	ssize_t ret = -EINVAL;

	read_ahead_kb = simple_strtoul(buf, &end, 10);
	if (*buf && (end[0] == '\0' || (end[0] == '\n' && end[1] == '\0'))) {
		bdi->ra_pages = read_ahead_kb >> (PAGE_SHIFT - 10);
		ret = count;
	}
	return ret;
}

#define K(pages) ((pages) << (PAGE_SHIFT - 10))

#define BDI_SHOW(name, expr)						\
static ssize_t name##_show(struct device *dev,				\
			   struct device_attribute *attr, char *page)	\
{									\
	struct backing_dev_info *bdi = dev_get_drvdata(dev);		\
									\
	return snprintf(page, PAGE_SIZE-1, "%lld\n", (long long)expr);	\
}

BDI_SHOW(read_ahead_kb, K(bdi->ra_pages))

static ssize_t min_ratio_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	struct backing_dev_info *bdi = dev_get_drvdata(dev);
	char *end;
	unsigned int ratio;
	ssize_t ret = -EINVAL;

	ratio = simple_strtoul(buf, &end, 10);
	if (*buf && (end[0] == '\0' || (end[0] == '\n' && end[1] == '\0'))) {
		ret = bdi_set_min_ratio(bdi, ratio);
		if (!ret)
			ret = count;
	}
	return ret;
}
BDI_SHOW(min_ratio, bdi->min_ratio)

static ssize_t max_ratio_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	struct backing_dev_info *bdi = dev_get_drvdata(dev);
	char *end;
	unsigned int ratio;
	ssize_t ret = -EINVAL;

	ratio = simple_strtoul(buf, &end, 10);
	if (*buf && (end[0] == '\0' || (end[0] == '\n' && end[1] == '\0'))) {
		ret = bdi_set_max_ratio(bdi, ratio);
		if (!ret)
			ret = count;
	}
	return ret;
}
BDI_SHOW(max_ratio, bdi->max_ratio)

#define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store)

static struct device_attribute bdi_dev_attrs[] = {
	__ATTR_RW(read_ahead_kb),
	__ATTR_RW(min_ratio),
	__ATTR_RW(max_ratio),
	__ATTR_NULL,
};

static __init int bdi_class_init(void)
{
	bdi_class = class_create(THIS_MODULE, "bdi");
	if (IS_ERR(bdi_class))
		return PTR_ERR(bdi_class);

	bdi_class->dev_attrs = bdi_dev_attrs;
	bdi_debug_init();
	return 0;
}
postcore_initcall(bdi_class_init);

static int __init default_bdi_init(void)
{
	int err;

	sync_supers_tsk = kthread_run(bdi_sync_supers, NULL, "sync_supers");
	BUG_ON(IS_ERR(sync_supers_tsk));

	setup_timer(&sync_supers_timer, sync_supers_timer_fn, 0);
	bdi_arm_supers_timer();

	err = bdi_init(&default_backing_dev_info);
	if (!err)
		bdi_register(&default_backing_dev_info, NULL, "default");

	return err;
}
subsys_initcall(default_bdi_init);

int bdi_has_dirty_io(struct backing_dev_info *bdi)
{
	return wb_has_dirty_io(&bdi->wb);
}

static void bdi_flush_io(struct backing_dev_info *bdi)
{
	struct writeback_control wbc = {
		.sync_mode		= WB_SYNC_NONE,
		.older_than_this	= NULL,
		.range_cyclic		= 1,
		.nr_to_write		= 1024,
	};

	writeback_inodes_wb(&bdi->wb, &wbc);
}

/*
 * kupdated() used to do this. We cannot do it from the bdi_forker_thread()
 * or we risk deadlocking on ->s_umount. The longer term solution would be
 * to implement sync_supers_bdi() or similar and simply do it from the
 * bdi writeback thread individually.
 */
static int bdi_sync_supers(void *unused)
{
	set_user_nice(current, 0);

	while (!kthread_should_stop()) {
		set_current_state(TASK_INTERRUPTIBLE);
		schedule();

		/*
		 * Do this periodically, like kupdated() did before.
		 */
		sync_supers();
	}

	return 0;
}

void bdi_arm_supers_timer(void)
{
	unsigned long next;

	if (!dirty_writeback_interval)
		return;

	next = msecs_to_jiffies(dirty_writeback_interval * 10) + jiffies;
	mod_timer(&sync_supers_timer, round_jiffies_up(next));
}

static void sync_supers_timer_fn(unsigned long unused)
{
	wake_up_process(sync_supers_tsk);
	bdi_arm_supers_timer();
}

static void wakeup_timer_fn(unsigned long data)
{
	struct backing_dev_info *bdi = (struct backing_dev_info *)data;

	spin_lock_bh(&bdi->wb_lock);
	if (bdi->wb.task) {
		trace_writeback_wake_thread(bdi);
		wake_up_process(bdi->wb.task);
	} else {
		/*
		 * When bdi tasks are inactive for a long time, they are
		 * killed. In this case we have to wake up the forker thread,
		 * which should create and run the bdi thread.
		 */
		trace_writeback_wake_forker_thread(bdi);
		wake_up_process(default_backing_dev_info.wb.task);
	}
	spin_unlock_bh(&bdi->wb_lock);
}

/*
 * This function is used when the first inode for this bdi is marked dirty. It
 * wakes up the corresponding bdi thread, which should then take care of the
 * periodic background write-out of dirty inodes. Since the write-out would
 * start only 'dirty_writeback_interval' centisecs from now anyway, we just
 * set up a timer which wakes the bdi thread up later.
 *
 * Note, we wouldn't bother setting up the timer, but this function is on the
 * fast path (used by '__mark_inode_dirty()'), so we save a few context
 * switches by delaying the wake-up.
 */
void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi)
{
	unsigned long timeout;

	timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
	mod_timer(&bdi->wb.wakeup_timer, jiffies + timeout);
}
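
/*
 * Illustrative sketch only (not part of the original file): a hypothetical
 * caller on the dirtying fast path, such as a helper invoked from something
 * like '__mark_inode_dirty()', would arm the delayed wakeup above rather
 * than waking the flusher thread directly. The 'example_' name is made up
 * for illustration; the mapping/bdi accessors are the ones this era of the
 * kernel provides.
 *
 *	static void example_note_inode_dirtied(struct inode *inode)
 *	{
 *		struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info;
 *
 *		if (bdi_cap_writeback_dirty(bdi))
 *			bdi_wakeup_thread_delayed(bdi);
 *	}
 */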

/*
 * Calculate the longest interval (jiffies) bdi threads are allowed to be
 * inactive.
 */
static unsigned long bdi_longest_inactive(void)
{
	unsigned long interval;

	interval = msecs_to_jiffies(dirty_writeback_interval * 10);
	return max(5UL * 60 * HZ, interval);
}

static int bdi_forker_thread(void *ptr)
{
	struct bdi_writeback *me = ptr;

	current->flags |= PF_FLUSHER | PF_SWAPWRITE;
	set_freezable();

	/*
	 * Our parent may run at a different priority, just set us to normal
	 */
	set_user_nice(current, 0);

	for (;;) {
		struct task_struct *task = NULL;
		struct backing_dev_info *bdi;
		enum {
			NO_ACTION,   /* Nothing to do */
			FORK_THREAD, /* Fork bdi thread */
			KILL_THREAD, /* Kill inactive bdi thread */
		} action = NO_ACTION;

		/*
		 * Temporary measure, we want to make sure we don't see
		 * dirty data on the default backing_dev_info
		 */
		if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list)) {
			del_timer(&me->wakeup_timer);
			wb_do_writeback(me, 0);
		}

		spin_lock_bh(&bdi_lock);
		set_current_state(TASK_INTERRUPTIBLE);

		list_for_each_entry(bdi, &bdi_list, bdi_list) {
			bool have_dirty_io;

			if (!bdi_cap_writeback_dirty(bdi) ||
			     bdi_cap_flush_forker(bdi))
				continue;

			WARN(!test_bit(BDI_registered, &bdi->state),
			     "bdi %p/%s is not registered!\n", bdi, bdi->name);

			have_dirty_io = !list_empty(&bdi->work_list) ||
					wb_has_dirty_io(&bdi->wb);

			/*
			 * If the bdi has work to do, but the thread does not
			 * exist - create it.
			 */
			if (!bdi->wb.task && have_dirty_io) {
				/*
				 * Set the pending bit - if someone tries to
				 * unregister this bdi, it'll wait on this bit.
				 */
				set_bit(BDI_pending, &bdi->state);
				action = FORK_THREAD;
				break;
			}

			spin_lock(&bdi->wb_lock);

			/*
			 * If there is no work to do and the bdi thread was
			 * inactive long enough - kill it. The wb_lock is taken
			 * to make sure no-one adds more work to this bdi and
			 * wakes the bdi thread up.
			 */
			if (bdi->wb.task && !have_dirty_io &&
			    time_after(jiffies, bdi->wb.last_active +
						bdi_longest_inactive())) {
				task = bdi->wb.task;
				bdi->wb.task = NULL;
				spin_unlock(&bdi->wb_lock);
				set_bit(BDI_pending, &bdi->state);
				action = KILL_THREAD;
				break;
			}
			spin_unlock(&bdi->wb_lock);
		}
		spin_unlock_bh(&bdi_lock);

		/* Keep working if default bdi still has things to do */
		if (!list_empty(&me->bdi->work_list))
			__set_current_state(TASK_RUNNING);

		switch (action) {
		case FORK_THREAD:
			__set_current_state(TASK_RUNNING);
			task = kthread_run(bdi_writeback_thread, &bdi->wb, "flush-%s",
					   dev_name(bdi->dev));
			if (IS_ERR(task)) {
				/*
				 * If thread creation fails, force writeout of
				 * the bdi from the forker thread.
				 */
				bdi_flush_io(bdi);
			} else {
				/*
				 * The spinlock makes sure we do not lose
				 * wake-ups when racing with 'bdi_queue_work()'.
				 */
				spin_lock_bh(&bdi->wb_lock);
				bdi->wb.task = task;
				spin_unlock_bh(&bdi->wb_lock);
			}
			break;

		case KILL_THREAD:
			__set_current_state(TASK_RUNNING);
			kthread_stop(task);
			break;

		case NO_ACTION:
			if (!wb_has_dirty_io(me) || !dirty_writeback_interval)
				/*
				 * There is no dirty data. The only thing we
				 * should now care about is checking for
				 * inactive bdi threads and killing them. Thus,
				 * let's sleep for a longer time, save energy
				 * and be friendly to battery-driven devices.
				 */
				schedule_timeout(bdi_longest_inactive());
			else
				schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
			try_to_freeze();
			/* Back to the main loop */
			continue;
		}

		/*
		 * Clear pending bit and wakeup anybody waiting to tear us down.
		 */
		clear_bit(BDI_pending, &bdi->state);
		smp_mb__after_clear_bit();
		wake_up_bit(&bdi->state, BDI_pending);
	}

	return 0;
}

/*
 * Remove bdi from bdi_list, and ensure that it is no longer visible
 */
static void bdi_remove_from_list(struct backing_dev_info *bdi)
{
	spin_lock_bh(&bdi_lock);
	list_del_rcu(&bdi->bdi_list);
	spin_unlock_bh(&bdi_lock);

	synchronize_rcu();
}

int bdi_register(struct backing_dev_info *bdi, struct device *parent,
		const char *fmt, ...)
{
	va_list args;
	struct device *dev;

	if (bdi->dev)	/* The driver needs to use separate queues per device */
		return 0;

	va_start(args, fmt);
	dev = device_create_vargs(bdi_class, parent, MKDEV(0, 0), bdi, fmt, args);
	va_end(args);
	if (IS_ERR(dev))
		return PTR_ERR(dev);

	bdi->dev = dev;

	/*
	 * Just start the forker thread for our default backing_dev_info,
	 * and add other bdi's to the list. They will get a thread created
	 * on-demand when they need it.
	 */
	if (bdi_cap_flush_forker(bdi)) {
		struct bdi_writeback *wb = &bdi->wb;

		wb->task = kthread_run(bdi_forker_thread, wb, "bdi-%s",
					dev_name(dev));
		if (IS_ERR(wb->task))
			return PTR_ERR(wb->task);
	}

	bdi_debug_register(bdi, dev_name(dev));
	set_bit(BDI_registered, &bdi->state);

	spin_lock_bh(&bdi_lock);
	list_add_tail_rcu(&bdi->bdi_list, &bdi_list);
	spin_unlock_bh(&bdi_lock);

	trace_writeback_bdi_register(bdi);
	return 0;
}
EXPORT_SYMBOL(bdi_register);

int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev)
{
	return bdi_register(bdi, NULL, "%u:%u", MAJOR(dev), MINOR(dev));
}
EXPORT_SYMBOL(bdi_register_dev);

/*
 * Remove bdi from the global list and shutdown any threads we have running
 */
static void bdi_wb_shutdown(struct backing_dev_info *bdi)
{
	if (!bdi_cap_writeback_dirty(bdi))
		return;

	/*
	 * Make sure nobody finds us on the bdi_list anymore
	 */
	bdi_remove_from_list(bdi);

	/*
	 * If setup is pending, wait for that to complete first
	 */
	wait_on_bit(&bdi->state, BDI_pending, bdi_sched_wait,
			TASK_UNINTERRUPTIBLE);

	/*
	 * Finally, kill the kernel thread. We don't need to be RCU
	 * safe anymore, since the bdi is gone from visibility. Force
	 * unfreeze of the thread before calling kthread_stop(), otherwise
	 * it would never exit if it is currently stuck in the refrigerator.
	 */
	if (bdi->wb.task) {
		thaw_process(bdi->wb.task);
		kthread_stop(bdi->wb.task);
	}
}

/*
 * This bdi is going away now, make sure that no super_blocks point to it
 */
static void bdi_prune_sb(struct backing_dev_info *bdi)
{
	struct super_block *sb;

	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (sb->s_bdi == bdi)
			sb->s_bdi = NULL;
	}
	spin_unlock(&sb_lock);
}

void bdi_unregister(struct backing_dev_info *bdi)
{
	if (bdi->dev) {
		trace_writeback_bdi_unregister(bdi);
		bdi_prune_sb(bdi);
		del_timer_sync(&bdi->wb.wakeup_timer);

		if (!bdi_cap_flush_forker(bdi))
			bdi_wb_shutdown(bdi);
		bdi_debug_unregister(bdi);
		device_unregister(bdi->dev);
		bdi->dev = NULL;
	}
}
EXPORT_SYMBOL(bdi_unregister);

static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
{
	memset(wb, 0, sizeof(*wb));

	wb->bdi = bdi;
	wb->last_old_flush = jiffies;
	INIT_LIST_HEAD(&wb->b_dirty);
	INIT_LIST_HEAD(&wb->b_io);
	INIT_LIST_HEAD(&wb->b_more_io);
	setup_timer(&wb->wakeup_timer, wakeup_timer_fn, (unsigned long)bdi);
}

int bdi_init(struct backing_dev_info *bdi)
{
	int i, err;

	bdi->dev = NULL;

	bdi->min_ratio = 0;
	bdi->max_ratio = 100;
	bdi->max_prop_frac = PROP_FRAC_BASE;
	spin_lock_init(&bdi->wb_lock);
	INIT_LIST_HEAD(&bdi->bdi_list);
	INIT_LIST_HEAD(&bdi->work_list);

	bdi_wb_init(&bdi->wb, bdi);

	for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
		err = percpu_counter_init(&bdi->bdi_stat[i], 0);
		if (err)
			goto err;
	}

	bdi->dirty_exceeded = 0;
	err = prop_local_init_percpu(&bdi->completions);

	if (err) {
err:
		while (i--)
			percpu_counter_destroy(&bdi->bdi_stat[i]);
	}

	return err;
}
EXPORT_SYMBOL(bdi_init);

void bdi_destroy(struct backing_dev_info *bdi)
{
	int i;

	/*
	 * Splice our entries to the default_backing_dev_info, if this
	 * bdi disappears
	 */
	if (bdi_has_dirty_io(bdi)) {
		struct bdi_writeback *dst = &default_backing_dev_info.wb;

		spin_lock(&inode_lock);
		list_splice(&bdi->wb.b_dirty, &dst->b_dirty);
		list_splice(&bdi->wb.b_io, &dst->b_io);
		list_splice(&bdi->wb.b_more_io, &dst->b_more_io);
		spin_unlock(&inode_lock);
	}

	bdi_unregister(bdi);

	for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
		percpu_counter_destroy(&bdi->bdi_stat[i]);

	prop_local_destroy_percpu(&bdi->completions);
}
EXPORT_SYMBOL(bdi_destroy);

/*
 * For use from filesystems to quickly init and register a bdi associated
 * with dirty writeback
 */
int bdi_setup_and_register(struct backing_dev_info *bdi, char *name,
			   unsigned int cap)
{
	char tmp[32];
	int err;

	bdi->name = name;
	bdi->capabilities = cap;
	err = bdi_init(bdi);
	if (err)
		return err;

	sprintf(tmp, "%.28s%s", name, "-%d");
	err = bdi_register(bdi, NULL, tmp, atomic_long_inc_return(&bdi_seq));
	if (err) {
		bdi_destroy(bdi);
		return err;
	}

	return 0;
}
EXPORT_SYMBOL(bdi_setup_and_register);
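
/*
 * Illustrative sketch only (not part of the original file): roughly how a
 * filesystem might pair bdi_setup_and_register() with bdi_destroy() at mount
 * and teardown time. 'example_fs_info' and the 'example_*' helpers are made
 * up for the sketch; real callers keep the backing_dev_info embedded in
 * their per-superblock private data and point sb->s_bdi at it once
 * registration succeeds.
 *
 *	struct example_fs_info {
 *		struct backing_dev_info bdi;
 *	};
 *
 *	static int example_fill_super(struct super_block *sb)
 *	{
 *		struct example_fs_info *fsi = sb->s_fs_info;
 *		int err;
 *
 *		err = bdi_setup_and_register(&fsi->bdi, "example",
 *					     BDI_CAP_MAP_COPY);
 *		if (err)
 *			return err;
 *
 *		sb->s_bdi = &fsi->bdi;
 *		return 0;
 *	}
 *
 *	static void example_kill_sb(struct super_block *sb)
 *	{
 *		struct example_fs_info *fsi = sb->s_fs_info;
 *
 *		bdi_destroy(&fsi->bdi);
 *	}
 */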

static wait_queue_head_t congestion_wqh[2] = {
		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
	};

void clear_bdi_congested(struct backing_dev_info *bdi, int sync)
{
	enum bdi_state bit;
	wait_queue_head_t *wqh = &congestion_wqh[sync];

	bit = sync ? BDI_sync_congested : BDI_async_congested;
	clear_bit(bit, &bdi->state);
	smp_mb__after_clear_bit();
	if (waitqueue_active(wqh))
		wake_up(wqh);
}
EXPORT_SYMBOL(clear_bdi_congested);

void set_bdi_congested(struct backing_dev_info *bdi, int sync)
{
	enum bdi_state bit;

	bit = sync ? BDI_sync_congested : BDI_async_congested;
	set_bit(bit, &bdi->state);
}
EXPORT_SYMBOL(set_bdi_congested);

/**
 * congestion_wait - wait for a backing_dev to become uncongested
 * @sync: SYNC or ASYNC IO
 * @timeout: timeout in jiffies
 *
 * Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit
 * write congestion. If no backing_devs are congested then just wait for the
 * next write to be completed.
 */
long congestion_wait(int sync, long timeout)
{
	long ret;
	DEFINE_WAIT(wait);
	wait_queue_head_t *wqh = &congestion_wqh[sync];

	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
	ret = io_schedule_timeout(timeout);
	finish_wait(wqh, &wait);
	return ret;
}
EXPORT_SYMBOL(congestion_wait);
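
/*
 * Illustrative sketch only (not part of the original file): a typical caller
 * pattern for congestion_wait(), backing off briefly when the async write
 * queues look congested before retrying a writeout or allocation attempt.
 * 'example_throttle_writeout' is a made-up name; BLK_RW_ASYNC and
 * bdi_write_congested() are the existing helpers this era of the kernel
 * provides in <linux/backing-dev.h>.
 *
 *	static void example_throttle_writeout(struct backing_dev_info *bdi)
 *	{
 *		if (bdi_write_congested(bdi))
 *			congestion_wait(BLK_RW_ASYNC, HZ / 10);
 *	}
 */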