#include <linux/wait.h>
#include <linux/backing-dev.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/writeback.h>
#include <linux/device.h>

static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);

void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
{
}
EXPORT_SYMBOL(default_unplug_io_fn);

struct backing_dev_info default_backing_dev_info = {
	.name		= "default",
	.ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
	.state		= 0,
	.capabilities	= BDI_CAP_MAP_COPY,
	.unplug_io_fn	= default_unplug_io_fn,
};
EXPORT_SYMBOL_GPL(default_backing_dev_info);

struct backing_dev_info noop_backing_dev_info = {
	.name		= "noop",
};
EXPORT_SYMBOL_GPL(noop_backing_dev_info);

static struct class *bdi_class;

/*
 * bdi_lock protects updates to bdi_list and bdi_pending_list, as well as
 * reader side protection for bdi_pending_list. bdi_list has RCU reader side
 * locking.
 */
DEFINE_SPINLOCK(bdi_lock);
LIST_HEAD(bdi_list);
LIST_HEAD(bdi_pending_list);

static struct task_struct *sync_supers_tsk;
static struct timer_list sync_supers_timer;

static int bdi_sync_supers(void *);
static void sync_supers_timer_fn(unsigned long);
static void arm_supers_timer(void);

static void bdi_add_default_flusher_task(struct backing_dev_info *bdi);

#ifdef CONFIG_DEBUG_FS
#include <linux/debugfs.h>
#include <linux/seq_file.h>

static struct dentry *bdi_debug_root;

static void bdi_debug_init(void)
{
	bdi_debug_root = debugfs_create_dir("bdi", NULL);
}

static int bdi_debug_stats_show(struct seq_file *m, void *v)
{
	struct backing_dev_info *bdi = m->private;
	struct bdi_writeback *wb;
	unsigned long background_thresh;
	unsigned long dirty_thresh;
	unsigned long bdi_thresh;
	unsigned long nr_dirty, nr_io, nr_more_io, nr_wb;
	struct inode *inode;

	/*
	 * inode lock is enough here, the bdi->wb_list is protected by
	 * RCU on the reader side
	 */
	nr_wb = nr_dirty = nr_io = nr_more_io = 0;
	spin_lock(&inode_lock);
	list_for_each_entry(wb, &bdi->wb_list, list) {
		nr_wb++;
		list_for_each_entry(inode, &wb->b_dirty, i_list)
			nr_dirty++;
		list_for_each_entry(inode, &wb->b_io, i_list)
			nr_io++;
		list_for_each_entry(inode, &wb->b_more_io, i_list)
			nr_more_io++;
	}
	spin_unlock(&inode_lock);

	get_dirty_limits(&background_thresh, &dirty_thresh, &bdi_thresh, bdi);

#define K(x) ((x) << (PAGE_SHIFT - 10))
	seq_printf(m,
		   "BdiWriteback:     %8lu kB\n"
		   "BdiReclaimable:   %8lu kB\n"
		   "BdiDirtyThresh:   %8lu kB\n"
		   "DirtyThresh:      %8lu kB\n"
		   "BackgroundThresh: %8lu kB\n"
		   "WritebackThreads: %8lu\n"
		   "b_dirty:          %8lu\n"
		   "b_io:             %8lu\n"
		   "b_more_io:        %8lu\n"
		   "bdi_list:         %8u\n"
		   "state:            %8lx\n"
		   "wb_mask:          %8lx\n"
		   "wb_list:          %8u\n"
		   "wb_cnt:           %8u\n",
		   (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)),
		   (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)),
		   K(bdi_thresh), K(dirty_thresh),
		   K(background_thresh), nr_wb, nr_dirty, nr_io, nr_more_io,
		   !list_empty(&bdi->bdi_list), bdi->state, bdi->wb_mask,
		   !list_empty(&bdi->wb_list), bdi->wb_cnt);
#undef K

	return 0;
}

static int bdi_debug_stats_open(struct inode *inode, struct file *file)
{
	return single_open(file, bdi_debug_stats_show, inode->i_private);
}
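/*
 * Reading the resulting debugfs file (typically
 * /sys/kernel/debug/bdi/<major>:<minor>/stats) produces output along
 * these lines; the numbers below are made up, for illustration only:
 *
 *	BdiWriteback:         3008 kB
 *	BdiReclaimable:      12416 kB
 *	BdiDirtyThresh:     571204 kB
 *	DirtyThresh:        571204 kB
 *	BackgroundThresh:   285602 kB
 *	WritebackThreads:        1
 *	b_dirty:                 4
 *	...
 */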

static const struct file_operations bdi_debug_stats_fops = {
	.open		= bdi_debug_stats_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static void bdi_debug_register(struct backing_dev_info *bdi, const char *name)
{
	bdi->debug_dir = debugfs_create_dir(name, bdi_debug_root);
	bdi->debug_stats = debugfs_create_file("stats", 0444, bdi->debug_dir,
					       bdi, &bdi_debug_stats_fops);
}

static void bdi_debug_unregister(struct backing_dev_info *bdi)
{
	debugfs_remove(bdi->debug_stats);
	debugfs_remove(bdi->debug_dir);
}
#else
static inline void bdi_debug_init(void)
{
}
static inline void bdi_debug_register(struct backing_dev_info *bdi,
				      const char *name)
{
}
static inline void bdi_debug_unregister(struct backing_dev_info *bdi)
{
}
#endif

static ssize_t read_ahead_kb_store(struct device *dev,
				   struct device_attribute *attr,
				   const char *buf, size_t count)
{
	struct backing_dev_info *bdi = dev_get_drvdata(dev);
	char *end;
	unsigned long read_ahead_kb;
	ssize_t ret = -EINVAL;

	read_ahead_kb = simple_strtoul(buf, &end, 10);
	if (*buf && (end[0] == '\0' || (end[0] == '\n' && end[1] == '\0'))) {
		bdi->ra_pages = read_ahead_kb >> (PAGE_SHIFT - 10);
		ret = count;
	}
	return ret;
}

#define K(pages) ((pages) << (PAGE_SHIFT - 10))

#define BDI_SHOW(name, expr)						\
static ssize_t name##_show(struct device *dev,				\
			   struct device_attribute *attr, char *page)	\
{									\
	struct backing_dev_info *bdi = dev_get_drvdata(dev);		\
									\
	return snprintf(page, PAGE_SIZE-1, "%lld\n", (long long)expr);	\
}

BDI_SHOW(read_ahead_kb, K(bdi->ra_pages))

static ssize_t min_ratio_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	struct backing_dev_info *bdi = dev_get_drvdata(dev);
	char *end;
	unsigned int ratio;
	ssize_t ret = -EINVAL;

	ratio = simple_strtoul(buf, &end, 10);
	if (*buf && (end[0] == '\0' || (end[0] == '\n' && end[1] == '\0'))) {
		ret = bdi_set_min_ratio(bdi, ratio);
		if (!ret)
			ret = count;
	}
	return ret;
}
BDI_SHOW(min_ratio, bdi->min_ratio)

static ssize_t max_ratio_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	struct backing_dev_info *bdi = dev_get_drvdata(dev);
	char *end;
	unsigned int ratio;
	ssize_t ret = -EINVAL;

	ratio = simple_strtoul(buf, &end, 10);
	if (*buf && (end[0] == '\0' || (end[0] == '\n' && end[1] == '\0'))) {
		ret = bdi_set_max_ratio(bdi, ratio);
		if (!ret)
			ret = count;
	}
	return ret;
}
BDI_SHOW(max_ratio, bdi->max_ratio)

#define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store)

static struct device_attribute bdi_dev_attrs[] = {
	__ATTR_RW(read_ahead_kb),
	__ATTR_RW(min_ratio),
	__ATTR_RW(max_ratio),
	__ATTR_NULL,
};

static __init int bdi_class_init(void)
{
	bdi_class = class_create(THIS_MODULE, "bdi");
	if (IS_ERR(bdi_class))
		return PTR_ERR(bdi_class);

	bdi_class->dev_attrs = bdi_dev_attrs;
	bdi_debug_init();
	return 0;
}
postcore_initcall(bdi_class_init);
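/*
 * The attributes above appear under /sys/class/bdi/<device>/. For
 * illustration (the device name is made up), readahead and the per-device
 * dirty limits could be tuned from userspace with something like:
 *
 *	echo 512 > /sys/class/bdi/8:0/read_ahead_kb
 *	echo 10  > /sys/class/bdi/8:0/max_ratio
 *
 * min_ratio/max_ratio bound this device's share of the global dirty
 * memory, in percent; see bdi_set_min_ratio()/bdi_set_max_ratio().
 */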

static int __init default_bdi_init(void)
{
	int err;

	sync_supers_tsk = kthread_run(bdi_sync_supers, NULL, "sync_supers");
	BUG_ON(IS_ERR(sync_supers_tsk));

	/* setup_timer() initializes the timer, no separate init_timer() needed */
	setup_timer(&sync_supers_timer, sync_supers_timer_fn, 0);
	arm_supers_timer();

	err = bdi_init(&default_backing_dev_info);
	if (!err)
		bdi_register(&default_backing_dev_info, NULL, "default");

	return err;
}
subsys_initcall(default_bdi_init);

static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
{
	memset(wb, 0, sizeof(*wb));

	wb->bdi = bdi;
	wb->last_old_flush = jiffies;
	INIT_LIST_HEAD(&wb->b_dirty);
	INIT_LIST_HEAD(&wb->b_io);
	INIT_LIST_HEAD(&wb->b_more_io);
}

static void bdi_task_init(struct backing_dev_info *bdi,
			  struct bdi_writeback *wb)
{
	struct task_struct *tsk = current;

	spin_lock(&bdi->wb_lock);
	list_add_tail_rcu(&wb->list, &bdi->wb_list);
	spin_unlock(&bdi->wb_lock);

	tsk->flags |= PF_FLUSHER | PF_SWAPWRITE;
	set_freezable();

	/*
	 * Our parent may run at a different priority, just set us to normal
	 */
	set_user_nice(tsk, 0);
}

static int bdi_start_fn(void *ptr)
{
	struct bdi_writeback *wb = ptr;
	struct backing_dev_info *bdi = wb->bdi;
	int ret;

	/*
	 * Add us to the active bdi_list
	 */
	spin_lock_bh(&bdi_lock);
	list_add_rcu(&bdi->bdi_list, &bdi_list);
	spin_unlock_bh(&bdi_lock);

	bdi_task_init(bdi, wb);

	/*
	 * Clear pending bit and wakeup anybody waiting to tear us down
	 */
	clear_bit(BDI_pending, &bdi->state);
	smp_mb__after_clear_bit();
	wake_up_bit(&bdi->state, BDI_pending);

	ret = bdi_writeback_task(wb);

	/*
	 * Remove us from the list
	 */
	spin_lock(&bdi->wb_lock);
	list_del_rcu(&wb->list);
	spin_unlock(&bdi->wb_lock);

	/*
	 * Flush any work that raced with us exiting. No new work
	 * will be added, since this bdi isn't discoverable anymore.
	 */
	if (!list_empty(&bdi->work_list))
		wb_do_writeback(wb, 1);

	wb->task = NULL;
	return ret;
}

int bdi_has_dirty_io(struct backing_dev_info *bdi)
{
	return wb_has_dirty_io(&bdi->wb);
}

static void bdi_flush_io(struct backing_dev_info *bdi)
{
	struct writeback_control wbc = {
		.bdi			= bdi,
		.sync_mode		= WB_SYNC_NONE,
		.older_than_this	= NULL,
		.range_cyclic		= 1,
		.nr_to_write		= 1024,
	};

	writeback_inodes_wbc(&wbc);
}

/*
 * kupdated() used to do this. We cannot do it from the bdi_forker_task()
 * or we risk deadlocking on ->s_umount. The longer term solution would be
 * to implement sync_supers_bdi() or similar and simply do it from the
 * bdi writeback tasks individually.
 */
static int bdi_sync_supers(void *unused)
{
	set_user_nice(current, 0);

	while (!kthread_should_stop()) {
		set_current_state(TASK_INTERRUPTIBLE);
		schedule();

		/*
		 * Do this periodically, like kupdated() did before.
		 */
		sync_supers();
	}

	return 0;
}
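/*
 * dirty_writeback_interval is expressed in centiseconds (it backs the
 * vm.dirty_writeback_centisecs sysctl), hence the "* 10" below to convert
 * it to milliseconds before handing it to msecs_to_jiffies().
 */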
static void arm_supers_timer(void)
{
	unsigned long next;

	next = msecs_to_jiffies(dirty_writeback_interval * 10) + jiffies;
	mod_timer(&sync_supers_timer, round_jiffies_up(next));
}

static void sync_supers_timer_fn(unsigned long unused)
{
	wake_up_process(sync_supers_tsk);
	arm_supers_timer();
}

static int bdi_forker_task(void *ptr)
{
	struct bdi_writeback *me = ptr;

	bdi_task_init(me->bdi, me);

	for (;;) {
		struct backing_dev_info *bdi, *tmp;
		struct bdi_writeback *wb;

		/*
		 * Temporary measure, we want to make sure we don't see
		 * dirty data on the default backing_dev_info
		 */
		if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list))
			wb_do_writeback(me, 0);

		spin_lock_bh(&bdi_lock);

		/*
		 * Check if any existing bdi's have dirty data without
		 * a thread registered. If so, set that up.
		 */
		list_for_each_entry_safe(bdi, tmp, &bdi_list, bdi_list) {
			if (bdi->wb.task)
				continue;
			if (list_empty(&bdi->work_list) &&
			    !bdi_has_dirty_io(bdi))
				continue;

			bdi_add_default_flusher_task(bdi);
		}

		set_current_state(TASK_INTERRUPTIBLE);

		if (list_empty(&bdi_pending_list)) {
			unsigned long wait;

			spin_unlock_bh(&bdi_lock);
			wait = msecs_to_jiffies(dirty_writeback_interval * 10);
			schedule_timeout(wait);
			try_to_freeze();
			continue;
		}

		__set_current_state(TASK_RUNNING);

		/*
		 * This is our real job - check for pending entries in
		 * bdi_pending_list, and create the tasks that got added
		 */
		bdi = list_entry(bdi_pending_list.next, struct backing_dev_info,
				 bdi_list);
		list_del_init(&bdi->bdi_list);
		spin_unlock_bh(&bdi_lock);

		wb = &bdi->wb;
		wb->task = kthread_run(bdi_start_fn, wb, "flush-%s",
				       dev_name(bdi->dev));
		/*
		 * If task creation fails, then readd the bdi to
		 * the pending list and force writeout of the bdi
		 * from this forker thread. That will free some memory
		 * and we can try again.
		 */
		if (IS_ERR(wb->task)) {
			wb->task = NULL;

			/*
			 * Add this 'bdi' to the back, so we get
			 * a chance to flush other bdi's to free
			 * memory.
			 */
			spin_lock_bh(&bdi_lock);
			list_add_tail(&bdi->bdi_list, &bdi_pending_list);
			spin_unlock_bh(&bdi_lock);

			bdi_flush_io(bdi);
		}
	}

	return 0;
}

static void bdi_add_to_pending(struct rcu_head *head)
{
	struct backing_dev_info *bdi;

	bdi = container_of(head, struct backing_dev_info, rcu_head);
	INIT_LIST_HEAD(&bdi->bdi_list);

	spin_lock(&bdi_lock);
	list_add_tail(&bdi->bdi_list, &bdi_pending_list);
	spin_unlock(&bdi_lock);

	/*
	 * We are now on the pending list, wake up bdi_forker_task()
	 * to finish the job and add us back to the active bdi_list
	 */
	wake_up_process(default_backing_dev_info.wb.task);
}
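/*
 * Lifecycle of an on-demand flusher thread, in short:
 *
 *	bdi_add_default_flusher_task()  sets BDI_pending, unlinks the bdi
 *	bdi_add_to_pending() (RCU cb)   moves it onto bdi_pending_list
 *	bdi_forker_task()               kthread_run()s bdi_start_fn()
 *	bdi_start_fn()                  relinks the bdi, clears BDI_pending
 *
 * BDI_pending thus serializes concurrent attempts to spawn a thread for
 * the same bdi, and bdi_wb_shutdown() waits on it before tearing down.
 */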

/*
 * Add the default flusher task that gets created for any bdi
 * that has dirty data pending writeout
 */
static void bdi_add_default_flusher_task(struct backing_dev_info *bdi)
{
	if (!bdi_cap_writeback_dirty(bdi))
		return;

	if (WARN_ON(!test_bit(BDI_registered, &bdi->state))) {
		printk(KERN_ERR "bdi %p/%s is not registered!\n",
		       bdi, bdi->name);
		return;
	}

	/*
	 * Check with the helper whether to proceed adding a task. It will
	 * only abort if two or more simultaneous calls to
	 * bdi_add_default_flusher_task() occurred; further additions will
	 * block waiting for previous additions to finish.
	 */
	if (!test_and_set_bit(BDI_pending, &bdi->state)) {
		list_del_rcu(&bdi->bdi_list);

		/*
		 * We must wait for the current RCU period to end before
		 * moving to the pending list. So schedule that operation
		 * from an RCU callback.
		 */
		call_rcu(&bdi->rcu_head, bdi_add_to_pending);
	}
}

/*
 * Remove bdi from bdi_list, and ensure that it is no longer visible
 */
static void bdi_remove_from_list(struct backing_dev_info *bdi)
{
	spin_lock_bh(&bdi_lock);
	list_del_rcu(&bdi->bdi_list);
	spin_unlock_bh(&bdi_lock);

	synchronize_rcu();
}

int bdi_register(struct backing_dev_info *bdi, struct device *parent,
		 const char *fmt, ...)
{
	va_list args;
	int ret = 0;
	struct device *dev;

	if (bdi->dev)	/* The driver needs to use separate queues per device */
		goto exit;

	va_start(args, fmt);
	dev = device_create_vargs(bdi_class, parent, MKDEV(0, 0), bdi, fmt, args);
	va_end(args);
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto exit;
	}

	spin_lock_bh(&bdi_lock);
	list_add_tail_rcu(&bdi->bdi_list, &bdi_list);
	spin_unlock_bh(&bdi_lock);

	bdi->dev = dev;

	/*
	 * Just start the forker thread for our default backing_dev_info,
	 * and add other bdi's to the list. They will get a thread created
	 * on-demand when they need it.
	 */
	if (bdi_cap_flush_forker(bdi)) {
		struct bdi_writeback *wb = &bdi->wb;

		wb->task = kthread_run(bdi_forker_task, wb, "bdi-%s",
				       dev_name(dev));
		if (IS_ERR(wb->task)) {
			wb->task = NULL;
			ret = -ENOMEM;

			bdi_remove_from_list(bdi);
			goto exit;
		}
	}

	bdi_debug_register(bdi, dev_name(dev));
	set_bit(BDI_registered, &bdi->state);
exit:
	return ret;
}
EXPORT_SYMBOL(bdi_register);

int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev)
{
	return bdi_register(bdi, NULL, "%u:%u", MAJOR(dev), MINOR(dev));
}
EXPORT_SYMBOL(bdi_register_dev);
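/*
 * A block driver typically calls bdi_register_dev() with its dev_t after
 * bdi_init(), so the bdi shows up as e.g. "8:0" in sysfs and debugfs.
 * A sketch of the usual sequence (error handling trimmed, driver-side
 * names made up):
 *
 *	err = bdi_init(&q->backing_dev_info);
 *	if (!err)
 *		err = bdi_register_dev(&q->backing_dev_info, disk_devt(disk));
 */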
/*
 * Remove bdi from the global list and shutdown any threads we have running
 */
static void bdi_wb_shutdown(struct backing_dev_info *bdi)
{
	struct bdi_writeback *wb;

	if (!bdi_cap_writeback_dirty(bdi))
		return;

	/*
	 * If setup is pending, wait for that to complete first
	 */
	wait_on_bit(&bdi->state, BDI_pending, bdi_sched_wait,
		    TASK_UNINTERRUPTIBLE);

	/*
	 * Make sure nobody finds us on the bdi_list anymore
	 */
	bdi_remove_from_list(bdi);

	/*
	 * Finally, kill the kernel threads. We don't need to be RCU
	 * safe anymore, since the bdi is gone from visibility. Force
	 * unfreeze of the thread before calling kthread_stop(), otherwise
	 * it would never exit if it is currently stuck in the refrigerator.
	 */
	list_for_each_entry(wb, &bdi->wb_list, list) {
		thaw_process(wb->task);
		kthread_stop(wb->task);
	}
}

/*
 * This bdi is going away now, make sure that no super_blocks point to it
 */
static void bdi_prune_sb(struct backing_dev_info *bdi)
{
	struct super_block *sb;

	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (sb->s_bdi == bdi)
			sb->s_bdi = NULL;
	}
	spin_unlock(&sb_lock);
}

void bdi_unregister(struct backing_dev_info *bdi)
{
	if (bdi->dev) {
		bdi_prune_sb(bdi);

		if (!bdi_cap_flush_forker(bdi))
			bdi_wb_shutdown(bdi);
		bdi_debug_unregister(bdi);
		device_unregister(bdi->dev);
		bdi->dev = NULL;
	}
}
EXPORT_SYMBOL(bdi_unregister);

int bdi_init(struct backing_dev_info *bdi)
{
	int i, err;

	bdi->dev = NULL;

	bdi->min_ratio = 0;
	bdi->max_ratio = 100;
	bdi->max_prop_frac = PROP_FRAC_BASE;
	spin_lock_init(&bdi->wb_lock);
	INIT_RCU_HEAD(&bdi->rcu_head);
	INIT_LIST_HEAD(&bdi->bdi_list);
	INIT_LIST_HEAD(&bdi->wb_list);
	INIT_LIST_HEAD(&bdi->work_list);

	bdi_wb_init(&bdi->wb, bdi);

	/*
	 * Just one thread support for now, hard code mask and count
	 */
	bdi->wb_mask = 1;
	bdi->wb_cnt = 1;

	for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
		err = percpu_counter_init(&bdi->bdi_stat[i], 0);
		if (err)
			goto err;
	}

	bdi->dirty_exceeded = 0;
	err = prop_local_init_percpu(&bdi->completions);

	if (err) {
err:
		while (i--)
			percpu_counter_destroy(&bdi->bdi_stat[i]);
	}

	return err;
}
EXPORT_SYMBOL(bdi_init);

void bdi_destroy(struct backing_dev_info *bdi)
{
	int i;

	/*
	 * Splice our entries to the default_backing_dev_info, if this
	 * bdi disappears
	 */
	if (bdi_has_dirty_io(bdi)) {
		struct bdi_writeback *dst = &default_backing_dev_info.wb;

		spin_lock(&inode_lock);
		list_splice(&bdi->wb.b_dirty, &dst->b_dirty);
		list_splice(&bdi->wb.b_io, &dst->b_io);
		list_splice(&bdi->wb.b_more_io, &dst->b_more_io);
		spin_unlock(&inode_lock);
	}

	bdi_unregister(bdi);

	for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
		percpu_counter_destroy(&bdi->bdi_stat[i]);

	prop_local_destroy_percpu(&bdi->completions);
}
EXPORT_SYMBOL(bdi_destroy);

/*
 * For use from filesystems to quickly init and register a bdi associated
 * with dirty writeback
 */
int bdi_setup_and_register(struct backing_dev_info *bdi, char *name,
			   unsigned int cap)
{
	char tmp[32];
	int err;

	bdi->name = name;
	bdi->capabilities = cap;
	err = bdi_init(bdi);
	if (err)
		return err;

	sprintf(tmp, "%.28s%s", name, "-%d");
	err = bdi_register(bdi, NULL, tmp, atomic_long_inc_return(&bdi_seq));
	if (err) {
		bdi_destroy(bdi);
		return err;
	}

	return 0;
}
EXPORT_SYMBOL(bdi_setup_and_register);
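/*
 * bdi_setup_and_register() first builds a "%.28s-%d" format string, so the
 * resulting device ends up named e.g. "myfs-1", with bdi_seq supplying the
 * unique suffix. A filesystem would typically call it from its fill_super
 * routine (sketch only, the fs-side names are made up):
 *
 *	err = bdi_setup_and_register(&sbi->bdi, "myfs", BDI_CAP_MAP_COPY);
 *	if (err)
 *		return err;
 *	sb->s_bdi = &sbi->bdi;
 */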

static wait_queue_head_t congestion_wqh[2] = {
		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
	};

void clear_bdi_congested(struct backing_dev_info *bdi, int sync)
{
	enum bdi_state bit;
	wait_queue_head_t *wqh = &congestion_wqh[sync];

	bit = sync ? BDI_sync_congested : BDI_async_congested;
	clear_bit(bit, &bdi->state);
	smp_mb__after_clear_bit();
	if (waitqueue_active(wqh))
		wake_up(wqh);
}
EXPORT_SYMBOL(clear_bdi_congested);

void set_bdi_congested(struct backing_dev_info *bdi, int sync)
{
	enum bdi_state bit;

	bit = sync ? BDI_sync_congested : BDI_async_congested;
	set_bit(bit, &bdi->state);
}
EXPORT_SYMBOL(set_bdi_congested);

/**
 * congestion_wait - wait for a backing_dev to become uncongested
 * @sync: SYNC or ASYNC IO
 * @timeout: timeout in jiffies
 *
 * Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit
 * write congestion. If no backing_devs are congested then just wait for the
 * next write to be completed.
 */
long congestion_wait(int sync, long timeout)
{
	long ret;
	DEFINE_WAIT(wait);
	wait_queue_head_t *wqh = &congestion_wqh[sync];

	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
	ret = io_schedule_timeout(timeout);
	finish_wait(wqh, &wait);
	return ret;
}
EXPORT_SYMBOL(congestion_wait);
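/*
 * Throttling paths use congestion_wait() to back off when devices are
 * overloaded instead of piling on more writes. A typical caller looks
 * roughly like this (illustrative sketch only, device_is_busy() is made
 * up):
 *
 *	while (device_is_busy())
 *		congestion_wait(BLK_RW_ASYNC, HZ / 10);
 *
 * The sync argument indexes congestion_wqh[], so sleepers are only woken
 * when a bdi of the matching kind (sync vs async) clears its congested
 * bit in clear_bdi_congested().
 */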