btree.c (b0f32a56f27eb0df4124dbfc8eb6f09f423eed99) | btree.c (cb7a583e6a6ace661a5890803e115d2292a293df) |
---|---|
1/* 2 * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com> 3 * 4 * Uses a block device as cache for other block devices; optimized for SSDs. 5 * All allocation is done in buckets, which should match the erase block size 6 * of the device. 7 * 8 * Buckets containing cached data are kept on a heap sorted by priority; --- 326 unchanged lines hidden (view full) --- 335 atomic_dec_bug(w->journal); 336 __closure_wake_up(&b->c->journal.wait); 337 } 338 339 w->prio_blocked = 0; 340 w->journal = NULL; 341} 342 | 1/* 2 * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com> 3 * 4 * Uses a block device as cache for other block devices; optimized for SSDs. 5 * All allocation is done in buckets, which should match the erase block size 6 * of the device. 7 * 8 * Buckets containing cached data are kept on a heap sorted by priority; --- 326 unchanged lines hidden (view full) --- 335 atomic_dec_bug(w->journal); 336 __closure_wake_up(&b->c->journal.wait); 337 } 338 339 w->prio_blocked = 0; 340 w->journal = NULL; 341} 342 |
 | 343static void btree_node_write_unlock(struct closure *cl) 344{ 345 struct btree *b = container_of(cl, struct btree, io); 346 347 up(&b->io_mutex); 348} 349 |
343static void __btree_node_write_done(struct closure *cl) 344{ | 350static void __btree_node_write_done(struct closure *cl) 351{ |
345 struct btree *b = container_of(cl, struct btree, io.cl); | 352 struct btree *b = container_of(cl, struct btree, io); |
346 struct btree_write *w = btree_prev_write(b); 347 348 bch_bbio_free(b->bio, b->c); 349 b->bio = NULL; 350 btree_complete_write(b, w); 351 352 if (btree_node_dirty(b)) 353 queue_delayed_work(btree_io_wq, &b->work, 354 msecs_to_jiffies(30000)); 355 | 353 struct btree_write *w = btree_prev_write(b); 354 355 bch_bbio_free(b->bio, b->c); 356 b->bio = NULL; 357 btree_complete_write(b, w); 358 359 if (btree_node_dirty(b)) 360 queue_delayed_work(btree_io_wq, &b->work, 361 msecs_to_jiffies(30000)); 362 |
356 closure_return(cl); | 363 closure_return_with_destructor(cl, btree_node_write_unlock); |
357} 358 359static void btree_node_write_done(struct closure *cl) 360{ | 364} 365 366static void btree_node_write_done(struct closure *cl) 367{ |
361 struct btree *b = container_of(cl, struct btree, io.cl); | 368 struct btree *b = container_of(cl, struct btree, io); |
362 struct bio_vec *bv; 363 int n; 364 365 bio_for_each_segment_all(bv, b->bio, n) 366 __free_page(bv->bv_page); 367 368 __btree_node_write_done(cl); 369} 370 371static void btree_node_write_endio(struct bio *bio, int error) 372{ 373 struct closure *cl = bio->bi_private; | 369 struct bio_vec *bv; 370 int n; 371 372 bio_for_each_segment_all(bv, b->bio, n) 373 __free_page(bv->bv_page); 374 375 __btree_node_write_done(cl); 376} 377 378static void btree_node_write_endio(struct bio *bio, int error) 379{ 380 struct closure *cl = bio->bi_private; |
374 struct btree *b = container_of(cl, struct btree, io.cl); | 381 struct btree *b = container_of(cl, struct btree, io); |
375 376 if (error) 377 set_btree_node_io_error(b); 378 379 bch_bbio_count_io_errors(b->c, bio, error, "writing btree"); 380 closure_put(cl); 381} 382 383static void do_btree_node_write(struct btree *b) 384{ | 382 383 if (error) 384 set_btree_node_io_error(b); 385 386 bch_bbio_count_io_errors(b->c, bio, error, "writing btree"); 387 closure_put(cl); 388} 389 390static void do_btree_node_write(struct btree *b) 391{ |
385 struct closure *cl = &b->io.cl; | 392 struct closure *cl = &b->io; |
386 struct bset *i = b->sets[b->nsets].data; 387 BKEY_PADDED(key) k; 388 389 i->version = BCACHE_BSET_VERSION; 390 i->csum = btree_csum_set(b, i); 391 392 BUG_ON(b->bio); 393 b->bio = bch_bbio_alloc(b->c); --- 36 unchanged lines hidden (view full) --- 430 continue_at(cl, btree_node_write_done, NULL); 431 } else { 432 b->bio->bi_vcnt = 0; 433 bch_bio_map(b->bio, i); 434 435 bch_submit_bbio(b->bio, b->c, &k.key, 0); 436 437 closure_sync(cl); | 393 struct bset *i = b->sets[b->nsets].data; 394 BKEY_PADDED(key) k; 395 396 i->version = BCACHE_BSET_VERSION; 397 i->csum = btree_csum_set(b, i); 398 399 BUG_ON(b->bio); 400 b->bio = bch_bbio_alloc(b->c); --- 36 unchanged lines hidden (view full) --- 437 continue_at(cl, btree_node_write_done, NULL); 438 } else { 439 b->bio->bi_vcnt = 0; 440 bch_bio_map(b->bio, i); 441 442 bch_submit_bbio(b->bio, b->c, &k.key, 0); 443 444 closure_sync(cl); |
438 __btree_node_write_done(cl); | 445 continue_at_nobarrier(cl, __btree_node_write_done, NULL); |
439 } 440} 441 442void bch_btree_node_write(struct btree *b, struct closure *parent) 443{ 444 struct bset *i = b->sets[b->nsets].data; 445 446 trace_bcache_btree_write(b); 447 448 BUG_ON(current->bio_list); 449 BUG_ON(b->written >= btree_blocks(b)); 450 BUG_ON(b->written && !i->keys); 451 BUG_ON(b->sets->data->seq != i->seq); 452 bch_check_keys(b, "writing"); 453 454 cancel_delayed_work(&b->work); 455 456 /* If caller isn't waiting for write, parent refcount is cache set */ | 446 } 447} 448 449void bch_btree_node_write(struct btree *b, struct closure *parent) 450{ 451 struct bset *i = b->sets[b->nsets].data; 452 453 trace_bcache_btree_write(b); 454 455 BUG_ON(current->bio_list); 456 BUG_ON(b->written >= btree_blocks(b)); 457 BUG_ON(b->written && !i->keys); 458 BUG_ON(b->sets->data->seq != i->seq); 459 bch_check_keys(b, "writing"); 460 461 cancel_delayed_work(&b->work); 462 463 /* If caller isn't waiting for write, parent refcount is cache set */ |
457 closure_lock(&b->io, parent ?: &b->c->cl); | 464 down(&b->io_mutex); 465 closure_init(&b->io, parent ?: &b->c->cl); |
458 459 clear_bit(BTREE_NODE_dirty, &b->flags); 460 change_bit(BTREE_NODE_write_idx, &b->flags); 461 462 do_btree_node_write(b); 463 464 b->written += set_blocks(i, b->c); 465 atomic_long_add(set_blocks(i, b->c) * b->c->sb.block_size, --- 83 unchanged lines hidden (view full) --- 549#define mca_reserve(c) (((c->root && c->root->level) \ 550 ? c->root->level : 1) * 8 + 16) 551#define mca_can_free(c) \ 552 max_t(int, 0, c->bucket_cache_used - mca_reserve(c)) 553 554static void mca_data_free(struct btree *b) 555{ 556 struct bset_tree *t = b->sets; | 466 467 clear_bit(BTREE_NODE_dirty, &b->flags); 468 change_bit(BTREE_NODE_write_idx, &b->flags); 469 470 do_btree_node_write(b); 471 472 b->written += set_blocks(i, b->c); 473 atomic_long_add(set_blocks(i, b->c) * b->c->sb.block_size, --- 83 unchanged lines hidden (view full) --- 557#define mca_reserve(c) (((c->root && c->root->level) \ 558 ? c->root->level : 1) * 8 + 16) 559#define mca_can_free(c) \ 560 max_t(int, 0, c->bucket_cache_used - mca_reserve(c)) 561 562static void mca_data_free(struct btree *b) 563{ 564 struct bset_tree *t = b->sets; |
557 BUG_ON(!closure_is_unlocked(&b->io.cl)); | |
558 | 565 |
 | 566 BUG_ON(b->io_mutex.count != 1); 567
559 if (bset_prev_bytes(b) < PAGE_SIZE) 560 kfree(t->prev); 561 else 562 free_pages((unsigned long) t->prev, 563 get_order(bset_prev_bytes(b))); 564 565 if (bset_tree_bytes(b) < PAGE_SIZE) 566 kfree(t->tree); --- 63 unchanged lines hidden (view full) --- 630 if (!b) 631 return NULL; 632 633 init_rwsem(&b->lock); 634 lockdep_set_novalidate_class(&b->lock); 635 INIT_LIST_HEAD(&b->list); 636 INIT_DELAYED_WORK(&b->work, btree_node_write_work); 637 b->c = c; | 568 if (bset_prev_bytes(b) < PAGE_SIZE) 569 kfree(t->prev); 570 else 571 free_pages((unsigned long) t->prev, 572 get_order(bset_prev_bytes(b))); 573 574 if (bset_tree_bytes(b) < PAGE_SIZE) 575 kfree(t->tree); --- 63 unchanged lines hidden (view full) --- 639 if (!b) 640 return NULL; 641 642 init_rwsem(&b->lock); 643 lockdep_set_novalidate_class(&b->lock); 644 INIT_LIST_HEAD(&b->list); 645 INIT_DELAYED_WORK(&b->work, btree_node_write_work); 646 b->c = c; |
638 closure_init_unlocked(&b->io); | 647 sema_init(&b->io_mutex, 1); |
639 640 mca_data_alloc(b, k, gfp); 641 return b; 642} 643 644static int mca_reap(struct btree *b, unsigned min_order, bool flush) 645{ 646 struct closure cl; 647 648 closure_init_stack(&cl); 649 lockdep_assert_held(&b->c->bucket_lock); 650 651 if (!down_write_trylock(&b->lock)) 652 return -ENOMEM; 653 654 BUG_ON(btree_node_dirty(b) && !b->sets[0].data); 655 | 648 649 mca_data_alloc(b, k, gfp); 650 return b; 651} 652 653static int mca_reap(struct btree *b, unsigned min_order, bool flush) 654{ 655 struct closure cl; 656 657 closure_init_stack(&cl); 658 lockdep_assert_held(&b->c->bucket_lock); 659 660 if (!down_write_trylock(&b->lock)) 661 return -ENOMEM; 662 663 BUG_ON(btree_node_dirty(b) && !b->sets[0].data); 664 |
656 if (b->page_order < min_order || 657 (!flush && 658 (btree_node_dirty(b) || 659 atomic_read(&b->io.cl.remaining) != -1))) { 660 rw_unlock(true, b); 661 return -ENOMEM; | 665 if (b->page_order < min_order) 666 goto out_unlock; 667 668 if (!flush) { 669 if (btree_node_dirty(b)) 670 goto out_unlock; 671 672 if (down_trylock(&b->io_mutex)) 673 goto out_unlock; 674 up(&b->io_mutex); |
662 } 663 664 if (btree_node_dirty(b)) 665 bch_btree_node_write_sync(b); 666 667 /* wait for any in flight btree write */ | 675 } 676 677 if (btree_node_dirty(b)) 678 bch_btree_node_write_sync(b); 679 680 /* wait for any in flight btree write */ |
668 closure_wait_event(&b->io.wait, &cl, 669 atomic_read(&b->io.cl.remaining) == -1); | 681 down(&b->io_mutex); 682 up(&b->io_mutex); |
670 671 return 0; | 683 684 return 0; |
 | 685out_unlock: 686 rw_unlock(true, b); 687 return -ENOMEM;
672} 673 674static unsigned long bch_mca_scan(struct shrinker *shrink, 675 struct shrink_control *sc) 676{ 677 struct cache_set *c = container_of(shrink, struct cache_set, shrink); 678 struct btree *b, *t; 679 unsigned long i, nr = sc->nr_to_scan; --- 233 unchanged lines hidden (view full) --- 913 b = mca_bucket_alloc(c, k, __GFP_NOWARN|GFP_NOIO); 914 if (!b) 915 goto err; 916 917 BUG_ON(!down_write_trylock(&b->lock)); 918 if (!b->sets->data) 919 goto err; 920out: | 688} 689 690static unsigned long bch_mca_scan(struct shrinker *shrink, 691 struct shrink_control *sc) 692{ 693 struct cache_set *c = container_of(shrink, struct cache_set, shrink); 694 struct btree *b, *t; 695 unsigned long i, nr = sc->nr_to_scan; --- 233 unchanged lines hidden (view full) --- 929 b = mca_bucket_alloc(c, k, __GFP_NOWARN|GFP_NOIO); 930 if (!b) 931 goto err; 932 933 BUG_ON(!down_write_trylock(&b->lock)); 934 if (!b->sets->data) 935 goto err; 936out: |
921 BUG_ON(!closure_is_unlocked(&b->io.cl)); | 937 BUG_ON(b->io_mutex.count != 1); |
922 923 bkey_copy(&b->key, k); 924 list_move(&b->list, &c->btree_cache); 925 hlist_del_init_rcu(&b->hash); 926 hlist_add_head_rcu(&b->hash, mca_hash(c, k)); 927 928 lock_set_subclass(&b->lock.dep_map, level + 1, _THIS_IP_); 929 b->level = level; --- 1663 unchanged lines hidden --- | 938 939 bkey_copy(&b->key, k); 940 list_move(&b->list, &c->btree_cache); 941 hlist_del_init_rcu(&b->hash); 942 hlist_add_head_rcu(&b->hash, mca_hash(c, k)); 943 944 lock_set_subclass(&b->lock.dep_map, level + 1, _THIS_IP_); 945 b->level = level; --- 1663 unchanged lines hidden --- |
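
For context on what the right-hand column is doing: the old code guarded in-flight btree node writes with the closure "lock" built into `b->io` (`closure_lock()`, `closure_is_unlocked()`, and waiting on `io.wait`), while the new code uses a plain semaphore, `b->io_mutex`, initialized to 1 with `sema_init(&b->io_mutex, 1)` when the node is allocated. The write path takes it with `down()` before `closure_init(&b->io, ...)`, and `closure_return_with_destructor()` arranges for `btree_node_write_unlock()` to `up()` it once the write's closure refcount drops. The snippet below is a minimal userspace sketch of that pattern using POSIX semaphores; the struct and function names (`fake_btree`, `writer_thread`) are invented for illustration, and it is an analogy rather than kernel code.

```c
#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>
#include <unistd.h>

struct fake_btree {
	sem_t io_mutex;			/* counts 1 while no write is in flight */
};

/*
 * Stand-in for the write path: take io_mutex before the "I/O" starts and
 * release it when the write completes (the kernel releases it from
 * btree_node_write_unlock(), run as the write closure's destructor).
 */
static void *writer_thread(void *arg)
{
	struct fake_btree *b = arg;

	sem_wait(&b->io_mutex);		/* down(&b->io_mutex) */
	usleep(200 * 1000);		/* pretend the write takes 200ms */
	sem_post(&b->io_mutex);		/* up(&b->io_mutex): write done */
	return NULL;
}

int main(void)
{
	struct fake_btree b;
	pthread_t t;

	sem_init(&b.io_mutex, 0, 1);	/* sema_init(&b->io_mutex, 1) */
	pthread_create(&t, NULL, writer_thread, &b);
	usleep(50 * 1000);		/* give the write time to start */

	/* Non-blocking "is a write in flight?" check, as in the !flush case
	 * of mca_reap(): trylock and, if it succeeds, release immediately. */
	if (sem_trywait(&b.io_mutex) != 0) {
		printf("write in flight, node is busy\n");
	} else {
		sem_post(&b.io_mutex);
		printf("no write in flight\n");
	}

	/* Wait for any in-flight write to finish: acquire then immediately
	 * release, as in the flush case of mca_reap(). */
	sem_wait(&b.io_mutex);
	sem_post(&b.io_mutex);
	printf("in-flight write (if any) has completed\n");

	pthread_join(t, NULL);
	sem_destroy(&b.io_mutex);
	return 0;
}
```

Compile with `gcc -pthread`. The `mca_reap()` hunk reads directly off this pattern: `down_trylock()` followed by an immediate `up()` answers "is a write in flight?" without blocking, and a back-to-back `down()`/`up()` waits for any in-flight write to finish, replacing the old `closure_wait_event()` on `io.wait`.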