--- btree.c (b0f32a56f27eb0df4124dbfc8eb6f09f423eed99)
+++ btree.c (cb7a583e6a6ace661a5890803e115d2292a293df)
 /*
  * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com>
  *
  * Uses a block device as cache for other block devices; optimized for SSDs.
  * All allocation is done in buckets, which should match the erase block size
  * of the device.
  *
  * Buckets containing cached data are kept on a heap sorted by priority;

--- 326 unchanged lines hidden ---

 		atomic_dec_bug(w->journal);
 		__closure_wake_up(&b->c->journal.wait);
 	}

 	w->prio_blocked = 0;
 	w->journal = NULL;
 }

+static void btree_node_write_unlock(struct closure *cl)
+{
+	struct btree *b = container_of(cl, struct btree, io);
+
+	up(&b->io_mutex);
+}
+
 static void __btree_node_write_done(struct closure *cl)
 {
-	struct btree *b = container_of(cl, struct btree, io.cl);
+	struct btree *b = container_of(cl, struct btree, io);
 	struct btree_write *w = btree_prev_write(b);

 	bch_bbio_free(b->bio, b->c);
 	b->bio = NULL;
 	btree_complete_write(b, w);

 	if (btree_node_dirty(b))
 		queue_delayed_work(btree_io_wq, &b->work,
 				   msecs_to_jiffies(30000));

-	closure_return(cl);
+	closure_return_with_destructor(cl, btree_node_write_unlock);
 }

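The container_of() argument changes from io.cl to io because struct btree now embeds a bare struct closure (io) plus a separate binary semaphore (io_mutex) instead of the old closure_with_waitlist, and closure_return_with_destructor() arranges for btree_node_write_unlock(), i.e. up(&b->io_mutex), to run once the write's closure refcount drops to zero. Below is a minimal user-space sketch of the container_of() pattern itself; the simplified macro and struct btree_like are illustrative stand-ins, not the kernel definitions.

/*
 * User-space sketch, not the kernel code: how container_of() recovers the
 * enclosing object from a pointer to an embedded member.  struct btree_like
 * and this simplified macro are illustrative stand-ins.
 */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct closure { int remaining; };	/* stand-in for struct closure */

struct btree_like {
	int level;
	struct closure io;		/* embedded member, like b->io */
};

int main(void)
{
	struct btree_like b = { .level = 2 };
	struct closure *cl = &b.io;	/* what the completion callback gets */
	struct btree_like *back = container_of(cl, struct btree_like, io);

	printf("recovered level = %d\n", back->level);	/* prints 2 */
	return 0;
}
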
 static void btree_node_write_done(struct closure *cl)
 {
-	struct btree *b = container_of(cl, struct btree, io.cl);
+	struct btree *b = container_of(cl, struct btree, io);
 	struct bio_vec *bv;
 	int n;

 	bio_for_each_segment_all(bv, b->bio, n)
 		__free_page(bv->bv_page);

 	__btree_node_write_done(cl);
 }

 static void btree_node_write_endio(struct bio *bio, int error)
 {
 	struct closure *cl = bio->bi_private;
-	struct btree *b = container_of(cl, struct btree, io.cl);
+	struct btree *b = container_of(cl, struct btree, io);

 	if (error)
 		set_btree_node_io_error(b);

 	bch_bbio_count_io_errors(b->c, bio, error, "writing btree");
 	closure_put(cl);
 }

 static void do_btree_node_write(struct btree *b)
 {
-	struct closure *cl = &b->io.cl;
+	struct closure *cl = &b->io;
 	struct bset *i = b->sets[b->nsets].data;
 	BKEY_PADDED(key) k;

 	i->version = BCACHE_BSET_VERSION;
 	i->csum = btree_csum_set(b, i);

 	BUG_ON(b->bio);
 	b->bio = bch_bbio_alloc(b->c);

--- 36 unchanged lines hidden ---

 		continue_at(cl, btree_node_write_done, NULL);
 	} else {
 		b->bio->bi_vcnt = 0;
 		bch_bio_map(b->bio, i);

 		bch_submit_bbio(b->bio, b->c, &k.key, 0);

 		closure_sync(cl);
-		__btree_node_write_done(cl);
+		continue_at_nobarrier(cl, __btree_node_write_done, NULL);
 	}
 }

 void bch_btree_node_write(struct btree *b, struct closure *parent)
 {
 	struct bset *i = b->sets[b->nsets].data;

 	trace_bcache_btree_write(b);

 	BUG_ON(current->bio_list);
 	BUG_ON(b->written >= btree_blocks(b));
 	BUG_ON(b->written && !i->keys);
 	BUG_ON(b->sets->data->seq != i->seq);
 	bch_check_keys(b, "writing");

 	cancel_delayed_work(&b->work);

 	/* If caller isn't waiting for write, parent refcount is cache set */
-	closure_lock(&b->io, parent ?: &b->c->cl);
+	down(&b->io_mutex);
+	closure_init(&b->io, parent ?: &b->c->cl);

 	clear_bit(BTREE_NODE_dirty, &b->flags);
 	change_bit(BTREE_NODE_write_idx, &b->flags);

 	do_btree_node_write(b);

 	b->written += set_blocks(i, b->c);
 	atomic_long_add(set_blocks(i, b->c) * b->c->sb.block_size,

--- 83 unchanged lines hidden ---
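The new locking scheme in this hunk: bch_btree_node_write() takes b->io_mutex with down() before initializing the closure and issuing the write, and only btree_node_write_unlock() releases it when the write completes, so at most one write per node is in flight and a second writer simply blocks on down(). A user-space analogue with POSIX semaphores follows; the names and the fake completion thread are illustrative, not kernel code.

/*
 * User-space analogue (POSIX, not kernel code): a binary semaphore taken
 * before issuing an asynchronous write and released by the completion path,
 * mirroring down(&b->io_mutex) in bch_btree_node_write() and up(&b->io_mutex)
 * in btree_node_write_unlock().
 */
#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>
#include <unistd.h>

static sem_t io_mutex;			/* plays the role of b->io_mutex */

static void *btree_write(void *arg)	/* stands in for the async btree write */
{
	(void)arg;
	usleep(10 * 1000);		/* pretend the I/O takes a while */
	printf("write completed, releasing io_mutex\n");
	sem_post(&io_mutex);		/* up(): the closure destructor's job */
	return NULL;
}

int main(void)
{
	pthread_t io;

	sem_init(&io_mutex, 0, 1);	/* sema_init(&b->io_mutex, 1) */

	sem_wait(&io_mutex);		/* down(): at most one write in flight */
	pthread_create(&io, NULL, btree_write, NULL);

	sem_wait(&io_mutex);		/* a second writer blocks here ... */
	printf("previous write drained, another write may start\n");
	sem_post(&io_mutex);		/* ... until the completion posts */

	pthread_join(io, NULL);
	sem_destroy(&io_mutex);
	return 0;
}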

 #define mca_reserve(c)	(((c->root && c->root->level)		\
 			  ? c->root->level : 1) * 8 + 16)
 #define mca_can_free(c)						\
 	max_t(int, 0, c->bucket_cache_used - mca_reserve(c))

 static void mca_data_free(struct btree *b)
 {
 	struct bset_tree *t = b->sets;
-	BUG_ON(!closure_is_unlocked(&b->io.cl));

+	BUG_ON(b->io_mutex.count != 1);
+
 	if (bset_prev_bytes(b) < PAGE_SIZE)
 		kfree(t->prev);
 	else
 		free_pages((unsigned long) t->prev,
 			   get_order(bset_prev_bytes(b)));

 	if (bset_tree_bytes(b) < PAGE_SIZE)
 		kfree(t->tree);

--- 63 unchanged lines hidden ---

 	if (!b)
 		return NULL;

 	init_rwsem(&b->lock);
 	lockdep_set_novalidate_class(&b->lock);
 	INIT_LIST_HEAD(&b->list);
 	INIT_DELAYED_WORK(&b->work, btree_node_write_work);
 	b->c = c;
-	closure_init_unlocked(&b->io);
+	sema_init(&b->io_mutex, 1);

 	mca_data_alloc(b, k, gfp);
 	return b;
 }

 static int mca_reap(struct btree *b, unsigned min_order, bool flush)
 {
 	struct closure cl;

 	closure_init_stack(&cl);
 	lockdep_assert_held(&b->c->bucket_lock);

 	if (!down_write_trylock(&b->lock))
 		return -ENOMEM;

 	BUG_ON(btree_node_dirty(b) && !b->sets[0].data);

-	if (b->page_order < min_order ||
-	    (!flush &&
-	     (btree_node_dirty(b) ||
-	      atomic_read(&b->io.cl.remaining) != -1))) {
-		rw_unlock(true, b);
-		return -ENOMEM;
+	if (b->page_order < min_order)
+		goto out_unlock;
+
+	if (!flush) {
+		if (btree_node_dirty(b))
+			goto out_unlock;
+
+		if (down_trylock(&b->io_mutex))
+			goto out_unlock;
+		up(&b->io_mutex);
 	}

 	if (btree_node_dirty(b))
 		bch_btree_node_write_sync(b);

 	/* wait for any in flight btree write */
-	closure_wait_event(&b->io.wait, &cl,
-			   atomic_read(&b->io.cl.remaining) == -1);
+	down(&b->io_mutex);
+	up(&b->io_mutex);

 	return 0;
+out_unlock:
+	rw_unlock(true, b);
+	return -ENOMEM;
 }

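mca_reap() now uses the same semaphore in two ways: down_trylock() followed by up() as a non-blocking probe for an in-flight write in the !flush case, and down() followed by up() to wait for any in-flight write before the node is reaped. A self-contained user-space analogue of both idioms, with POSIX semaphores standing in for the kernel API (illustrative only):

/*
 * User-space sketch of the two io_mutex idioms mca_reap() now uses:
 * sem_trywait() as a non-blocking "is a write in flight?" probe
 * (down_trylock()/up()), and sem_wait()+sem_post() as "wait for any
 * in-flight write to finish" (down()/up()).  Illustrative only.
 */
#include <semaphore.h>
#include <stdio.h>

int main(void)
{
	sem_t io_mutex;

	sem_init(&io_mutex, 0, 1);

	/* !flush path: probe without blocking; here nothing holds it */
	if (sem_trywait(&io_mutex) == 0) {
		sem_post(&io_mutex);
		printf("no write in flight, safe to reap\n");
	} else {
		printf("write in flight, give up\n");
	}

	/* flush path: block until any in-flight write has completed */
	sem_wait(&io_mutex);
	sem_post(&io_mutex);
	printf("write traffic quiesced\n");

	sem_destroy(&io_mutex);
	return 0;
}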
 static unsigned long bch_mca_scan(struct shrinker *shrink,
 				  struct shrink_control *sc)
 {
 	struct cache_set *c = container_of(shrink, struct cache_set, shrink);
 	struct btree *b, *t;
 	unsigned long i, nr = sc->nr_to_scan;

--- 233 unchanged lines hidden ---

 	b = mca_bucket_alloc(c, k, __GFP_NOWARN|GFP_NOIO);
 	if (!b)
 		goto err;

 	BUG_ON(!down_write_trylock(&b->lock));
 	if (!b->sets->data)
 		goto err;
 out:
-	BUG_ON(!closure_is_unlocked(&b->io.cl));
+	BUG_ON(b->io_mutex.count != 1);

 	bkey_copy(&b->key, k);
 	list_move(&b->list, &c->btree_cache);
 	hlist_del_init_rcu(&b->hash);
 	hlist_add_head_rcu(&b->hash, mca_hash(c, k));

 	lock_set_subclass(&b->lock.dep_map, level + 1, _THIS_IP_);
 	b->level = level;

--- 1663 unchanged lines hidden ---
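Both BUG_ON(!closure_is_unlocked(&b->io.cl)) assertions become BUG_ON(b->io_mutex.count != 1): a binary semaphore whose count is 1 is not held, so no btree write can be in flight when the node's data is freed or the node is reused for another bucket. A rough user-space equivalent of that "not held" check using sem_getvalue() (illustrative, not the kernel assertion):

/*
 * User-space equivalent of the "io_mutex must be free" assertion, using
 * sem_getvalue().  Illustrative only; the kernel code peeks at the
 * semaphore's count field directly.
 */
#include <assert.h>
#include <semaphore.h>
#include <stdio.h>

int main(void)
{
	sem_t io_mutex;
	int val;

	sem_init(&io_mutex, 0, 1);

	/* counterpart of BUG_ON(b->io_mutex.count != 1) */
	sem_getvalue(&io_mutex, &val);
	assert(val == 1);
	printf("io_mutex is free (value = %d)\n", val);

	sem_destroy(&io_mutex);
	return 0;
}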