// SPDX-License-Identifier: GPL-2.0
/*
 * Primary bucket allocation code
 *
 * Copyright 2012 Google, Inc.
 *
 * Allocation in bcache is done in terms of buckets:
 *
 * Each bucket has an associated 8 bit gen; this gen corresponds to the gen in
 * btree pointers - they must match for the pointer to be considered valid.
 *
 * Thus (assuming a bucket has no dirty data or metadata in it) we can reuse a
 * bucket simply by incrementing its gen.
 *
 * The gens (along with the priorities; it's really the gens that are important
 * but the code is named as if it's the priorities) are written in an arbitrary
 * list of buckets on disk, with a pointer to them in the journal header.
 *
 * When we invalidate a bucket, we have to write its new gen to disk and wait
 * for that write to complete before we use it - otherwise after a crash we
 * could have pointers that appeared to be good but pointed to data that had
 * been overwritten.
 *
 * Since the gens and priorities are all stored contiguously on disk, we can
 * batch this up: We fill up the free_inc list with freshly invalidated buckets,
 * call prio_write(), and when prio_write() finishes we pull buckets off the
 * free_inc list and optionally discard them.
 *
 * free_inc isn't the only freelist - if it was, we'd often have to sleep
 * while priorities and gens were being written before we could allocate.
 * c->free is a smaller freelist, and buckets on that list are always ready
 * to be used.
 *
 * If we've got discards enabled, that happens when a bucket moves from the
 * free_inc list to the free list.
 *
 * There is another freelist, because sometimes we have buckets that we know
 * have nothing pointing into them - these we can reuse without waiting for
 * priorities to be rewritten. These come from freed btree nodes and buckets
 * that garbage collection discovered no longer had valid keys pointing into
 * them (because they were overwritten). That's the unused list - buckets on the
 * unused list move to the free list, optionally being discarded in the process.
 *
 * It's also important to ensure that gens don't wrap around - with respect to
 * either the oldest gen in the btree or the gen on disk. This is quite
 * difficult to do in practice, but we explicitly guard against it anyway - if
 * a bucket is in danger of wrapping around we simply skip invalidating it that
 * time around, and we garbage collect or rewrite the priorities sooner than we
 * would have otherwise.
 *
 * bch_bucket_alloc() allocates a single bucket from a specific cache.
 *
 * bch_bucket_alloc_set() allocates one bucket from different caches
 * out of a cache set.
 *
 * free_some_buckets() drives all the processes described above. It's called
 * from bch_bucket_alloc() and a few other places that need to make sure free
 * buckets are ready.
 *
 * invalidate_buckets_(lru|fifo)() find buckets that are available to be
 * invalidated, and then invalidate them and stick them on the free_inc list -
 * in either lru or fifo order.
 */

#include "bcache.h"
#include "btree.h"

#include <linux/blkdev.h>
#include <linux/kthread.h>
#include <linux/random.h>
#include <trace/events/bcache.h>

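/*
 * Number of open_bucket structures preallocated for the sector allocator
 * (see bch_open_buckets_alloc() below).
 */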
#define MAX_OPEN_BUCKETS 128

/* Bucket heap / gen */

uint8_t bch_inc_gen(struct cache *ca, struct bucket *b)
{
        uint8_t ret = ++b->gen;

        ca->set->need_gc = max(ca->set->need_gc, bucket_gc_gen(b));
        WARN_ON_ONCE(ca->set->need_gc > BUCKET_GC_GEN_MAX);

        return ret;
}

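/*
 * Account @sectors of writes to the cache set: roughly every time
 * nbuckets * bucket_size / 1024 sectors have been written, age all unpinned
 * buckets holding cached data by decrementing their prio, and recompute
 * c->min_prio.
 */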
void bch_rescale_priorities(struct cache_set *c, int sectors)
{
        struct cache *ca;
        struct bucket *b;
        unsigned long next = c->nbuckets * c->cache->sb.bucket_size / 1024;
        int r;

        atomic_sub(sectors, &c->rescale);

        do {
                r = atomic_read(&c->rescale);

                if (r >= 0)
                        return;
        } while (atomic_cmpxchg(&c->rescale, r, r + next) != r);

        mutex_lock(&c->bucket_lock);

        c->min_prio = USHRT_MAX;

        ca = c->cache;
        for_each_bucket(b, ca)
                if (b->prio &&
                    b->prio != BTREE_PRIO &&
                    !atomic_read(&b->pin)) {
                        b->prio--;
                        c->min_prio = min(c->min_prio, b->prio);
                }

        mutex_unlock(&c->bucket_lock);
}

/*
 * Background allocation thread: scans for buckets to be invalidated,
 * invalidates them, rewrites prios/gens (marking them as invalidated on disk),
 * then optionally issues discard commands to the newly free buckets, then puts
 * them on the various freelists.
 */

static inline bool can_inc_bucket_gen(struct bucket *b)
{
        return bucket_gc_gen(b) < BUCKET_GC_GEN_MAX;
}

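/*
 * A bucket can be invalidated only if gc marks are valid (or the bucket was
 * flagged reclaimable during the current gc), it holds nothing or only
 * reclaimable data, it isn't pinned, and its gen can still be incremented
 * without wrapping.
 */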
bool bch_can_invalidate_bucket(struct cache *ca, struct bucket *b)
{
        return (ca->set->gc_mark_valid || b->reclaimable_in_gc) &&
               ((!GC_MARK(b) || GC_MARK(b) == GC_MARK_RECLAIMABLE) &&
                !atomic_read(&b->pin) && can_inc_bucket_gen(b));
}

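/*
 * Invalidate a bucket in memory: bump its gen so existing pointers to it no
 * longer match, reset its prio, and take a pin reference so it won't be
 * invalidated again while it waits on the freelists.
 */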
void __bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
{
        lockdep_assert_held(&ca->set->bucket_lock);
        BUG_ON(GC_MARK(b) && GC_MARK(b) != GC_MARK_RECLAIMABLE);

        if (GC_SECTORS_USED(b))
                trace_bcache_invalidate(ca, b - ca->buckets);

        bch_inc_gen(ca, b);
        b->prio = INITIAL_PRIO;
        atomic_inc(&b->pin);
        b->reclaimable_in_gc = 0;
}

static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
{
        __bch_invalidate_one_bucket(ca, b);

        fifo_push(&ca->free_inc, b - ca->buckets);
}

/*
 * Determines what order we're going to reuse buckets, smallest bucket_prio()
 * first: we also take into account the number of sectors of live data in that
 * bucket, and in order for that multiply to make sense we have to scale the
 * bucket priorities appropriately.
 *
 * Thus, we scale the bucket priorities so that the bucket with the smallest
 * prio is worth 1/8th of what INITIAL_PRIO is worth.
 */

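/*
 * Weight used by the LRU invalidation heap below: prio above the current
 * minimum, plus a floor of (INITIAL_PRIO - min_prio) / 8, multiplied by the
 * number of live sectors, so the amount of live data matters even for the
 * coldest buckets.
 */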
static inline unsigned int new_bucket_prio(struct cache *ca, struct bucket *b)
{
        unsigned int min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8;

        return (b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b);
}

static inline bool new_bucket_max_cmp(const void *l, const void *r, void *args)
{
        struct bucket **lhs = (struct bucket **)l;
        struct bucket **rhs = (struct bucket **)r;
        struct cache *ca = args;

        return new_bucket_prio(ca, *lhs) > new_bucket_prio(ca, *rhs);
}

static inline bool new_bucket_min_cmp(const void *l, const void *r, void *args)
{
        struct bucket **lhs = (struct bucket **)l;
        struct bucket **rhs = (struct bucket **)r;
        struct cache *ca = args;

        return new_bucket_prio(ca, *lhs) < new_bucket_prio(ca, *rhs);
}

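/*
 * Scan all buckets and keep the lowest-priority candidates in ca->heap (using
 * the "max" comparator so the worst remaining candidate sits at the root and
 * can be evicted), then re-heapify with the "min" comparator and invalidate
 * candidates in ascending priority order until free_inc is full.
 */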
static void invalidate_buckets_lru(struct cache *ca)
{
        struct bucket *b;
        const struct min_heap_callbacks bucket_max_cmp_callback = {
                .less = new_bucket_max_cmp,
                .swp = NULL,
        };
        const struct min_heap_callbacks bucket_min_cmp_callback = {
                .less = new_bucket_min_cmp,
                .swp = NULL,
        };

        ca->heap.nr = 0;

        for_each_bucket(b, ca) {
                if (!bch_can_invalidate_bucket(ca, b))
                        continue;

                if (!min_heap_full(&ca->heap))
                        min_heap_push(&ca->heap, &b, &bucket_max_cmp_callback, ca);
                else if (!new_bucket_max_cmp(&b, min_heap_peek(&ca->heap), ca)) {
                        ca->heap.data[0] = b;
                        min_heap_sift_down(&ca->heap, 0, &bucket_max_cmp_callback, ca);
                }
        }

        min_heapify_all(&ca->heap, &bucket_min_cmp_callback, ca);

        while (!fifo_full(&ca->free_inc)) {
                if (!ca->heap.nr) {
                        /*
                         * We don't want to be calling invalidate_buckets()
                         * multiple times when it can't do anything
                         */
                        ca->invalidate_needs_gc = 1;
                        wake_up_gc(ca->set);
                        return;
                }
                b = min_heap_peek(&ca->heap)[0];
                min_heap_pop(&ca->heap, &bucket_min_cmp_callback, ca);

                bch_invalidate_one_bucket(ca, b);
        }
}

static void invalidate_buckets_fifo(struct cache *ca)
{
        struct bucket *b;
        size_t checked = 0;

        while (!fifo_full(&ca->free_inc)) {
                if (ca->fifo_last_bucket < ca->sb.first_bucket ||
                    ca->fifo_last_bucket >= ca->sb.nbuckets)
                        ca->fifo_last_bucket = ca->sb.first_bucket;

                b = ca->buckets + ca->fifo_last_bucket++;

                if (bch_can_invalidate_bucket(ca, b))
                        bch_invalidate_one_bucket(ca, b);

                if (++checked >= ca->sb.nbuckets) {
                        ca->invalidate_needs_gc = 1;
                        wake_up_gc(ca->set);
                        return;
                }
        }
}

static void invalidate_buckets_random(struct cache *ca)
{
        struct bucket *b;
        size_t checked = 0;

        while (!fifo_full(&ca->free_inc)) {
                size_t n;

                get_random_bytes(&n, sizeof(n));

                n %= (size_t) (ca->sb.nbuckets - ca->sb.first_bucket);
                n += ca->sb.first_bucket;

                b = ca->buckets + n;

                if (bch_can_invalidate_bucket(ca, b))
                        bch_invalidate_one_bucket(ca, b);

                if (++checked >= ca->sb.nbuckets / 2) {
                        ca->invalidate_needs_gc = 1;
                        wake_up_gc(ca->set);
                        return;
                }
        }
}

static void invalidate_buckets(struct cache *ca)
{
        BUG_ON(ca->invalidate_needs_gc);

        switch (CACHE_REPLACEMENT(&ca->sb)) {
        case CACHE_REPLACEMENT_LRU:
                invalidate_buckets_lru(ca);
                break;
        case CACHE_REPLACEMENT_FIFO:
                invalidate_buckets_fifo(ca);
                break;
        case CACHE_REPLACEMENT_RANDOM:
                invalidate_buckets_random(ca);
                break;
        }
}

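/*
 * Sleep until @cond becomes true, dropping bucket_lock across each schedule()
 * and re-taking it afterwards; bail out to the "out" label if the thread is
 * being stopped or cache set IO has been disabled.
 */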
#define allocator_wait(ca, cond) \
do { \
        while (1) { \
                set_current_state(TASK_INTERRUPTIBLE); \
                if (cond) \
                        break; \
 \
                mutex_unlock(&(ca)->set->bucket_lock); \
                if (kthread_should_stop() || \
                    test_bit(CACHE_SET_IO_DISABLE, &ca->set->flags)) { \
                        set_current_state(TASK_RUNNING); \
                        goto out; \
                } \
 \
                schedule(); \
                mutex_lock(&(ca)->set->bucket_lock); \
        } \
        __set_current_state(TASK_RUNNING); \
} while (0)

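/*
 * Try to stash a freshly freed bucket on one of the per-reserve freelists,
 * refilling the prio/gen reserve first; returns false if every freelist is
 * full.
 */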
static int bch_allocator_push(struct cache *ca, long bucket)
{
        unsigned int i;

        /* Prios/gens are actually the most important reserve */
        if (fifo_push(&ca->free[RESERVE_PRIO], bucket))
                return true;

        for (i = 0; i < RESERVE_NR; i++)
                if (fifo_push(&ca->free[i], bucket))
                        return true;

        return false;
}

static int bch_allocator_thread(void *arg)
{
        struct cache *ca = arg;

        mutex_lock(&ca->set->bucket_lock);

        while (1) {
                /*
                 * First, we pull buckets off of the unused and free_inc lists,
                 * possibly issue discards to them, then we add the bucket to
                 * the free list:
                 */
                while (1) {
                        long bucket;

                        if (!fifo_pop(&ca->free_inc, bucket))
                                break;

                        if (ca->discard) {
                                mutex_unlock(&ca->set->bucket_lock);
                                blkdev_issue_discard(ca->bdev,
                                        bucket_to_sector(ca->set, bucket),
                                        ca->sb.bucket_size, GFP_KERNEL);
                                mutex_lock(&ca->set->bucket_lock);
                        }

                        allocator_wait(ca, bch_allocator_push(ca, bucket));
                        wake_up(&ca->set->btree_cache_wait);
                        wake_up(&ca->set->bucket_wait);
                }

                /*
                 * We've run out of free buckets, we need to find some buckets
                 * we can invalidate. First, invalidate them in memory and add
                 * them to the free_inc list:
                 */

retry_invalidate:
                allocator_wait(ca, !ca->invalidate_needs_gc);
                invalidate_buckets(ca);

                /*
                 * Now, we write their new gens to disk so we can start writing
                 * new stuff to them:
                 */
                allocator_wait(ca, !atomic_read(&ca->set->prio_blocked));
                if (CACHE_SYNC(&ca->sb)) {
                        /*
                         * This could deadlock if an allocation with a btree
                         * node locked ever blocked - having the btree node
                         * locked would block garbage collection, but here we're
                         * waiting on garbage collection before we invalidate
                         * and free anything.
                         *
                         * But this should be safe since the btree code always
                         * uses btree_check_reserve() before allocating now, and
                         * if it fails it blocks without btree nodes locked.
                         */
                        if (!fifo_full(&ca->free_inc))
                                goto retry_invalidate;

                        if (bch_prio_write(ca, false) < 0) {
                                ca->invalidate_needs_gc = 1;
                                wake_up_gc(ca->set);
                        }
                }
        }
out:
        wait_for_kthread_stop();
        return 0;
}

/* Allocation */

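/*
 * Allocate a single bucket from @ca for the given @reserve, trying the
 * general-purpose freelist first. Must be called with bucket_lock held; if
 * @wait is true, the lock is dropped while we sleep for the allocator thread
 * to refill the freelists. Returns a bucket index, or -1 on failure.
 */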
long bch_bucket_alloc(struct cache *ca, unsigned int reserve, bool wait)
{
        DEFINE_WAIT(w);
        struct bucket *b;
        long r;

        /* No allocation if CACHE_SET_IO_DISABLE bit is set */
        if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &ca->set->flags)))
                return -1;

        /* fastpath */
        if (fifo_pop(&ca->free[RESERVE_NONE], r) ||
            fifo_pop(&ca->free[reserve], r))
                goto out;

        if (!wait) {
                trace_bcache_alloc_fail(ca, reserve);
                return -1;
        }

        do {
                prepare_to_wait(&ca->set->bucket_wait, &w,
                                TASK_UNINTERRUPTIBLE);

                mutex_unlock(&ca->set->bucket_lock);
                schedule();
                mutex_lock(&ca->set->bucket_lock);
        } while (!fifo_pop(&ca->free[RESERVE_NONE], r) &&
                 !fifo_pop(&ca->free[reserve], r));

        finish_wait(&ca->set->bucket_wait, &w);
out:
        if (ca->alloc_thread)
                wake_up_process(ca->alloc_thread);

        trace_bcache_alloc(ca, reserve);

        if (expensive_debug_checks(ca->set)) {
                size_t iter;
                long i;
                unsigned int j;

                for (iter = 0; iter < prio_buckets(ca) * 2; iter++)
                        BUG_ON(ca->prio_buckets[iter] == (uint64_t) r);

                for (j = 0; j < RESERVE_NR; j++)
                        fifo_for_each(i, &ca->free[j], iter)
                                BUG_ON(i == r);
                fifo_for_each(i, &ca->free_inc, iter)
                        BUG_ON(i == r);
        }

        b = ca->buckets + r;

        BUG_ON(atomic_read(&b->pin) != 1);

        SET_GC_SECTORS_USED(b, ca->sb.bucket_size);

        if (reserve <= RESERVE_PRIO) {
                SET_GC_MARK(b, GC_MARK_METADATA);
                SET_GC_MOVE(b, 0);
                b->prio = BTREE_PRIO;
        } else {
                SET_GC_MARK(b, GC_MARK_RECLAIMABLE);
                SET_GC_MOVE(b, 0);
                b->prio = INITIAL_PRIO;
        }

        if (ca->set->avail_nbuckets > 0) {
                ca->set->avail_nbuckets--;
                bch_update_bucket_in_use(ca->set, &ca->set->gc_stats);
        }

        return r;
}

void __bch_bucket_free(struct cache *ca, struct bucket *b)
{
        SET_GC_MARK(b, 0);
        SET_GC_SECTORS_USED(b, 0);

        if (ca->set->avail_nbuckets < ca->set->nbuckets) {
                ca->set->avail_nbuckets++;
                bch_update_bucket_in_use(ca->set, &ca->set->gc_stats);
        }
}

void bch_bucket_free(struct cache_set *c, struct bkey *k)
{
        unsigned int i;

        for (i = 0; i < KEY_PTRS(k); i++)
                __bch_bucket_free(c->cache, PTR_BUCKET(c, k, i));
}

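/*
 * Allocate one bucket from the cache set's (single) cache and initialize @k
 * with one pointer to it. Caller must hold bucket_lock; returns 0 on success
 * or -1 on failure.
 */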
int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
                           struct bkey *k, bool wait)
{
        struct cache *ca;
        long b;

        /* No allocation if CACHE_SET_IO_DISABLE bit is set */
        if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags)))
                return -1;

        lockdep_assert_held(&c->bucket_lock);

        bkey_init(k);

        ca = c->cache;
        b = bch_bucket_alloc(ca, reserve, wait);
        if (b < 0)
                return -1;

        k->ptr[0] = MAKE_PTR(ca->buckets[b].gen,
                             bucket_to_sector(c, b),
                             ca->sb.nr_this_dev);

        SET_KEY_PTRS(k, 1);

        return 0;
}

int bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
                         struct bkey *k, bool wait)
{
        int ret;

        mutex_lock(&c->bucket_lock);
        ret = __bch_bucket_alloc_set(c, reserve, k, wait);
        mutex_unlock(&c->bucket_lock);
        return ret;
}

/* Sector allocator */

struct open_bucket {
        struct list_head list;
        unsigned int last_write_point;
        unsigned int sectors_free;
        BKEY_PADDED(key);
};

/*
 * We keep multiple buckets open for writes, and try to segregate different
 * write streams for better cache utilization: first we try to segregate flash
 * only volume write streams from cached devices, secondly we look for a bucket
 * where the last write to it was sequential with the current write, and
 * failing that we look for a bucket that was last used by the same task.
 *
 * The idea is that if you've got multiple tasks pulling data into the cache at
 * the same time, you'll get better cache utilization if you try to segregate
 * their data and preserve locality.
 *
 * For example, dirty sectors of a flash only volume are not reclaimable; if
 * they share buckets with dirty sectors of a cached device, those buckets stay
 * marked dirty and won't be reclaimed, even after the cached device's dirty
 * data has been written back to the backing device.
 *
 * And say you start Firefox at the same time you're copying a bunch of files.
 * Firefox will likely end up being fairly hot and stay in the cache a while,
 * but the data you copied might not be; if you wrote all that data to the same
 * buckets it'd get invalidated at the same time.
 *
 * Both of those tasks will be doing fairly random IO so we can't rely on
 * detecting sequential IO to segregate their data, but going off of the task
 * should be a sane heuristic.
 */
static struct open_bucket *pick_data_bucket(struct cache_set *c,
                                            const struct bkey *search,
                                            unsigned int write_point,
                                            struct bkey *alloc)
{
        struct open_bucket *ret, *ret_task = NULL;

        list_for_each_entry_reverse(ret, &c->data_buckets, list)
                if (UUID_FLASH_ONLY(&c->uuids[KEY_INODE(&ret->key)]) !=
                    UUID_FLASH_ONLY(&c->uuids[KEY_INODE(search)]))
                        continue;
                else if (!bkey_cmp(&ret->key, search))
                        goto found;
                else if (ret->last_write_point == write_point)
                        ret_task = ret;

        ret = ret_task ?: list_first_entry(&c->data_buckets,
                                           struct open_bucket, list);
found:
        if (!ret->sectors_free && KEY_PTRS(alloc)) {
                ret->sectors_free = c->cache->sb.bucket_size;
                bkey_copy(&ret->key, alloc);
                bkey_init(alloc);
        }

        if (!ret->sectors_free)
                ret = NULL;

        return ret;
}

/*
 * Allocates some space in the cache to write to, sets k to point to the newly
 * allocated space, and updates KEY_SIZE(k) and KEY_OFFSET(k) (to point to the
 * end of the newly allocated space).
 *
 * May allocate fewer sectors than @sectors; KEY_SIZE(k) indicates how many
 * sectors were actually allocated.
 *
 * If s->writeback is true, will not fail.
 */
bool bch_alloc_sectors(struct cache_set *c,
                       struct bkey *k,
                       unsigned int sectors,
                       unsigned int write_point,
                       unsigned int write_prio,
                       bool wait)
{
        struct open_bucket *b;
        BKEY_PADDED(key) alloc;
        unsigned int i;

        /*
         * We might have to allocate a new bucket, which we can't do with a
         * spinlock held. So if we have to allocate, we drop the lock, allocate
         * and then retry. KEY_PTRS() indicates whether alloc points to
         * allocated bucket(s).
         */

        bkey_init(&alloc.key);
        spin_lock(&c->data_bucket_lock);

        while (!(b = pick_data_bucket(c, k, write_point, &alloc.key))) {
                unsigned int watermark = write_prio
                        ? RESERVE_MOVINGGC
                        : RESERVE_NONE;

                spin_unlock(&c->data_bucket_lock);

                if (bch_bucket_alloc_set(c, watermark, &alloc.key, wait))
                        return false;

                spin_lock(&c->data_bucket_lock);
        }

        /*
         * If we had to allocate, we might race and not need to allocate the
         * second time we call pick_data_bucket(). If we allocated a bucket but
         * didn't use it, drop the refcount bch_bucket_alloc_set() took:
         */
        if (KEY_PTRS(&alloc.key))
                bkey_put(c, &alloc.key);

        for (i = 0; i < KEY_PTRS(&b->key); i++)
                EBUG_ON(ptr_stale(c, &b->key, i));

        /* Set up the pointer to the space we're allocating: */

        for (i = 0; i < KEY_PTRS(&b->key); i++)
                k->ptr[i] = b->key.ptr[i];

        sectors = min(sectors, b->sectors_free);

        SET_KEY_OFFSET(k, KEY_OFFSET(k) + sectors);
        SET_KEY_SIZE(k, sectors);
        SET_KEY_PTRS(k, KEY_PTRS(&b->key));

        /*
         * Move b to the end of the lru, and keep track of what this bucket was
         * last used for:
         */
        list_move_tail(&b->list, &c->data_buckets);
        bkey_copy_key(&b->key, k);
        b->last_write_point = write_point;

        b->sectors_free -= sectors;

        for (i = 0; i < KEY_PTRS(&b->key); i++) {
                SET_PTR_OFFSET(&b->key, i, PTR_OFFSET(&b->key, i) + sectors);

                atomic_long_add(sectors,
                                &c->cache->sectors_written);
        }

        if (b->sectors_free < c->cache->sb.block_size)
                b->sectors_free = 0;

        /*
         * k takes refcounts on the buckets it points to until it's inserted
         * into the btree, but if we're done with this bucket we just transfer
         * get_data_bucket()'s refcount.
         */
        if (b->sectors_free)
                for (i = 0; i < KEY_PTRS(&b->key); i++)
                        atomic_inc(&PTR_BUCKET(c, &b->key, i)->pin);

        spin_unlock(&c->data_bucket_lock);
        return true;
}

/* Init */

void bch_open_buckets_free(struct cache_set *c)
{
        struct open_bucket *b;

        while (!list_empty(&c->data_buckets)) {
                b = list_first_entry(&c->data_buckets,
                                     struct open_bucket, list);
                list_del(&b->list);
                kfree(b);
        }
}

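/*
 * Preallocate MAX_OPEN_BUCKETS open_bucket structures onto c->data_buckets,
 * so pick_data_bucket() always has a pool to choose from.
 */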
int bch_open_buckets_alloc(struct cache_set *c)
{
        int i;

        spin_lock_init(&c->data_bucket_lock);

        for (i = 0; i < MAX_OPEN_BUCKETS; i++) {
                struct open_bucket *b = kzalloc(sizeof(*b), GFP_KERNEL);

                if (!b)
                        return -ENOMEM;

                list_add(&b->list, &c->data_buckets);
        }

        return 0;
}

int bch_cache_allocator_start(struct cache *ca)
{
        struct task_struct *k = kthread_run(bch_allocator_thread,
                                            ca, "bcache_allocator");
        if (IS_ERR(k))
                return PTR_ERR(k);

        ca->alloc_thread = k;
        return 0;
}