Matching lines from fs/bcachefs/btree_cache.c

1 // SPDX-License-Identifier: GPL-2.0
23 bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_##counter]++; \
37 if (!c->btree_roots_known[0].b) in bch2_recalc_btree_reserve()
43 if (r->b) in bch2_recalc_btree_reserve()
44 reserve += min_t(unsigned, 1, r->b->c.level) * 8; in bch2_recalc_btree_reserve()
47 c->btree_cache.nr_reserve = reserve; in bch2_recalc_btree_reserve()
52 struct btree_cache *bc = container_of(list, struct btree_cache, live[list->idx]); in btree_cache_can_free()
54 size_t can_free = list->nr; in btree_cache_can_free()
55 if (!list->idx) in btree_cache_can_free()
56 can_free = max_t(ssize_t, 0, can_free - bc->nr_reserve); in btree_cache_can_free()
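/*
 * Note on the reserve accounting above: bch2_recalc_btree_reserve()
 * appears to size bc->nr_reserve from the btree roots that are
 * currently set, and btree_cache_can_free() only subtracts that
 * reserve for list index 0, so the shrinker never counts reserved
 * nodes on the primary live list as freeable.
 */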
60 static void btree_node_to_freedlist(struct btree_cache *bc, struct btree *b) in btree_node_to_freedlist() argument
62 if (b->c.lock.readers) in btree_node_to_freedlist()
63 list_move(&b->list, &bc->freed_pcpu); in btree_node_to_freedlist()
65 list_move(&b->list, &bc->freed_nonpcpu); in btree_node_to_freedlist()
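/*
 * Freed nodes are kept on two lists: nodes whose six lock still has
 * per-cpu reader state go to freed_pcpu, the rest to freed_nonpcpu.
 * This apparently lets bch2_btree_node_mem_alloc() (which picks a
 * freed list based on pcpu_read_locks) reuse a struct btree whose
 * lock was initialized the same way.
 */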
68 static void btree_node_data_free(struct bch_fs *c, struct btree *b) in btree_node_data_free() argument
70 struct btree_cache *bc = &c->btree_cache; in btree_node_data_free()
72 BUG_ON(btree_node_hashed(b)); in btree_node_data_free()
79 if (b->data) in btree_node_data_free()
80 mm_account_reclaimed_pages(btree_buf_bytes(b) / PAGE_SIZE); in btree_node_data_free()
81 if (b->aux_data) in btree_node_data_free()
82 mm_account_reclaimed_pages(btree_aux_data_bytes(b) / PAGE_SIZE); in btree_node_data_free()
84 EBUG_ON(btree_node_write_in_flight(b)); in btree_node_data_free()
86 clear_btree_node_just_written(b); in btree_node_data_free()
88 kvfree(b->data); in btree_node_data_free()
89 b->data = NULL; in btree_node_data_free()
91 kvfree(b->aux_data); in btree_node_data_free()
93 munmap(b->aux_data, btree_aux_data_bytes(b)); in btree_node_data_free()
95 b->aux_data = NULL; in btree_node_data_free()
97 bc->nr_freeable--; in btree_node_data_free()
99 btree_node_to_freedlist(bc, b); in btree_node_data_free()
105 const struct btree *b = obj; in bch2_btree_cache_cmp_fn() local
106 const u64 *v = arg->key; in bch2_btree_cache_cmp_fn()
108 return b->hash_val == *v ? 0 : 1; in bch2_btree_cache_cmp_fn()
119 static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp) in btree_node_data_alloc() argument
121 BUG_ON(b->data || b->aux_data); in btree_node_data_alloc()
125 b->data = kvmalloc(btree_buf_bytes(b), gfp); in btree_node_data_alloc()
126 if (!b->data) in btree_node_data_alloc()
127 return -BCH_ERR_ENOMEM_btree_node_mem_alloc; in btree_node_data_alloc()
129 b->aux_data = kvmalloc(btree_aux_data_bytes(b), gfp); in btree_node_data_alloc()
131 b->aux_data = mmap(NULL, btree_aux_data_bytes(b), in btree_node_data_alloc()
134 if (b->aux_data == MAP_FAILED) in btree_node_data_alloc()
135 b->aux_data = NULL; in btree_node_data_alloc()
137 if (!b->aux_data) { in btree_node_data_alloc()
138 kvfree(b->data); in btree_node_data_alloc()
139 b->data = NULL; in btree_node_data_alloc()
140 return -BCH_ERR_ENOMEM_btree_node_mem_alloc; in btree_node_data_alloc()
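/*
 * btree_node_data_alloc() shows two aux_data variants: a kvmalloc()
 * path and an mmap() path (presumably the userspace bcachefs-tools
 * build, matching the munmap() seen in btree_node_data_free()); the
 * mmap branch maps MAP_FAILED to NULL so the error handling is shared.
 */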
148 struct btree *b; in __btree_node_mem_alloc() local
150 b = kzalloc(sizeof(struct btree), gfp); in __btree_node_mem_alloc()
151 if (!b) in __btree_node_mem_alloc()
154 bkey_btree_ptr_init(&b->key); in __btree_node_mem_alloc()
155 INIT_LIST_HEAD(&b->list); in __btree_node_mem_alloc()
156 INIT_LIST_HEAD(&b->write_blocked); in __btree_node_mem_alloc()
157 b->byte_order = ilog2(c->opts.btree_node_size); in __btree_node_mem_alloc()
158 return b; in __btree_node_mem_alloc()
163 struct btree_cache *bc = &c->btree_cache; in __bch2_btree_node_mem_alloc()
164 struct btree *b; in __bch2_btree_node_mem_alloc() local
166 b = __btree_node_mem_alloc(c, GFP_KERNEL); in __bch2_btree_node_mem_alloc()
167 if (!b) in __bch2_btree_node_mem_alloc()
170 if (btree_node_data_alloc(c, b, GFP_KERNEL)) { in __bch2_btree_node_mem_alloc()
171 kfree(b); in __bch2_btree_node_mem_alloc()
175 bch2_btree_lock_init(&b->c, 0); in __bch2_btree_node_mem_alloc()
177 bc->nr_freeable++; in __bch2_btree_node_mem_alloc()
178 list_add(&b->list, &bc->freeable); in __bch2_btree_node_mem_alloc()
179 return b; in __bch2_btree_node_mem_alloc()
182 void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b) in bch2_btree_node_to_freelist() argument
184 mutex_lock(&c->btree_cache.lock); in bch2_btree_node_to_freelist()
185 list_move(&b->list, &c->btree_cache.freeable); in bch2_btree_node_to_freelist()
186 mutex_unlock(&c->btree_cache.lock); in bch2_btree_node_to_freelist()
188 six_unlock_write(&b->c.lock); in bch2_btree_node_to_freelist()
189 six_unlock_intent(&b->c.lock); in bch2_btree_node_to_freelist()
192 static inline bool __btree_node_pinned(struct btree_cache *bc, struct btree *b) in __btree_node_pinned() argument
194 struct bbpos pos = BBPOS(b->c.btree_id, b->key.k.p); in __btree_node_pinned()
196 u64 mask = bc->pinned_nodes_mask[!!b->c.level]; in __btree_node_pinned()
198 return ((mask & BIT_ULL(b->c.btree_id)) && in __btree_node_pinned()
199 bbpos_cmp(bc->pinned_nodes_start, pos) < 0 && in __btree_node_pinned()
200 bbpos_cmp(bc->pinned_nodes_end, pos) >= 0); in __btree_node_pinned()
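/*
 * The pin test above: pinned_nodes_mask[] is indexed by !!b->c.level,
 * i.e. one bitmask of btree ids for leaf nodes and one for interior
 * nodes, and a node only counts as pinned when its position also lies
 * in the (pinned_nodes_start, pinned_nodes_end] range.
 */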
203 void bch2_node_pin(struct bch_fs *c, struct btree *b) in bch2_node_pin() argument
205 struct btree_cache *bc = &c->btree_cache; in bch2_node_pin()
207 mutex_lock(&bc->lock); in bch2_node_pin()
208 BUG_ON(!__btree_node_pinned(bc, b)); in bch2_node_pin()
209 if (b != btree_node_root(c, b) && !btree_node_pinned(b)) { in bch2_node_pin()
210 set_btree_node_pinned(b); in bch2_node_pin()
211 list_move(&b->list, &bc->live[1].list); in bch2_node_pin()
212 bc->live[0].nr--; in bch2_node_pin()
213 bc->live[1].nr++; in bch2_node_pin()
215 mutex_unlock(&bc->lock); in bch2_node_pin()
220 struct btree_cache *bc = &c->btree_cache; in bch2_btree_cache_unpin()
221 struct btree *b, *n; in bch2_btree_cache_unpin() local
223 mutex_lock(&bc->lock); in bch2_btree_cache_unpin()
224 c->btree_cache.pinned_nodes_mask[0] = 0; in bch2_btree_cache_unpin()
225 c->btree_cache.pinned_nodes_mask[1] = 0; in bch2_btree_cache_unpin()
227 list_for_each_entry_safe(b, n, &bc->live[1].list, list) { in bch2_btree_cache_unpin()
228 clear_btree_node_pinned(b); in bch2_btree_cache_unpin()
229 list_move(&b->list, &bc->live[0].list); in bch2_btree_cache_unpin()
230 bc->live[0].nr++; in bch2_btree_cache_unpin()
231 bc->live[1].nr--; in bch2_btree_cache_unpin()
234 mutex_unlock(&bc->lock); in bch2_btree_cache_unpin()
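/*
 * live[0] holds ordinary (unpinned) cached nodes and live[1] the
 * pinned ones; bch2_node_pin() moves a non-root node between them and
 * adjusts the counters, while bch2_btree_cache_unpin() clears both
 * pinned_nodes_mask entries and drains live[1] back onto live[0],
 * clearing each node's pinned flag.
 */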
237 /* Btree in memory cache - hash table */
239 void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b) in bch2_btree_node_hash_remove() argument
241 lockdep_assert_held(&bc->lock); in bch2_btree_node_hash_remove()
242 int ret = rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params); in bch2_btree_node_hash_remove()
247 b->hash_val = 0; in bch2_btree_node_hash_remove()
249 if (b->c.btree_id < BTREE_ID_NR) in bch2_btree_node_hash_remove()
250 --bc->nr_by_btree[b->c.btree_id]; in bch2_btree_node_hash_remove()
252 bc->live[btree_node_pinned(b)].nr--; in bch2_btree_node_hash_remove()
253 bc->nr_freeable++; in bch2_btree_node_hash_remove()
254 list_move(&b->list, &bc->freeable); in bch2_btree_node_hash_remove()
257 int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b) in __bch2_btree_node_hash_insert() argument
259 BUG_ON(b->hash_val); in __bch2_btree_node_hash_insert()
260 b->hash_val = btree_ptr_hash_val(&b->key); in __bch2_btree_node_hash_insert()
262 int ret = rhashtable_lookup_insert_fast(&bc->table, &b->hash, in __bch2_btree_node_hash_insert()
267 if (b->c.btree_id < BTREE_ID_NR) in __bch2_btree_node_hash_insert()
268 bc->nr_by_btree[b->c.btree_id]++; in __bch2_btree_node_hash_insert()
270 bool p = __btree_node_pinned(bc, b); in __bch2_btree_node_hash_insert()
271 mod_bit(BTREE_NODE_pinned, &b->flags, p); in __bch2_btree_node_hash_insert()
273 list_move_tail(&b->list, &bc->live[p].list); in __bch2_btree_node_hash_insert()
274 bc->live[p].nr++; in __bch2_btree_node_hash_insert()
276 bc->nr_freeable--; in __bch2_btree_node_hash_insert()
280 int bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b, in bch2_btree_node_hash_insert() argument
283 b->c.level = level; in bch2_btree_node_hash_insert()
284 b->c.btree_id = id; in bch2_btree_node_hash_insert()
286 mutex_lock(&bc->lock); in bch2_btree_node_hash_insert()
287 int ret = __bch2_btree_node_hash_insert(bc, b); in bch2_btree_node_hash_insert()
288 mutex_unlock(&bc->lock); in bch2_btree_node_hash_insert()
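/*
 * Hash table bookkeeping, as seen above: removing a node from the hash
 * moves it to the freeable list (live[pinned].nr--, nr_freeable++),
 * and inserting does the reverse, recomputing the pinned bit with
 * __btree_node_pinned() and bumping nr_by_btree[] for regular btrees.
 */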
297 struct bch_fs *c = trans->c; in bch2_btree_node_update_key_early()
298 struct btree *b; in bch2_btree_node_update_key_early() local
305 b = bch2_btree_node_get_noiter(trans, tmp.k, btree, level, true); in bch2_btree_node_update_key_early()
306 if (!IS_ERR_OR_NULL(b)) { in bch2_btree_node_update_key_early()
307 mutex_lock(&c->btree_cache.lock); in bch2_btree_node_update_key_early()
309 bch2_btree_node_hash_remove(&c->btree_cache, b); in bch2_btree_node_update_key_early()
311 bkey_copy(&b->key, new); in bch2_btree_node_update_key_early()
312 ret = __bch2_btree_node_hash_insert(&c->btree_cache, b); in bch2_btree_node_update_key_early()
315 mutex_unlock(&c->btree_cache.lock); in bch2_btree_node_update_key_early()
316 six_unlock_read(&b->c.lock); in bch2_btree_node_update_key_early()
328 return rhashtable_lookup_fast(&bc->table, &v, bch_btree_cache_params); in btree_cache_find()
335 static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush, bool shrinker_counter) in __btree_node_reclaim() argument
337 struct btree_cache *bc = &c->btree_cache; in __btree_node_reclaim()
340 lockdep_assert_held(&bc->lock); in __btree_node_reclaim()
342 if (b->flags & ((1U << BTREE_NODE_dirty)| in __btree_node_reclaim()
346 if (btree_node_dirty(b)) in __btree_node_reclaim()
348 else if (btree_node_read_in_flight(b)) in __btree_node_reclaim()
350 else if (btree_node_write_in_flight(b)) in __btree_node_reclaim()
352 return -BCH_ERR_ENOMEM_btree_node_reclaim; in __btree_node_reclaim()
356 bch2_btree_node_wait_on_read(b); in __btree_node_reclaim()
357 bch2_btree_node_wait_on_write(b); in __btree_node_reclaim()
360 if (!six_trylock_intent(&b->c.lock)) { in __btree_node_reclaim()
362 return -BCH_ERR_ENOMEM_btree_node_reclaim; in __btree_node_reclaim()
365 if (!six_trylock_write(&b->c.lock)) { in __btree_node_reclaim()
371 if (b->flags & ((1U << BTREE_NODE_read_in_flight)| in __btree_node_reclaim()
374 if (btree_node_read_in_flight(b)) in __btree_node_reclaim()
376 else if (btree_node_write_in_flight(b)) in __btree_node_reclaim()
380 six_unlock_write(&b->c.lock); in __btree_node_reclaim()
381 six_unlock_intent(&b->c.lock); in __btree_node_reclaim()
385 if (btree_node_noevict(b)) { in __btree_node_reclaim()
389 if (btree_node_write_blocked(b)) { in __btree_node_reclaim()
393 if (btree_node_will_make_reachable(b)) { in __btree_node_reclaim()
398 if (btree_node_dirty(b)) { in __btree_node_reclaim()
406 * - unless btree verify mode is enabled, since it runs out of in __btree_node_reclaim()
410 bch2_btree_node_write(c, b, SIX_LOCK_intent, in __btree_node_reclaim()
413 __bch2_btree_node_write(c, b, in __btree_node_reclaim()
416 six_unlock_write(&b->c.lock); in __btree_node_reclaim()
417 six_unlock_intent(&b->c.lock); in __btree_node_reclaim()
421 if (b->hash_val && !ret) in __btree_node_reclaim()
422 trace_and_count(c, btree_cache_reap, c, b); in __btree_node_reclaim()
425 six_unlock_write(&b->c.lock); in __btree_node_reclaim()
427 six_unlock_intent(&b->c.lock); in __btree_node_reclaim()
428 ret = -BCH_ERR_ENOMEM_btree_node_reclaim; in __btree_node_reclaim()
432 static int btree_node_reclaim(struct bch_fs *c, struct btree *b, bool shrinker_counter) in btree_node_reclaim() argument
434 return __btree_node_reclaim(c, b, false, shrinker_counter); in btree_node_reclaim()
437 static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b) in btree_node_write_and_reclaim() argument
439 return __btree_node_reclaim(c, b, true, false); in btree_node_write_and_reclaim()
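/*
 * __btree_node_reclaim(), summarized: the node must not be dirty,
 * noevict, write blocked, about to be made reachable, or have reads or
 * writes in flight, and both the intent and write locks must be
 * trylock-able; otherwise the matching not_freed[] counter is bumped
 * (via the macro near the top of the file) and
 * -BCH_ERR_ENOMEM_btree_node_reclaim is returned.  With flush set,
 * in-flight I/O is waited on and dirty nodes are written out instead
 * of being skipped.
 */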
445 struct btree_cache_list *list = shrink->private_data; in bch2_btree_cache_scan()
446 struct btree_cache *bc = container_of(list, struct btree_cache, live[list->idx]); in bch2_btree_cache_scan()
448 struct btree *b, *t; in bch2_btree_cache_scan() local
449 unsigned long nr = sc->nr_to_scan; in bch2_btree_cache_scan()
455 bool trigger_writes = atomic_long_read(&bc->nr_dirty) + nr >= list->nr * 3 / 4; in bch2_btree_cache_scan()
460 mutex_lock(&bc->lock); in bch2_btree_cache_scan()
464 * It's _really_ critical that we don't free too many btree nodes - we in bch2_btree_cache_scan()
474 list_for_each_entry_safe(b, t, &bc->freeable, list) { in bch2_btree_cache_scan()
487 if (!btree_node_reclaim(c, b, true)) { in bch2_btree_cache_scan()
488 btree_node_data_free(c, b); in bch2_btree_cache_scan()
489 six_unlock_write(&b->c.lock); in bch2_btree_cache_scan()
490 six_unlock_intent(&b->c.lock); in bch2_btree_cache_scan()
492 bc->nr_freed++; in bch2_btree_cache_scan()
496 list_for_each_entry_safe(b, t, &list->list, list) { in bch2_btree_cache_scan()
499 if (btree_node_accessed(b)) { in bch2_btree_cache_scan()
500 clear_btree_node_accessed(b); in bch2_btree_cache_scan()
501 bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_access_bit]++; in bch2_btree_cache_scan()
502 --touched; in bch2_btree_cache_scan()
503 } else if (!btree_node_reclaim(c, b, true)) { in bch2_btree_cache_scan()
504 bch2_btree_node_hash_remove(bc, b); in bch2_btree_cache_scan()
507 btree_node_data_free(c, b); in bch2_btree_cache_scan()
508 bc->nr_freed++; in bch2_btree_cache_scan()
510 six_unlock_write(&b->c.lock); in bch2_btree_cache_scan()
511 six_unlock_intent(&b->c.lock); in bch2_btree_cache_scan()
516 btree_node_dirty(b) && in bch2_btree_cache_scan()
517 !btree_node_will_make_reachable(b) && in bch2_btree_cache_scan()
518 !btree_node_write_blocked(b) && in bch2_btree_cache_scan()
519 six_trylock_read(&b->c.lock)) { in bch2_btree_cache_scan()
520 list_move(&list->list, &b->list); in bch2_btree_cache_scan()
521 mutex_unlock(&bc->lock); in bch2_btree_cache_scan()
522 __bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim); in bch2_btree_cache_scan()
523 six_unlock_read(&b->c.lock); in bch2_btree_cache_scan()
526 mutex_lock(&bc->lock); in bch2_btree_cache_scan()
534 if (&t->list != &list->list) in bch2_btree_cache_scan()
535 list_move_tail(&list->list, &t->list); in bch2_btree_cache_scan()
537 mutex_unlock(&bc->lock); in bch2_btree_cache_scan()
541 trace_and_count(c, btree_cache_scan, sc->nr_to_scan, can_free, ret); in bch2_btree_cache_scan()
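/*
 * Note the list_move() calls on &list->list in the scan loop above:
 * the shrinker appears to use the list head itself as a cursor,
 * parking it next to the node being worked on so it can drop bc->lock
 * around a write and pick up where it left off.  Recently accessed
 * nodes get a second chance (the accessed bit is cleared rather than
 * reclaiming them), and freeable-list nodes are reaped first.
 */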
548 struct btree_cache_list *list = shrink->private_data; in bch2_btree_cache_count()
558 struct btree_cache *bc = &c->btree_cache; in bch2_fs_btree_cache_exit()
559 struct btree *b, *t; in bch2_fs_btree_cache_exit() local
562 shrinker_free(bc->live[1].shrink); in bch2_fs_btree_cache_exit()
563 shrinker_free(bc->live[0].shrink); in bch2_fs_btree_cache_exit()
567 mutex_lock(&bc->lock); in bch2_fs_btree_cache_exit()
569 if (c->verify_data) in bch2_fs_btree_cache_exit()
570 list_move(&c->verify_data->list, &bc->live[0].list); in bch2_fs_btree_cache_exit()
572 kvfree(c->verify_ondisk); in bch2_fs_btree_cache_exit()
577 if (r->b) in bch2_fs_btree_cache_exit()
578 list_add(&r->b->list, &bc->live[0].list); in bch2_fs_btree_cache_exit()
581 list_for_each_entry_safe(b, t, &bc->live[1].list, list) in bch2_fs_btree_cache_exit()
582 bch2_btree_node_hash_remove(bc, b); in bch2_fs_btree_cache_exit()
583 list_for_each_entry_safe(b, t, &bc->live[0].list, list) in bch2_fs_btree_cache_exit()
584 bch2_btree_node_hash_remove(bc, b); in bch2_fs_btree_cache_exit()
586 list_for_each_entry_safe(b, t, &bc->freeable, list) { in bch2_fs_btree_cache_exit()
587 BUG_ON(btree_node_read_in_flight(b) || in bch2_fs_btree_cache_exit()
588 btree_node_write_in_flight(b)); in bch2_fs_btree_cache_exit()
590 btree_node_data_free(c, b); in bch2_fs_btree_cache_exit()
593 BUG_ON(!bch2_journal_error(&c->journal) && in bch2_fs_btree_cache_exit()
594 atomic_long_read(&c->btree_cache.nr_dirty)); in bch2_fs_btree_cache_exit()
596 list_splice(&bc->freed_pcpu, &bc->freed_nonpcpu); in bch2_fs_btree_cache_exit()
598 list_for_each_entry_safe(b, t, &bc->freed_nonpcpu, list) { in bch2_fs_btree_cache_exit()
599 list_del(&b->list); in bch2_fs_btree_cache_exit()
600 six_lock_exit(&b->c.lock); in bch2_fs_btree_cache_exit()
601 kfree(b); in bch2_fs_btree_cache_exit()
604 mutex_unlock(&bc->lock); in bch2_fs_btree_cache_exit()
607 for (unsigned i = 0; i < ARRAY_SIZE(bc->nr_by_btree); i++) in bch2_fs_btree_cache_exit()
608 BUG_ON(bc->nr_by_btree[i]); in bch2_fs_btree_cache_exit()
609 BUG_ON(bc->live[0].nr); in bch2_fs_btree_cache_exit()
610 BUG_ON(bc->live[1].nr); in bch2_fs_btree_cache_exit()
611 BUG_ON(bc->nr_freeable); in bch2_fs_btree_cache_exit()
613 if (bc->table_init_done) in bch2_fs_btree_cache_exit()
614 rhashtable_destroy(&bc->table); in bch2_fs_btree_cache_exit()
619 struct btree_cache *bc = &c->btree_cache; in bch2_fs_btree_cache_init()
624 ret = rhashtable_init(&bc->table, &bch_btree_cache_params); in bch2_fs_btree_cache_init()
628 bc->table_init_done = true; in bch2_fs_btree_cache_init()
632 for (i = 0; i < bc->nr_reserve; i++) in bch2_fs_btree_cache_init()
636 list_splice_init(&bc->live[0].list, &bc->freeable); in bch2_fs_btree_cache_init()
638 mutex_init(&c->verify_lock); in bch2_fs_btree_cache_init()
640 shrink = shrinker_alloc(0, "%s-btree_cache", c->name); in bch2_fs_btree_cache_init()
643 bc->live[0].shrink = shrink; in bch2_fs_btree_cache_init()
644 shrink->count_objects = bch2_btree_cache_count; in bch2_fs_btree_cache_init()
645 shrink->scan_objects = bch2_btree_cache_scan; in bch2_fs_btree_cache_init()
646 shrink->seeks = 2; in bch2_fs_btree_cache_init()
647 shrink->private_data = &bc->live[0]; in bch2_fs_btree_cache_init()
650 shrink = shrinker_alloc(0, "%s-btree_cache-pinned", c->name); in bch2_fs_btree_cache_init()
653 bc->live[1].shrink = shrink; in bch2_fs_btree_cache_init()
654 shrink->count_objects = bch2_btree_cache_count; in bch2_fs_btree_cache_init()
655 shrink->scan_objects = bch2_btree_cache_scan; in bch2_fs_btree_cache_init()
656 shrink->seeks = 8; in bch2_fs_btree_cache_init()
657 shrink->private_data = &bc->live[1]; in bch2_fs_btree_cache_init()
662 return -BCH_ERR_ENOMEM_fs_btree_cache_init; in bch2_fs_btree_cache_init()
667 mutex_init(&bc->lock); in bch2_fs_btree_cache_init_early()
668 for (unsigned i = 0; i < ARRAY_SIZE(bc->live); i++) { in bch2_fs_btree_cache_init_early()
669 bc->live[i].idx = i; in bch2_fs_btree_cache_init_early()
670 INIT_LIST_HEAD(&bc->live[i].list); in bch2_fs_btree_cache_init_early()
672 INIT_LIST_HEAD(&bc->freeable); in bch2_fs_btree_cache_init_early()
673 INIT_LIST_HEAD(&bc->freed_pcpu); in bch2_fs_btree_cache_init_early()
674 INIT_LIST_HEAD(&bc->freed_nonpcpu); in bch2_fs_btree_cache_init_early()
685 struct bch_fs *c = trans->c; in bch2_btree_cache_cannibalize_unlock()
686 struct btree_cache *bc = &c->btree_cache; in bch2_btree_cache_cannibalize_unlock()
688 if (bc->alloc_lock == current) { in bch2_btree_cache_cannibalize_unlock()
690 bc->alloc_lock = NULL; in bch2_btree_cache_cannibalize_unlock()
691 closure_wake_up(&bc->alloc_wait); in bch2_btree_cache_cannibalize_unlock()
697 struct bch_fs *c = trans->c; in bch2_btree_cache_cannibalize_lock()
698 struct btree_cache *bc = &c->btree_cache; in bch2_btree_cache_cannibalize_lock()
702 if (try_cmpxchg(&bc->alloc_lock, &old, current) || old == current) in bch2_btree_cache_cannibalize_lock()
707 return -BCH_ERR_ENOMEM_btree_cache_cannibalize_lock; in bch2_btree_cache_cannibalize_lock()
710 closure_wait(&bc->alloc_wait, cl); in bch2_btree_cache_cannibalize_lock()
714 if (try_cmpxchg(&bc->alloc_lock, &old, current) || old == current) { in bch2_btree_cache_cannibalize_lock()
716 closure_wake_up(&bc->alloc_wait); in bch2_btree_cache_cannibalize_lock()
721 return -BCH_ERR_btree_cache_cannibalize_lock_blocked; in bch2_btree_cache_cannibalize_lock()
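/*
 * Cannibalize locking: bc->alloc_lock is claimed with try_cmpxchg()
 * (re-entrant for the current task).  Apparently, when no closure is
 * passed, -BCH_ERR_ENOMEM_btree_cache_cannibalize_lock is returned
 * immediately; otherwise the task parks on bc->alloc_wait and the
 * caller gets -BCH_ERR_btree_cache_cannibalize_lock_blocked so the
 * transaction can back off and retry.  Unlock clears the owner and
 * wakes waiters.
 */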
730 struct btree_cache *bc = &c->btree_cache; in btree_node_cannibalize()
731 struct btree *b; in btree_node_cannibalize() local
733 for (unsigned i = 0; i < ARRAY_SIZE(bc->live); i++) in btree_node_cannibalize()
734 list_for_each_entry_reverse(b, &bc->live[i].list, list) in btree_node_cannibalize()
735 if (!btree_node_reclaim(c, b, false)) in btree_node_cannibalize()
736 return b; in btree_node_cannibalize()
739 for (unsigned i = 0; i < ARRAY_SIZE(bc->live); i++) in btree_node_cannibalize()
740 list_for_each_entry_reverse(b, &bc->live[i].list, list) in btree_node_cannibalize()
741 if (!btree_node_write_and_reclaim(c, b)) in btree_node_cannibalize()
742 return b; in btree_node_cannibalize()
745 * Rare case: all nodes were intent-locked. in btree_node_cannibalize()
746 * Just busy-wait. in btree_node_cannibalize()
755 struct bch_fs *c = trans->c; in bch2_btree_node_mem_alloc()
756 struct btree_cache *bc = &c->btree_cache; in bch2_btree_node_mem_alloc()
758 ? &bc->freed_pcpu in bch2_btree_node_mem_alloc()
759 : &bc->freed_nonpcpu; in bch2_btree_node_mem_alloc()
760 struct btree *b, *b2; in bch2_btree_node_mem_alloc() local
763 mutex_lock(&bc->lock); in bch2_btree_node_mem_alloc()
769 list_for_each_entry(b, freed, list) in bch2_btree_node_mem_alloc()
770 if (!btree_node_reclaim(c, b, false)) { in bch2_btree_node_mem_alloc()
771 list_del_init(&b->list); in bch2_btree_node_mem_alloc()
775 b = __btree_node_mem_alloc(c, GFP_NOWAIT|__GFP_NOWARN); in bch2_btree_node_mem_alloc()
776 if (!b) { in bch2_btree_node_mem_alloc()
777 mutex_unlock(&bc->lock); in bch2_btree_node_mem_alloc()
779 b = __btree_node_mem_alloc(c, GFP_KERNEL); in bch2_btree_node_mem_alloc()
780 if (!b) in bch2_btree_node_mem_alloc()
782 mutex_lock(&bc->lock); in bch2_btree_node_mem_alloc()
785 bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0); in bch2_btree_node_mem_alloc()
787 BUG_ON(!six_trylock_intent(&b->c.lock)); in bch2_btree_node_mem_alloc()
788 BUG_ON(!six_trylock_write(&b->c.lock)); in bch2_btree_node_mem_alloc()
795 list_for_each_entry(b2, &bc->freeable, list) in bch2_btree_node_mem_alloc()
797 swap(b->data, b2->data); in bch2_btree_node_mem_alloc()
798 swap(b->aux_data, b2->aux_data); in bch2_btree_node_mem_alloc()
800 six_unlock_write(&b2->c.lock); in bch2_btree_node_mem_alloc()
801 six_unlock_intent(&b2->c.lock); in bch2_btree_node_mem_alloc()
805 mutex_unlock(&bc->lock); in bch2_btree_node_mem_alloc()
807 if (btree_node_data_alloc(c, b, GFP_NOWAIT|__GFP_NOWARN)) { in bch2_btree_node_mem_alloc()
809 if (btree_node_data_alloc(c, b, GFP_KERNEL|__GFP_NOWARN)) in bch2_btree_node_mem_alloc()
813 mutex_lock(&bc->lock); in bch2_btree_node_mem_alloc()
814 bc->nr_freeable++; in bch2_btree_node_mem_alloc()
816 mutex_unlock(&bc->lock); in bch2_btree_node_mem_alloc()
818 BUG_ON(btree_node_hashed(b)); in bch2_btree_node_mem_alloc()
819 BUG_ON(btree_node_dirty(b)); in bch2_btree_node_mem_alloc()
820 BUG_ON(btree_node_write_in_flight(b)); in bch2_btree_node_mem_alloc()
822 b->flags = 0; in bch2_btree_node_mem_alloc()
823 b->written = 0; in bch2_btree_node_mem_alloc()
824 b->nsets = 0; in bch2_btree_node_mem_alloc()
825 b->sib_u64s[0] = 0; in bch2_btree_node_mem_alloc()
826 b->sib_u64s[1] = 0; in bch2_btree_node_mem_alloc()
827 b->whiteout_u64s = 0; in bch2_btree_node_mem_alloc()
828 bch2_btree_keys_init(b); in bch2_btree_node_mem_alloc()
829 set_btree_node_accessed(b); in bch2_btree_node_mem_alloc()
831 bch2_time_stats_update(&c->times[BCH_TIME_btree_node_mem_alloc], in bch2_btree_node_mem_alloc()
836 bch2_btree_node_to_freelist(c, b); in bch2_btree_node_mem_alloc()
840 return b; in bch2_btree_node_mem_alloc()
842 mutex_lock(&bc->lock); in bch2_btree_node_mem_alloc()
845 if (bc->alloc_lock == current) { in bch2_btree_node_mem_alloc()
850 if (b) { in bch2_btree_node_mem_alloc()
851 swap(b->data, b2->data); in bch2_btree_node_mem_alloc()
852 swap(b->aux_data, b2->aux_data); in bch2_btree_node_mem_alloc()
854 six_unlock_write(&b2->c.lock); in bch2_btree_node_mem_alloc()
855 six_unlock_intent(&b2->c.lock); in bch2_btree_node_mem_alloc()
857 b = b2; in bch2_btree_node_mem_alloc()
858 list_del_init(&b->list); in bch2_btree_node_mem_alloc()
861 mutex_unlock(&bc->lock); in bch2_btree_node_mem_alloc()
867 mutex_unlock(&bc->lock); in bch2_btree_node_mem_alloc()
868 return ERR_PTR(-BCH_ERR_ENOMEM_btree_node_mem_alloc); in bch2_btree_node_mem_alloc()
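/*
 * Allocation order in bch2_btree_node_mem_alloc(), as shown above:
 * first try to reclaim a struct btree from the appropriate freed list,
 * else allocate one (GFP_NOWAIT, then GFP_KERNEL); then try to steal
 * data buffers from a reclaimable node on the freeable list before
 * allocating fresh ones; if that all fails and this task holds the
 * cannibalize lock, it apparently evicts a live node via
 * btree_node_cannibalize() and takes over its buffers.
 */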
880 struct bch_fs *c = trans->c; in bch2_btree_node_fill()
881 struct btree_cache *bc = &c->btree_cache; in bch2_btree_node_fill()
882 struct btree *b; in bch2_btree_node_fill() local
890 if (unlikely(!bkey_is_btree_ptr(&k->k))) { in bch2_btree_node_fill()
894 int ret = bch2_fs_topology_error(c, "attempting to get btree node with non-btree key %s", buf.buf); in bch2_btree_node_fill()
899 if (unlikely(k->k.u64s > BKEY_BTREE_PTR_U64s_MAX)) { in bch2_btree_node_fill()
917 b = bch2_btree_node_mem_alloc(trans, level != 0); in bch2_btree_node_fill()
919 if (bch2_err_matches(PTR_ERR_OR_ZERO(b), ENOMEM)) { in bch2_btree_node_fill()
921 return b; in bch2_btree_node_fill()
923 trans->memory_allocation_failure = true; in bch2_btree_node_fill()
928 if (IS_ERR(b)) in bch2_btree_node_fill()
929 return b; in bch2_btree_node_fill()
931 bkey_copy(&b->key, k); in bch2_btree_node_fill()
932 if (bch2_btree_node_hash_insert(bc, b, level, btree_id)) { in bch2_btree_node_fill()
936 b->hash_val = 0; in bch2_btree_node_fill()
938 mutex_lock(&bc->lock); in bch2_btree_node_fill()
939 list_add(&b->list, &bc->freeable); in bch2_btree_node_fill()
940 mutex_unlock(&bc->lock); in bch2_btree_node_fill()
942 six_unlock_write(&b->c.lock); in bch2_btree_node_fill()
943 six_unlock_intent(&b->c.lock); in bch2_btree_node_fill()
947 set_btree_node_read_in_flight(b); in bch2_btree_node_fill()
948 six_unlock_write(&b->c.lock); in bch2_btree_node_fill()
951 u32 seq = six_lock_seq(&b->c.lock); in bch2_btree_node_fill()
954 six_unlock_intent(&b->c.lock); in bch2_btree_node_fill()
957 bch2_btree_node_read(trans, b, sync); in bch2_btree_node_fill()
966 if (!six_relock_type(&b->c.lock, lock_type, seq)) in bch2_btree_node_fill()
967 b = NULL; in bch2_btree_node_fill()
969 bch2_btree_node_read(trans, b, sync); in bch2_btree_node_fill()
971 six_lock_downgrade(&b->c.lock); in bch2_btree_node_fill()
974 return b; in bch2_btree_node_fill()
977 static noinline void btree_bad_header(struct bch_fs *c, struct btree *b) in btree_bad_header() argument
981 if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_allocations) in btree_bad_header()
988 bch2_btree_id_str(b->c.btree_id), b->c.level); in btree_bad_header()
989 bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); in btree_bad_header()
993 bch2_btree_id_str(BTREE_NODE_ID(b->data)), in btree_bad_header()
994 BTREE_NODE_LEVEL(b->data)); in btree_bad_header()
995 bch2_bpos_to_text(&buf, b->data->min_key); in btree_bad_header()
998 bch2_bpos_to_text(&buf, b->data->max_key); in btree_bad_header()
1005 static inline void btree_check_header(struct bch_fs *c, struct btree *b) in btree_check_header() argument
1007 if (b->c.btree_id != BTREE_NODE_ID(b->data) || in btree_check_header()
1008 b->c.level != BTREE_NODE_LEVEL(b->data) || in btree_check_header()
1009 !bpos_eq(b->data->max_key, b->key.k.p) || in btree_check_header()
1010 (b->key.k.type == KEY_TYPE_btree_ptr_v2 && in btree_check_header()
1011 !bpos_eq(b->data->min_key, in btree_check_header()
1012 bkey_i_to_btree_ptr_v2(&b->key)->v.min_key))) in btree_check_header()
1013 btree_bad_header(c, b); in btree_check_header()
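/*
 * btree_check_header() cross-checks the in-memory node against what
 * was read from disk: btree id, level, max_key, and (for btree_ptr_v2
 * keys) min_key must all match the node's key, otherwise
 * btree_bad_header() prints both versions.
 */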
1021 struct bch_fs *c = trans->c; in __bch2_btree_node_get()
1022 struct btree_cache *bc = &c->btree_cache; in __bch2_btree_node_get()
1023 struct btree *b; in __bch2_btree_node_get() local
1029 b = btree_cache_find(bc, k); in __bch2_btree_node_get()
1030 if (unlikely(!b)) { in __bch2_btree_node_get()
1036 b = bch2_btree_node_fill(trans, path, k, path->btree_id, in __bch2_btree_node_get()
1041 if (!b) in __bch2_btree_node_get()
1044 if (IS_ERR(b)) in __bch2_btree_node_get()
1045 return b; in __bch2_btree_node_get()
1050 ret = btree_node_lock(trans, path, &b->c, level, lock_type, trace_ip); in __bch2_btree_node_get()
1056 if (unlikely(b->hash_val != btree_ptr_hash_val(k) || in __bch2_btree_node_get()
1057 b->c.level != level || in __bch2_btree_node_get()
1059 six_unlock_type(&b->c.lock, lock_type); in __bch2_btree_node_get()
1068 if (!btree_node_accessed(b)) in __bch2_btree_node_get()
1069 set_btree_node_accessed(b); in __bch2_btree_node_get()
1072 if (unlikely(btree_node_read_in_flight(b))) { in __bch2_btree_node_get()
1073 u32 seq = six_lock_seq(&b->c.lock); in __bch2_btree_node_get()
1075 six_unlock_type(&b->c.lock, lock_type); in __bch2_btree_node_get()
1079 bch2_btree_node_wait_on_read(b); in __bch2_btree_node_get()
1089 if (!six_relock_type(&b->c.lock, lock_type, seq)) in __bch2_btree_node_get()
1097 six_unlock_type(&b->c.lock, lock_type); in __bch2_btree_node_get()
1102 prefetch(b->aux_data); in __bch2_btree_node_get()
1104 for_each_bset(b, t) { in __bch2_btree_node_get()
1105 void *p = (u64 *) b->aux_data + t->aux_data_offset; in __bch2_btree_node_get()
1112 if (unlikely(btree_node_read_error(b))) { in __bch2_btree_node_get()
1113 six_unlock_type(&b->c.lock, lock_type); in __bch2_btree_node_get()
1114 return ERR_PTR(-BCH_ERR_btree_node_read_error); in __bch2_btree_node_get()
1117 EBUG_ON(b->c.btree_id != path->btree_id); in __bch2_btree_node_get()
1118 EBUG_ON(BTREE_NODE_LEVEL(b->data) != level); in __bch2_btree_node_get()
1119 btree_check_header(c, b); in __bch2_btree_node_get()
1121 return b; in __bch2_btree_node_get()
1125 * bch2_btree_node_get - find a btree node in the cache and lock it, reading it
1145 struct bch_fs *c = trans->c; in bch2_btree_node_get()
1146 struct btree *b; in bch2_btree_node_get() local
1151 b = btree_node_mem_ptr(k); in bch2_btree_node_get()
1154 * Check b->hash_val _before_ calling btree_node_lock() - this might not in bch2_btree_node_get()
1158 if (unlikely(!c->opts.btree_node_mem_ptr_optimization || in bch2_btree_node_get()
1159 !b || in bch2_btree_node_get()
1160 b->hash_val != btree_ptr_hash_val(k))) in bch2_btree_node_get()
1166 ret = btree_node_lock(trans, path, &b->c, level, lock_type, trace_ip); in bch2_btree_node_get()
1172 if (unlikely(b->hash_val != btree_ptr_hash_val(k) || in bch2_btree_node_get()
1173 b->c.level != level || in bch2_btree_node_get()
1175 six_unlock_type(&b->c.lock, lock_type); in bch2_btree_node_get()
1183 if (unlikely(btree_node_read_in_flight(b))) { in bch2_btree_node_get()
1184 six_unlock_type(&b->c.lock, lock_type); in bch2_btree_node_get()
1188 prefetch(b->aux_data); in bch2_btree_node_get()
1190 for_each_bset(b, t) { in bch2_btree_node_get()
1191 void *p = (u64 *) b->aux_data + t->aux_data_offset; in bch2_btree_node_get()
1199 if (!btree_node_accessed(b)) in bch2_btree_node_get()
1200 set_btree_node_accessed(b); in bch2_btree_node_get()
1202 if (unlikely(btree_node_read_error(b))) { in bch2_btree_node_get()
1203 six_unlock_type(&b->c.lock, lock_type); in bch2_btree_node_get()
1204 return ERR_PTR(-BCH_ERR_btree_node_read_error); in bch2_btree_node_get()
1207 EBUG_ON(b->c.btree_id != path->btree_id); in bch2_btree_node_get()
1208 EBUG_ON(BTREE_NODE_LEVEL(b->data) != level); in bch2_btree_node_get()
1209 btree_check_header(c, b); in bch2_btree_node_get()
1211 return b; in bch2_btree_node_get()
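/*
 * bch2_btree_node_get() first tries btree_node_mem_ptr(k), apparently
 * a node pointer cached in the key itself; since that pointer can be
 * stale, hash_val and level are re-checked after the lock is taken
 * (see the comment above about checking b->hash_val before
 * btree_node_lock()), and the slow path __bch2_btree_node_get() falls
 * back to the hash table lookup and, if needed, bch2_btree_node_fill().
 */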
1220 struct bch_fs *c = trans->c; in bch2_btree_node_get_noiter()
1221 struct btree_cache *bc = &c->btree_cache; in bch2_btree_node_get_noiter()
1222 struct btree *b; in bch2_btree_node_get_noiter() local
1227 if (c->opts.btree_node_mem_ptr_optimization) { in bch2_btree_node_get_noiter()
1228 b = btree_node_mem_ptr(k); in bch2_btree_node_get_noiter()
1229 if (b) in bch2_btree_node_get_noiter()
1233 b = btree_cache_find(bc, k); in bch2_btree_node_get_noiter()
1234 if (unlikely(!b)) { in bch2_btree_node_get_noiter()
1238 b = bch2_btree_node_fill(trans, NULL, k, btree_id, in bch2_btree_node_get_noiter()
1242 if (!b) in bch2_btree_node_get_noiter()
1245 if (IS_ERR(b) && in bch2_btree_node_get_noiter()
1249 if (IS_ERR(b)) in bch2_btree_node_get_noiter()
1253 ret = btree_node_lock_nopath(trans, &b->c, SIX_LOCK_read, _THIS_IP_); in bch2_btree_node_get_noiter()
1259 if (unlikely(b->hash_val != btree_ptr_hash_val(k) || in bch2_btree_node_get_noiter()
1260 b->c.btree_id != btree_id || in bch2_btree_node_get_noiter()
1261 b->c.level != level)) { in bch2_btree_node_get_noiter()
1262 six_unlock_read(&b->c.lock); in bch2_btree_node_get_noiter()
1268 __bch2_btree_node_wait_on_read(b); in bch2_btree_node_get_noiter()
1270 prefetch(b->aux_data); in bch2_btree_node_get_noiter()
1272 for_each_bset(b, t) { in bch2_btree_node_get_noiter()
1273 void *p = (u64 *) b->aux_data + t->aux_data_offset; in bch2_btree_node_get_noiter()
1281 if (!btree_node_accessed(b)) in bch2_btree_node_get_noiter()
1282 set_btree_node_accessed(b); in bch2_btree_node_get_noiter()
1284 if (unlikely(btree_node_read_error(b))) { in bch2_btree_node_get_noiter()
1285 six_unlock_read(&b->c.lock); in bch2_btree_node_get_noiter()
1286 b = ERR_PTR(-BCH_ERR_btree_node_read_error); in bch2_btree_node_get_noiter()
1290 EBUG_ON(b->c.btree_id != btree_id); in bch2_btree_node_get_noiter()
1291 EBUG_ON(BTREE_NODE_LEVEL(b->data) != level); in bch2_btree_node_get_noiter()
1292 btree_check_header(c, b); in bch2_btree_node_get_noiter()
1295 return b; in bch2_btree_node_get_noiter()
1303 struct bch_fs *c = trans->c; in bch2_btree_node_prefetch()
1304 struct btree_cache *bc = &c->btree_cache; in bch2_btree_node_prefetch()
1309 struct btree *b = btree_cache_find(bc, k); in bch2_btree_node_prefetch() local
1310 if (b) in bch2_btree_node_prefetch()
1313 b = bch2_btree_node_fill(trans, path, k, btree_id, in bch2_btree_node_prefetch()
1315 if (!IS_ERR_OR_NULL(b)) in bch2_btree_node_prefetch()
1316 six_unlock_read(&b->c.lock); in bch2_btree_node_prefetch()
1317 return bch2_trans_relock(trans) ?: PTR_ERR_OR_ZERO(b); in bch2_btree_node_prefetch()
1322 struct bch_fs *c = trans->c; in bch2_btree_node_evict()
1323 struct btree_cache *bc = &c->btree_cache; in bch2_btree_node_evict()
1324 struct btree *b; in bch2_btree_node_evict() local
1326 b = btree_cache_find(bc, k); in bch2_btree_node_evict()
1327 if (!b) in bch2_btree_node_evict()
1330 BUG_ON(b == btree_node_root(trans->c, b)); in bch2_btree_node_evict()
1337 __bch2_btree_node_wait_on_read(b); in bch2_btree_node_evict()
1338 __bch2_btree_node_wait_on_write(b); in bch2_btree_node_evict()
1340 btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent); in bch2_btree_node_evict()
1341 btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write); in bch2_btree_node_evict()
1342 if (unlikely(b->hash_val != btree_ptr_hash_val(k))) in bch2_btree_node_evict()
1345 if (btree_node_dirty(b)) { in bch2_btree_node_evict()
1346 __bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim); in bch2_btree_node_evict()
1347 six_unlock_write(&b->c.lock); in bch2_btree_node_evict()
1348 six_unlock_intent(&b->c.lock); in bch2_btree_node_evict()
1352 BUG_ON(btree_node_dirty(b)); in bch2_btree_node_evict()
1354 mutex_lock(&bc->lock); in bch2_btree_node_evict()
1355 bch2_btree_node_hash_remove(bc, b); in bch2_btree_node_evict()
1356 btree_node_data_free(c, b); in bch2_btree_node_evict()
1357 mutex_unlock(&bc->lock); in bch2_btree_node_evict()
1359 six_unlock_write(&b->c.lock); in bch2_btree_node_evict()
1360 six_unlock_intent(&b->c.lock); in bch2_btree_node_evict()
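/*
 * Eviction, as above: wait for in-flight reads and writes, take the
 * node's intent and write locks, write it out if it is still dirty
 * (and apparently retry), then remove it from the hash table and free
 * its data under bc->lock.
 */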
1376 void bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b) in bch2_btree_pos_to_text() argument
1379 bch2_btree_id_str(b->c.btree_id), in bch2_btree_pos_to_text()
1380 b->c.level, in bch2_btree_pos_to_text()
1381 bch2_btree_id_root(c, b->c.btree_id)->level); in bch2_btree_pos_to_text()
1382 bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key)); in bch2_btree_pos_to_text()
1385 void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b) in bch2_btree_node_to_text() argument
1391 bch2_btree_keys_stats(b, &stats); in bch2_btree_node_to_text()
1393 prt_printf(out, "l %u ", b->c.level); in bch2_btree_node_to_text()
1394 bch2_bpos_to_text(out, b->data->min_key); in bch2_btree_node_to_text()
1395 prt_printf(out, " - "); in bch2_btree_node_to_text()
1396 bch2_bpos_to_text(out, b->data->max_key); in bch2_btree_node_to_text()
1399 bch2_val_to_text(out, c, bkey_i_to_s_c(&b->key)); in bch2_btree_node_to_text()
1404 bch2_bkey_format_to_text(out, &b->format); in bch2_btree_node_to_text()
1414 b->unpack_fn_len, in bch2_btree_node_to_text()
1415 b->nr.live_u64s * sizeof(u64), in bch2_btree_node_to_text()
1416 btree_buf_bytes(b) - sizeof(struct btree_node), in bch2_btree_node_to_text()
1417 b->nr.live_u64s * 100 / btree_max_u64s(c), in bch2_btree_node_to_text()
1418 b->sib_u64s[0], in bch2_btree_node_to_text()
1419 b->sib_u64s[1], in bch2_btree_node_to_text()
1420 c->btree_foreground_merge_threshold, in bch2_btree_node_to_text()
1421 b->nr.packed_keys, in bch2_btree_node_to_text()
1422 b->nr.unpacked_keys, in bch2_btree_node_to_text()
1431 prt_human_readable_u64(out, nr * c->opts.btree_node_size); in prt_btree_cache_line()
1446 if (!out->nr_tabstops) in bch2_btree_cache_to_text()
1449 prt_btree_cache_line(out, c, "live:", bc->live[0].nr); in bch2_btree_cache_to_text()
1450 prt_btree_cache_line(out, c, "pinned:", bc->live[1].nr); in bch2_btree_cache_to_text()
1451 prt_btree_cache_line(out, c, "freeable:", bc->nr_freeable); in bch2_btree_cache_to_text()
1452 prt_btree_cache_line(out, c, "dirty:", atomic_long_read(&bc->nr_dirty)); in bch2_btree_cache_to_text()
1453 prt_printf(out, "cannibalize lock:\t%p\n", bc->alloc_lock); in bch2_btree_cache_to_text()
1456 for (unsigned i = 0; i < ARRAY_SIZE(bc->nr_by_btree); i++) in bch2_btree_cache_to_text()
1457 prt_btree_cache_line(out, c, bch2_btree_id_str(i), bc->nr_by_btree[i]); in bch2_btree_cache_to_text()
1460 prt_printf(out, "freed:\t%zu\n", bc->nr_freed); in bch2_btree_cache_to_text()
1463 for (unsigned i = 0; i < ARRAY_SIZE(bc->not_freed); i++) in bch2_btree_cache_to_text()
1465 bch2_btree_cache_not_freed_reasons_strs[i], bc->not_freed[i]); in bch2_btree_cache_to_text()