// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "bkey_buf.h"
#include "bset.h"
#include "btree_cache.h"
#include "btree_journal_iter.h"
#include "journal_io.h"

#include <linux/sort.h>

/*
 * For managing keys we read from the journal: until journal replay has
 * finished, normal btree lookups need to be able to find and return keys from
 * the journal where they overwrite what's in the btree, so we have a special
 * iterator and operations for the regular btree iter code to use:
 */
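
/*
 * journal_keys is kept as a sorted array with a gap (of size keys->size -
 * keys->nr) at keys->gap, so that repeated insertions near the same position
 * stay cheap: an "idx" is a key's logical index in sorted order, a "pos" is
 * its physical position in keys->data[], and the helpers below convert
 * between the two by skipping over the gap.
 */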
static inline size_t pos_to_idx(struct journal_keys *keys, size_t pos)
{
	size_t gap_size = keys->size - keys->nr;

	BUG_ON(pos >= keys->gap && pos < keys->gap + gap_size);

	if (pos >= keys->gap)
		pos -= gap_size;
	return pos;
}

static inline size_t idx_to_pos(struct journal_keys *keys, size_t idx)
{
	size_t gap_size = keys->size - keys->nr;

	if (idx >= keys->gap)
		idx += gap_size;
	return idx;
}

static inline struct journal_key *idx_to_key(struct journal_keys *keys, size_t idx)
{
	return keys->data + idx_to_pos(keys, idx);
}

static size_t __bch2_journal_key_search(struct journal_keys *keys,
					enum btree_id id, unsigned level,
					struct bpos pos)
{
	size_t l = 0, r = keys->nr, m;

	while (l < r) {
		m = l + ((r - l) >> 1);
		if (__journal_key_cmp(id, level, pos, idx_to_key(keys, m)) > 0)
			l = m + 1;
		else
			r = m;
	}

	BUG_ON(l < keys->nr &&
	       __journal_key_cmp(id, level, pos, idx_to_key(keys, l)) > 0);

	BUG_ON(l &&
	       __journal_key_cmp(id, level, pos, idx_to_key(keys, l - 1)) <= 0);

	return l;
}

static size_t bch2_journal_key_search(struct journal_keys *keys,
				      enum btree_id id, unsigned level,
				      struct bpos pos)
{
	return idx_to_pos(keys, __bch2_journal_key_search(keys, id, level, pos));
}

/* Returns first non-overwritten key >= search key: */
struct bkey_i *bch2_journal_keys_peek_max(struct bch_fs *c, enum btree_id btree_id,
					  unsigned level, struct bpos pos,
					  struct bpos end_pos, size_t *idx)
{
	struct journal_keys *keys = &c->journal_keys;
	unsigned iters = 0;
	struct journal_key *k;

	BUG_ON(*idx > keys->nr);
search:
	if (!*idx)
		*idx = __bch2_journal_key_search(keys, btree_id, level, pos);

	while (*idx &&
	       __journal_key_cmp(btree_id, level, end_pos, idx_to_key(keys, *idx - 1)) <= 0) {
		--(*idx);
		iters++;
		if (iters == 10) {
			*idx = 0;
			goto search;
		}
	}

	struct bkey_i *ret = NULL;
	rcu_read_lock(); /* for overwritten_ranges */

	while ((k = *idx < keys->nr ? idx_to_key(keys, *idx) : NULL)) {
		if (__journal_key_cmp(btree_id, level, end_pos, k) < 0)
			break;

		if (k->overwritten) {
			if (k->overwritten_range)
				*idx = rcu_dereference(k->overwritten_range)->end;
			else
				*idx += 1;
			continue;
		}

		if (__journal_key_cmp(btree_id, level, pos, k) <= 0) {
			ret = k->k;
			break;
		}

		(*idx)++;
		iters++;
		if (iters == 10) {
			*idx = 0;
			rcu_read_unlock();
			goto search;
		}
	}

	rcu_read_unlock();
	return ret;
}
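
/* Returns last non-overwritten key <= search key, not less than end_pos: */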
struct bkey_i *bch2_journal_keys_peek_prev_min(struct bch_fs *c, enum btree_id btree_id,
					       unsigned level, struct bpos pos,
					       struct bpos end_pos, size_t *idx)
{
	struct journal_keys *keys = &c->journal_keys;
	unsigned iters = 0;
	struct journal_key *k;

	BUG_ON(*idx > keys->nr);
search:
	if (!*idx)
		*idx = __bch2_journal_key_search(keys, btree_id, level, pos);

	while (*idx &&
	       __journal_key_cmp(btree_id, level, end_pos, idx_to_key(keys, *idx - 1)) <= 0) {
		(*idx)++;
		iters++;
		if (iters == 10) {
			*idx = 0;
			goto search;
		}
	}

	struct bkey_i *ret = NULL;
	rcu_read_lock(); /* for overwritten_ranges */

	while ((k = *idx < keys->nr ? idx_to_key(keys, *idx) : NULL)) {
		if (__journal_key_cmp(btree_id, level, end_pos, k) > 0)
			break;

		if (k->overwritten) {
			if (k->overwritten_range)
				*idx = rcu_dereference(k->overwritten_range)->start - 1;
			else
				*idx -= 1;
			continue;
		}

		if (__journal_key_cmp(btree_id, level, pos, k) >= 0) {
			ret = k->k;
			break;
		}

		--(*idx);
		iters++;
		if (iters == 10) {
			*idx = 0;
			rcu_read_unlock();
			goto search;
		}
	}

	rcu_read_unlock();
	return ret;
}

struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *c, enum btree_id btree_id,
					   unsigned level, struct bpos pos)
{
	size_t idx = 0;

	return bch2_journal_keys_peek_max(c, btree_id, level, pos, pos, &idx);
}

static void journal_iter_verify(struct journal_iter *iter)
{
#ifdef CONFIG_BCACHEFS_DEBUG
	struct journal_keys *keys = iter->keys;
	size_t gap_size = keys->size - keys->nr;

	BUG_ON(iter->idx >= keys->gap &&
	       iter->idx < keys->gap + gap_size);

	if (iter->idx < keys->size) {
		struct journal_key *k = keys->data + iter->idx;

		int cmp = __journal_key_btree_cmp(iter->btree_id, iter->level, k);
		BUG_ON(cmp > 0);
	}
#endif
}

static void journal_iters_fix(struct bch_fs *c)
{
	struct journal_keys *keys = &c->journal_keys;
	/* The key we just inserted is immediately before the gap: */
	size_t gap_end = keys->gap + (keys->size - keys->nr);
	struct journal_key *new_key = &keys->data[keys->gap - 1];
	struct journal_iter *iter;

	/*
	 * If an iterator points one after the key we just inserted, decrement
	 * the iterator so it points at the key we just inserted - if the
	 * decrement was unnecessary, bch2_btree_and_journal_iter_peek() will
	 * handle that:
	 */
	list_for_each_entry(iter, &c->journal_iters, list) {
		journal_iter_verify(iter);
		if (iter->idx == gap_end &&
		    new_key->btree_id == iter->btree_id &&
		    new_key->level == iter->level)
			iter->idx = keys->gap - 1;
		journal_iter_verify(iter);
	}
}

static void journal_iters_move_gap(struct bch_fs *c, size_t old_gap, size_t new_gap)
{
	struct journal_keys *keys = &c->journal_keys;
	struct journal_iter *iter;
	size_t gap_size = keys->size - keys->nr;

	list_for_each_entry(iter, &c->journal_iters, list) {
		if (iter->idx > old_gap)
			iter->idx -= gap_size;
		if (iter->idx >= new_gap)
			iter->idx += gap_size;
	}
}
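
/*
 * Insert a key into the journal keys overlay, taking ownership of @k (it will
 * be freed when the journal keys are released). Like bch2_journal_key_insert(),
 * this may only be used during recovery, while the filesystem is still RO.
 */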
int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
				 unsigned level, struct bkey_i *k)
{
	struct journal_key n = {
		.btree_id	= id,
		.level		= level,
		.k		= k,
		.allocated	= true,
		/*
		 * Ensure these keys are done last by journal replay, to unblock
		 * journal reclaim:
		 */
		.journal_seq	= U64_MAX,
	};
	struct journal_keys *keys = &c->journal_keys;
	size_t idx = bch2_journal_key_search(keys, id, level, k->k.p);

	BUG_ON(test_bit(BCH_FS_rw, &c->flags));

	if (idx < keys->size &&
	    journal_key_cmp(&n, &keys->data[idx]) == 0) {
		if (keys->data[idx].allocated)
			kfree(keys->data[idx].k);
		keys->data[idx] = n;
		return 0;
	}

	if (idx > keys->gap)
		idx -= keys->size - keys->nr;

	size_t old_gap = keys->gap;

	if (keys->nr == keys->size) {
		journal_iters_move_gap(c, old_gap, keys->size);
		old_gap = keys->size;

		struct journal_keys new_keys = {
			.nr	= keys->nr,
			.size	= max_t(size_t, keys->size, 8) * 2,
		};

		new_keys.data = kvmalloc_array(new_keys.size, sizeof(new_keys.data[0]), GFP_KERNEL);
		if (!new_keys.data) {
			bch_err(c, "%s: error allocating new key array (size %zu)",
				__func__, new_keys.size);
			return -BCH_ERR_ENOMEM_journal_key_insert;
		}

		/* Since @keys was full, there was no gap: */
		memcpy(new_keys.data, keys->data, sizeof(keys->data[0]) * keys->nr);
		kvfree(keys->data);
		keys->data = new_keys.data;
		keys->nr = new_keys.nr;
		keys->size = new_keys.size;

		/* And now the gap is at the end: */
		keys->gap = keys->nr;
	}

	journal_iters_move_gap(c, old_gap, idx);

	move_gap(keys, idx);

	keys->nr++;
	keys->data[keys->gap++] = n;

	journal_iters_fix(c);

	return 0;
}

/*
 * Can only be used from the recovery thread while we're still RO - can't be
 * used once we've got RW, as journal_keys is at that point used by multiple
 * threads:
 */
int bch2_journal_key_insert(struct bch_fs *c, enum btree_id id,
			    unsigned level, struct bkey_i *k)
{
	struct bkey_i *n;
	int ret;

	n = kmalloc(bkey_bytes(&k->k), GFP_KERNEL);
	if (!n)
		return -BCH_ERR_ENOMEM_journal_key_insert;

	bkey_copy(n, k);
	ret = bch2_journal_key_insert_take(c, id, level, n);
	if (ret)
		kfree(n);
	return ret;
}

int bch2_journal_key_delete(struct bch_fs *c, enum btree_id id,
			    unsigned level, struct bpos pos)
{
	struct bkey_i whiteout;

	bkey_init(&whiteout.k);
	whiteout.k.p = pos;

	return bch2_journal_key_insert(c, id, level, &whiteout);
}

bool bch2_key_deleted_in_journal(struct btree_trans *trans, enum btree_id btree,
				 unsigned level, struct bpos pos)
{
	struct journal_keys *keys = &trans->c->journal_keys;
	size_t idx = bch2_journal_key_search(keys, btree, level, pos);

	if (!trans->journal_replay_not_finished)
		return false;

	return (idx < keys->size &&
		keys->data[idx].btree_id == btree &&
		keys->data[idx].level == level &&
		bpos_eq(keys->data[idx].k->k.p, pos) &&
		bkey_deleted(&keys->data[idx].k->k));
}
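
/*
 * Mark the key at @pos as overwritten. Adjacent overwritten keys are linked
 * into a shared journal_key_range_overwritten so that iterators can skip the
 * whole run at once; ranges are extended and merged here, and freed via RCU
 * since readers walk them under rcu_read_lock().
 */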
static void __bch2_journal_key_overwritten(struct journal_keys *keys, size_t pos)
{
	struct journal_key *k = keys->data + pos;
	size_t idx = pos_to_idx(keys, pos);

	k->overwritten = true;

	struct journal_key *prev = idx > 0 ? keys->data + idx_to_pos(keys, idx - 1) : NULL;
	struct journal_key *next = idx + 1 < keys->nr ? keys->data + idx_to_pos(keys, idx + 1) : NULL;

	bool prev_overwritten = prev && prev->overwritten;
	bool next_overwritten = next && next->overwritten;

	struct journal_key_range_overwritten *prev_range =
		prev_overwritten ? prev->overwritten_range : NULL;
	struct journal_key_range_overwritten *next_range =
		next_overwritten ? next->overwritten_range : NULL;

	BUG_ON(prev_range && prev_range->end != idx);
	BUG_ON(next_range && next_range->start != idx + 1);

	if (prev_range && next_range) {
		prev_range->end = next_range->end;

		keys->data[pos].overwritten_range = prev_range;
		for (size_t i = next_range->start; i < next_range->end; i++) {
			struct journal_key *ip = keys->data + idx_to_pos(keys, i);
			BUG_ON(ip->overwritten_range != next_range);
			ip->overwritten_range = prev_range;
		}

		kfree_rcu_mightsleep(next_range);
	} else if (prev_range) {
		prev_range->end++;
		k->overwritten_range = prev_range;
		if (next_overwritten) {
			prev_range->end++;
			next->overwritten_range = prev_range;
		}
	} else if (next_range) {
		next_range->start--;
		k->overwritten_range = next_range;
		if (prev_overwritten) {
			next_range->start--;
			prev->overwritten_range = next_range;
		}
	} else if (prev_overwritten || next_overwritten) {
		struct journal_key_range_overwritten *r = kmalloc(sizeof(*r), GFP_KERNEL);
		if (!r)
			return;

		r->start = idx - (size_t) prev_overwritten;
		r->end = idx + 1 + (size_t) next_overwritten;

		rcu_assign_pointer(k->overwritten_range, r);
		if (prev_overwritten)
			prev->overwritten_range = r;
		if (next_overwritten)
			next->overwritten_range = r;
	}
}

void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree,
				  unsigned level, struct bpos pos)
{
	struct journal_keys *keys = &c->journal_keys;
	size_t idx = bch2_journal_key_search(keys, btree, level, pos);

	if (idx < keys->size &&
	    keys->data[idx].btree_id == btree &&
	    keys->data[idx].level == level &&
	    bpos_eq(keys->data[idx].k->k.p, pos) &&
	    !keys->data[idx].overwritten) {
		mutex_lock(&keys->overwrite_lock);
		__bch2_journal_key_overwritten(keys, idx);
		mutex_unlock(&keys->overwrite_lock);
	}
}

static void bch2_journal_iter_advance(struct journal_iter *iter)
{
	if (iter->idx < iter->keys->size) {
		iter->idx++;
		if (iter->idx == iter->keys->gap)
			iter->idx += iter->keys->size - iter->keys->nr;
	}
}

static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter)
{
	struct bkey_s_c ret = bkey_s_c_null;

	journal_iter_verify(iter);

	rcu_read_lock();
	while (iter->idx < iter->keys->size) {
		struct journal_key *k = iter->keys->data + iter->idx;

		int cmp = __journal_key_btree_cmp(iter->btree_id, iter->level, k);
		if (cmp < 0)
			break;
		BUG_ON(cmp);

		if (!k->overwritten) {
			ret = bkey_i_to_s_c(k->k);
			break;
		}

		if (k->overwritten_range)
			iter->idx = idx_to_pos(iter->keys, rcu_dereference(k->overwritten_range)->end);
		else
			bch2_journal_iter_advance(iter);
	}
	rcu_read_unlock();

	return ret;
}

static void bch2_journal_iter_exit(struct journal_iter *iter)
{
	list_del(&iter->list);
}

static void bch2_journal_iter_init(struct bch_fs *c,
				   struct journal_iter *iter,
				   enum btree_id id, unsigned level,
				   struct bpos pos)
{
	iter->btree_id = id;
	iter->level = level;
	iter->keys = &c->journal_keys;
	iter->idx = bch2_journal_key_search(&c->journal_keys, id, level, pos);

	journal_iter_verify(iter);
}

static struct bkey_s_c bch2_journal_iter_peek_btree(struct btree_and_journal_iter *iter)
{
	return bch2_btree_node_iter_peek_unpack(&iter->node_iter,
						iter->b, &iter->unpacked);
}

static void bch2_journal_iter_advance_btree(struct btree_and_journal_iter *iter)
{
	bch2_btree_node_iter_advance(&iter->node_iter, iter->b);
}

void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *iter)
{
	if (bpos_eq(iter->pos, SPOS_MAX))
		iter->at_end = true;
	else
		iter->pos = bpos_successor(iter->pos);
}

static void btree_and_journal_iter_prefetch(struct btree_and_journal_iter *_iter)
{
	struct btree_and_journal_iter iter = *_iter;
	struct bch_fs *c = iter.trans->c;
	unsigned level = iter.journal.level;
	struct bkey_buf tmp;
	unsigned nr = test_bit(BCH_FS_started, &c->flags)
		? (level > 1 ? 0 : 2)
		: (level > 1 ? 1 : 16);

	iter.prefetch = false;
	iter.fail_if_too_many_whiteouts = true;
	bch2_bkey_buf_init(&tmp);

	while (nr--) {
		bch2_btree_and_journal_iter_advance(&iter);
		struct bkey_s_c k = bch2_btree_and_journal_iter_peek(&iter);
		if (!k.k)
			break;

		bch2_bkey_buf_reassemble(&tmp, c, k);
		bch2_btree_node_prefetch(iter.trans, NULL, tmp.k, iter.journal.btree_id, level - 1);
	}

	bch2_bkey_buf_exit(&tmp, c);
}
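
/*
 * Return the next key >= iter->pos, merging keys from the btree node with
 * keys from the journal overlay: when both have a key at the same position
 * the journal key takes precedence, and deleted keys (whiteouts) cause the
 * iterator to advance and retry.
 */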
struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *iter)
{
	struct bkey_s_c btree_k, journal_k = bkey_s_c_null, ret;
	size_t iters = 0;

	if (iter->prefetch && iter->journal.level)
		btree_and_journal_iter_prefetch(iter);
again:
	if (iter->at_end)
		return bkey_s_c_null;

	iters++;

	if (iters > 20 && iter->fail_if_too_many_whiteouts)
		return bkey_s_c_null;

	while ((btree_k = bch2_journal_iter_peek_btree(iter)).k &&
	       bpos_lt(btree_k.k->p, iter->pos))
		bch2_journal_iter_advance_btree(iter);

	if (iter->trans->journal_replay_not_finished)
		while ((journal_k = bch2_journal_iter_peek(&iter->journal)).k &&
		       bpos_lt(journal_k.k->p, iter->pos))
			bch2_journal_iter_advance(&iter->journal);

	ret = journal_k.k &&
		(!btree_k.k || bpos_le(journal_k.k->p, btree_k.k->p))
		? journal_k
		: btree_k;

	if (ret.k && iter->b && bpos_gt(ret.k->p, iter->b->data->max_key))
		ret = bkey_s_c_null;

	if (ret.k) {
		iter->pos = ret.k->p;
		if (bkey_deleted(ret.k)) {
			bch2_btree_and_journal_iter_advance(iter);
			goto again;
		}
	} else {
		iter->pos = SPOS_MAX;
		iter->at_end = true;
	}

	return ret;
}

void bch2_btree_and_journal_iter_exit(struct btree_and_journal_iter *iter)
{
	bch2_journal_iter_exit(&iter->journal);
}

void __bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans,
						  struct btree_and_journal_iter *iter,
						  struct btree *b,
						  struct btree_node_iter node_iter,
						  struct bpos pos)
{
	memset(iter, 0, sizeof(*iter));

	iter->trans = trans;
	iter->b = b;
	iter->node_iter = node_iter;
	iter->pos = b->data->min_key;
	iter->at_end = false;
	INIT_LIST_HEAD(&iter->journal.list);

	if (trans->journal_replay_not_finished) {
		bch2_journal_iter_init(trans->c, &iter->journal, b->c.btree_id, b->c.level, pos);
		if (!test_bit(BCH_FS_may_go_rw, &trans->c->flags))
			list_add(&iter->journal.list, &trans->c->journal_iters);
	}
}

/*
 * this version is used by btree_gc before filesystem has gone RW and
 * multithreaded, so uses the journal_iters list:
 */
void bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans,
						struct btree_and_journal_iter *iter,
						struct btree *b)
{
	struct btree_node_iter node_iter;

	bch2_btree_node_iter_init_from_start(&node_iter, b);
	__bch2_btree_and_journal_iter_init_node_iter(trans, iter, b, node_iter, b->data->min_key);
}

/* sort and dedup all keys in the journal: */

/*
 * When keys compare equal, oldest compares first:
 */
static int journal_sort_key_cmp(const void *_l, const void *_r)
{
	const struct journal_key *l = _l;
	const struct journal_key *r = _r;

	return journal_key_cmp(l, r) ?:
		cmp_int(l->journal_seq, r->journal_seq) ?:
		cmp_int(l->journal_offset, r->journal_offset);
}

void bch2_journal_keys_put(struct bch_fs *c)
{
	struct journal_keys *keys = &c->journal_keys;

	BUG_ON(atomic_read(&keys->ref) <= 0);

	if (!atomic_dec_and_test(&keys->ref))
		return;

	move_gap(keys, keys->nr);

	darray_for_each(*keys, i) {
		if (i->overwritten_range &&
		    (i == &darray_last(*keys) ||
		     i->overwritten_range != i[1].overwritten_range))
			kfree(i->overwritten_range);

		if (i->allocated)
			kfree(i->k);
	}

	kvfree(keys->data);
	keys->data = NULL;
	keys->nr = keys->gap = keys->size = 0;

	struct journal_replay **i;
	struct genradix_iter iter;

	genradix_for_each(&c->journal_entries, iter, i)
		kvfree(*i);
	genradix_free(&c->journal_entries);
}
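
/*
 * Sort, then dedup: when the journal contains multiple versions of the same
 * key, only the newest is kept. Accounting keys are the exception - every
 * version is retained so it can be checked against the btree during replay.
 */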
static void __journal_keys_sort(struct journal_keys *keys)
{
	sort(keys->data, keys->nr, sizeof(keys->data[0]), journal_sort_key_cmp, NULL);

	cond_resched();

	struct journal_key *dst = keys->data;

	darray_for_each(*keys, src) {
		/*
		 * We don't accumulate accounting keys here because we have to
		 * compare each individual accounting key against the version in
		 * the btree during replay:
		 */
		if (src->k->k.type != KEY_TYPE_accounting &&
		    src + 1 < &darray_top(*keys) &&
		    !journal_key_cmp(src, src + 1))
			continue;

		*dst++ = *src;
	}

	keys->nr = dst - keys->data;
}

int bch2_journal_keys_sort(struct bch_fs *c)
{
	struct genradix_iter iter;
	struct journal_replay *i, **_i;
	struct journal_keys *keys = &c->journal_keys;
	size_t nr_read = 0;

	genradix_for_each(&c->journal_entries, iter, _i) {
		i = *_i;

		if (journal_replay_ignore(i))
			continue;

		cond_resched();

		for_each_jset_key(k, entry, &i->j) {
			struct journal_key n = (struct journal_key) {
				.btree_id	= entry->btree_id,
				.level		= entry->level,
				.k		= k,
				.journal_seq	= le64_to_cpu(i->j.seq),
				.journal_offset	= k->_data - i->j._data,
			};

			if (darray_push(keys, n)) {
				__journal_keys_sort(keys);

				if (keys->nr * 8 > keys->size * 7) {
					bch_err(c, "Too many journal keys for slowpath; have %zu compacted, buf size %zu, processed %zu keys at seq %llu",
						keys->nr, keys->size, nr_read, le64_to_cpu(i->j.seq));
					return -BCH_ERR_ENOMEM_journal_keys_sort;
				}

				BUG_ON(darray_push(keys, n));
			}

			nr_read++;
		}
	}

	__journal_keys_sort(keys);
	keys->gap = keys->nr;

	bch_verbose(c, "Journal keys: %zu read, %zu after sorting and compacting", nr_read, keys->nr);
	return 0;
}

void bch2_shoot_down_journal_keys(struct bch_fs *c, enum btree_id btree,
				  unsigned level_min, unsigned level_max,
				  struct bpos start, struct bpos end)
{
	struct journal_keys *keys = &c->journal_keys;
	size_t dst = 0;

	move_gap(keys, keys->nr);

	darray_for_each(*keys, i)
		if (!(i->btree_id == btree &&
		      i->level >= level_min &&
		      i->level <= level_max &&
		      bpos_ge(i->k->k.p, start) &&
		      bpos_le(i->k->k.p, end)))
			keys->data[dst++] = *i;
	keys->nr = keys->gap = dst;
}

void bch2_journal_keys_dump(struct bch_fs *c)
{
	struct journal_keys *keys = &c->journal_keys;
	struct printbuf buf = PRINTBUF;

	pr_info("%zu keys:", keys->nr);

	move_gap(keys, keys->nr);

	darray_for_each(*keys, i) {
		printbuf_reset(&buf);
		prt_printf(&buf, "btree=");
		bch2_btree_id_to_text(&buf, i->btree_id);
		prt_printf(&buf, " l=%u ", i->level);
		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(i->k));
		pr_err("%s", buf.buf);
	}
	printbuf_exit(&buf);
}

void bch2_fs_journal_keys_init(struct bch_fs *c)
{
	struct journal_keys *keys = &c->journal_keys;

	atomic_set(&keys->ref, 1);
	keys->initial_ref_held = true;
	mutex_init(&keys->overwrite_lock);
}