// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "btree_update.h"
#include "btree_iter.h"
#include "btree_journal_iter.h"
#include "btree_locking.h"
#include "buckets.h"
#include "debug.h"
#include "errcode.h"
#include "error.h"
#include "extents.h"
#include "keylist.h"
#include "snapshot.h"
#include "trace.h"

static inline int btree_insert_entry_cmp(const struct btree_insert_entry *l,
					 const struct btree_insert_entry *r)
{
	return   cmp_int(l->btree_id, r->btree_id) ?:
		 cmp_int(l->cached, r->cached) ?:
		 -cmp_int(l->level, r->level) ?:
		 bpos_cmp(l->k->k.p, r->k->k.p);
}

static int __must_check
bch2_trans_update_by_path(struct btree_trans *, struct btree_path *,
			  struct bkey_i *, enum btree_update_flags,
			  unsigned long ip);

static noinline int extent_front_merge(struct btree_trans *trans,
				       struct btree_iter *iter,
				       struct bkey_s_c k,
				       struct bkey_i **insert,
				       enum btree_update_flags flags)
{
	struct bch_fs *c = trans->c;
	struct bkey_i *update;
	int ret;

	update = bch2_bkey_make_mut_noupdate(trans, k);
	ret = PTR_ERR_OR_ZERO(update);
	if (ret)
		return ret;

	if (!bch2_bkey_merge(c, bkey_i_to_s(update), bkey_i_to_s_c(*insert)))
		return 0;

	ret = bch2_key_has_snapshot_overwrites(trans, iter->btree_id, k.k->p) ?:
	      bch2_key_has_snapshot_overwrites(trans, iter->btree_id, (*insert)->k.p);
	if (ret < 0)
		return ret;
	if (ret)
		return 0;

	ret = bch2_btree_delete_at(trans, iter, flags);
	if (ret)
		return ret;

	*insert = update;
	return 0;
}

static noinline int extent_back_merge(struct btree_trans *trans,
				      struct btree_iter *iter,
				      struct bkey_i *insert,
				      struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	int ret;

	ret = bch2_key_has_snapshot_overwrites(trans, iter->btree_id, insert->k.p) ?:
	      bch2_key_has_snapshot_overwrites(trans, iter->btree_id, k.k->p);
	if (ret < 0)
		return ret;
	if (ret)
		return 0;

	bch2_bkey_merge(c, bkey_i_to_s(insert), k);
	return 0;
}

/*
 * When deleting, check if we need to emit a whiteout (because we're overwriting
 * something in an ancestor snapshot)
 */
static int need_whiteout_for_snapshot(struct btree_trans *trans,
				      enum btree_id btree_id, struct bpos pos)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	u32 snapshot = pos.snapshot;
	int ret;

	if (!bch2_snapshot_parent(trans->c, pos.snapshot))
		return 0;

	pos.snapshot++;

	for_each_btree_key_norestart(trans, iter, btree_id, pos,
				     BTREE_ITER_ALL_SNAPSHOTS|
				     BTREE_ITER_NOPRESERVE, k, ret) {
		if (!bkey_eq(k.k->p, pos))
			break;

		if (bch2_snapshot_is_ancestor(trans->c, snapshot,
					      k.k->p.snapshot)) {
			ret = !bkey_whiteout(k.k);
			break;
		}
	}
	bch2_trans_iter_exit(trans, &iter);

	return ret;
}

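/*
 * We're moving a key from old_pos to new_pos: child snapshots that have their
 * own version of the key at old_pos shouldn't see it at its new position, so
 * emit whiteouts at new_pos in those snapshots (unless a key already exists
 * there):
 */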
int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans,
				     enum btree_id id,
				     struct bpos old_pos,
				     struct bpos new_pos)
{
	struct bch_fs *c = trans->c;
	struct btree_iter old_iter, new_iter = { NULL };
	struct bkey_s_c old_k, new_k;
	snapshot_id_list s;
	struct bkey_i *update;
	int ret = 0;

	if (!bch2_snapshot_has_children(c, old_pos.snapshot))
		return 0;

	darray_init(&s);

	bch2_trans_iter_init(trans, &old_iter, id, old_pos,
			     BTREE_ITER_NOT_EXTENTS|
			     BTREE_ITER_ALL_SNAPSHOTS);
	while ((old_k = bch2_btree_iter_prev(&old_iter)).k &&
	       !(ret = bkey_err(old_k)) &&
	       bkey_eq(old_pos, old_k.k->p)) {
		struct bpos whiteout_pos =
			SPOS(new_pos.inode, new_pos.offset, old_k.k->p.snapshot);

		if (!bch2_snapshot_is_ancestor(c, old_k.k->p.snapshot, old_pos.snapshot) ||
		    snapshot_list_has_ancestor(c, &s, old_k.k->p.snapshot))
			continue;

		new_k = bch2_bkey_get_iter(trans, &new_iter, id, whiteout_pos,
					   BTREE_ITER_NOT_EXTENTS|
					   BTREE_ITER_INTENT);
		ret = bkey_err(new_k);
		if (ret)
			break;

		if (new_k.k->type == KEY_TYPE_deleted) {
			update = bch2_trans_kmalloc(trans, sizeof(struct bkey_i));
			ret = PTR_ERR_OR_ZERO(update);
			if (ret)
				break;

			bkey_init(&update->k);
			update->k.p = whiteout_pos;
			update->k.type = KEY_TYPE_whiteout;

			ret = bch2_trans_update(trans, &new_iter, update,
						BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
		}
		bch2_trans_iter_exit(trans, &new_iter);

		ret = snapshot_list_add(c, &s, old_k.k->p.snapshot);
		if (ret)
			break;
	}
	bch2_trans_iter_exit(trans, &new_iter);
	bch2_trans_iter_exit(trans, &old_iter);
	darray_exit(&s);

	return ret;
}

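/*
 * The new extent @new overlaps the existing extent @old: trim and/or split
 * @old as needed; when the two keys are in different snapshots, also emit the
 * whiteouts required to keep the correct versions visible in each snapshot:
 */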
int bch2_trans_update_extent_overwrite(struct btree_trans *trans,
				       struct btree_iter *iter,
				       enum btree_update_flags flags,
				       struct bkey_s_c old,
				       struct bkey_s_c new)
{
	enum btree_id btree_id = iter->btree_id;
	struct bkey_i *update;
	struct bpos new_start = bkey_start_pos(new.k);
	bool front_split = bkey_lt(bkey_start_pos(old.k), new_start);
	bool back_split  = bkey_gt(old.k->p, new.k->p);
	int ret = 0, compressed_sectors;

	/*
	 * If we're going to be splitting a compressed extent, note it
	 * so that __bch2_trans_commit() can increase our disk
	 * reservation:
	 */
	if (((front_split && back_split) ||
	     ((front_split || back_split) && old.k->p.snapshot != new.k->p.snapshot)) &&
	    (compressed_sectors = bch2_bkey_sectors_compressed(old)))
		trans->extra_journal_res += compressed_sectors;

	if (front_split) {
		update = bch2_bkey_make_mut_noupdate(trans, old);
		if ((ret = PTR_ERR_OR_ZERO(update)))
			return ret;

		bch2_cut_back(new_start, update);

		ret = bch2_insert_snapshot_whiteouts(trans, btree_id,
					old.k->p, update->k.p) ?:
		      bch2_btree_insert_nonextent(trans, btree_id, update,
					BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags);
		if (ret)
			return ret;
	}

	/* If we're overwriting in a different snapshot - middle split: */
	if (old.k->p.snapshot != new.k->p.snapshot &&
	    (front_split || back_split)) {
		update = bch2_bkey_make_mut_noupdate(trans, old);
		if ((ret = PTR_ERR_OR_ZERO(update)))
			return ret;

		bch2_cut_front(new_start, update);
		bch2_cut_back(new.k->p, update);

		ret = bch2_insert_snapshot_whiteouts(trans, btree_id,
					old.k->p, update->k.p) ?:
		      bch2_btree_insert_nonextent(trans, btree_id, update,
					BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags);
		if (ret)
			return ret;
	}

	if (bkey_le(old.k->p, new.k->p)) {
		update = bch2_trans_kmalloc(trans, sizeof(*update));
		if ((ret = PTR_ERR_OR_ZERO(update)))
			return ret;

		bkey_init(&update->k);
		update->k.p = old.k->p;
		update->k.p.snapshot = new.k->p.snapshot;

		if (new.k->p.snapshot != old.k->p.snapshot) {
			update->k.type = KEY_TYPE_whiteout;
		} else if (btree_type_has_snapshots(btree_id)) {
			ret = need_whiteout_for_snapshot(trans, btree_id, update->k.p);
			if (ret < 0)
				return ret;
			if (ret)
				update->k.type = KEY_TYPE_whiteout;
		}

		ret = bch2_btree_insert_nonextent(trans, btree_id, update,
					BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags);
		if (ret)
			return ret;
	}

	if (back_split) {
		update = bch2_bkey_make_mut_noupdate(trans, old);
		if ((ret = PTR_ERR_OR_ZERO(update)))
			return ret;

		bch2_cut_front(new.k->p, update);

		ret = bch2_trans_update_by_path(trans, iter->path, update,
					BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
					flags, _RET_IP_);
		if (ret)
			return ret;
	}

	return 0;
}

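/*
 * Handle an update to an extent btree: merge the new extent with existing
 * neighbours where possible, and trim/split any existing extents it overlaps:
 */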
static int bch2_trans_update_extent(struct btree_trans *trans,
				    struct btree_iter *orig_iter,
				    struct bkey_i *insert,
				    enum btree_update_flags flags)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	enum btree_id btree_id = orig_iter->btree_id;
	int ret = 0;

	bch2_trans_iter_init(trans, &iter, btree_id, bkey_start_pos(&insert->k),
			     BTREE_ITER_INTENT|
			     BTREE_ITER_WITH_UPDATES|
			     BTREE_ITER_NOT_EXTENTS);
	k = bch2_btree_iter_peek_upto(&iter, POS(insert->k.p.inode, U64_MAX));
	if ((ret = bkey_err(k)))
		goto err;
	if (!k.k)
		goto out;

	if (bkey_eq(k.k->p, bkey_start_pos(&insert->k))) {
		if (bch2_bkey_maybe_mergable(k.k, &insert->k)) {
			ret = extent_front_merge(trans, &iter, k, &insert, flags);
			if (ret)
				goto err;
		}

		goto next;
	}

	while (bkey_gt(insert->k.p, bkey_start_pos(k.k))) {
		bool done = bkey_lt(insert->k.p, k.k->p);

		ret = bch2_trans_update_extent_overwrite(trans, &iter, flags, k, bkey_i_to_s_c(insert));
		if (ret)
			goto err;

		if (done)
			goto out;
next:
		bch2_btree_iter_advance(&iter);
		k = bch2_btree_iter_peek_upto(&iter, POS(insert->k.p.inode, U64_MAX));
		if ((ret = bkey_err(k)))
			goto err;
		if (!k.k)
			goto out;
	}

	if (bch2_bkey_maybe_mergable(&insert->k, k.k)) {
		ret = extent_back_merge(trans, &iter, insert, k);
		if (ret)
			goto err;
	}
out:
	if (!bkey_deleted(&insert->k))
		ret = bch2_btree_insert_nonextent(trans, btree_id, insert, flags);
err:
	bch2_trans_iter_exit(trans, &iter);

	return ret;
}

static noinline int flush_new_cached_update(struct btree_trans *trans,
					    struct btree_path *path,
					    struct btree_insert_entry *i,
					    enum btree_update_flags flags,
					    unsigned long ip)
{
	struct btree_path *btree_path;
	struct bkey k;
	int ret;

	btree_path = bch2_path_get(trans, path->btree_id, path->pos, 1, 0,
				   BTREE_ITER_INTENT, _THIS_IP_);
	ret = bch2_btree_path_traverse(trans, btree_path, 0);
	if (ret)
		goto out;

	/*
	 * The old key in the insert entry might actually refer to an existing
	 * key in the btree that has been deleted from cache and not yet
	 * flushed. Check for this and skip the flush so we don't run triggers
	 * against a stale key.
	 */
	bch2_btree_path_peek_slot_exact(btree_path, &k);
	if (!bkey_deleted(&k))
		goto out;

	i->key_cache_already_flushed = true;
	i->flags |= BTREE_TRIGGER_NORUN;

	btree_path_set_should_be_locked(btree_path);
	ret = bch2_trans_update_by_path(trans, btree_path, i->k, flags, ip);
out:
	bch2_path_put(trans, btree_path, true);
	return ret;
}

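/*
 * Add an update to the transaction's list of pending updates, which is kept
 * sorted by btree_insert_entry_cmp(); an existing update at the same position
 * is replaced rather than duplicated:
 */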
static int __must_check
bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path,
			  struct bkey_i *k, enum btree_update_flags flags,
			  unsigned long ip)
{
	struct bch_fs *c = trans->c;
	struct btree_insert_entry *i, n;
	u64 seq = 0;
	int cmp;

	EBUG_ON(!path->should_be_locked);
	EBUG_ON(trans->nr_updates >= BTREE_ITER_MAX);
	EBUG_ON(!bpos_eq(k->k.p, path->pos));

	/*
	 * The transaction journal res hasn't been allocated at this point.
	 * That occurs at commit time. Reuse the seq field to pass in the seq
	 * of a prejournaled key.
	 */
	if (flags & BTREE_UPDATE_PREJOURNAL)
		seq = trans->journal_res.seq;

	n = (struct btree_insert_entry) {
		.flags		= flags,
		.bkey_type	= __btree_node_type(path->level, path->btree_id),
		.btree_id	= path->btree_id,
		.level		= path->level,
		.cached		= path->cached,
		.path		= path,
		.k		= k,
		.seq		= seq,
		.ip_allocated	= ip,
	};

#ifdef CONFIG_BCACHEFS_DEBUG
	trans_for_each_update(trans, i)
		BUG_ON(i != trans->updates &&
		       btree_insert_entry_cmp(i - 1, i) >= 0);
#endif

	/*
	 * Pending updates are kept sorted: first, find position of new update,
	 * then delete/trim any updates the new update overwrites:
	 */
	trans_for_each_update(trans, i) {
		cmp = btree_insert_entry_cmp(&n, i);
		if (cmp <= 0)
			break;
	}

	if (!cmp && i < trans->updates + trans->nr_updates) {
		EBUG_ON(i->insert_trigger_run || i->overwrite_trigger_run);

		bch2_path_put(trans, i->path, true);
		i->flags	= n.flags;
		i->cached	= n.cached;
		i->k		= n.k;
		i->path		= n.path;
		i->seq		= n.seq;
		i->ip_allocated	= n.ip_allocated;
	} else {
		array_insert_item(trans->updates, trans->nr_updates,
				  i - trans->updates, n);

		i->old_v = bch2_btree_path_peek_slot_exact(path, &i->old_k).v;
		i->old_btree_u64s = !bkey_deleted(&i->old_k) ? i->old_k.u64s : 0;

		if (unlikely(trans->journal_replay_not_finished)) {
			struct bkey_i *j_k =
				bch2_journal_keys_peek_slot(c, n.btree_id, n.level, k->k.p);

			if (j_k) {
				i->old_k = j_k->k;
				i->old_v = &j_k->v;
			}
		}
	}

	__btree_path_get(i->path, true);

	/*
	 * If a key is present in the key cache, it must also exist in the
	 * btree - this is necessary for cache coherency. When iterating over
	 * a btree that's cached in the key cache, the btree iter code checks
	 * the key cache - but the key has to exist in the btree for that to
	 * work:
	 */
	if (path->cached && bkey_deleted(&i->old_k))
		return flush_new_cached_update(trans, path, i, flags, ip);

	return 0;
}

static noinline int bch2_trans_update_get_key_cache(struct btree_trans *trans,
						    struct btree_iter *iter,
						    struct btree_path *path)
{
	if (!iter->key_cache_path ||
	    !iter->key_cache_path->should_be_locked ||
	    !bpos_eq(iter->key_cache_path->pos, iter->pos)) {
		struct bkey_cached *ck;
		int ret;

		if (!iter->key_cache_path)
			iter->key_cache_path =
				bch2_path_get(trans, path->btree_id, path->pos, 1, 0,
					      BTREE_ITER_INTENT|
					      BTREE_ITER_CACHED, _THIS_IP_);

		iter->key_cache_path =
			bch2_btree_path_set_pos(trans, iter->key_cache_path, path->pos,
						iter->flags & BTREE_ITER_INTENT,
						_THIS_IP_);

		ret = bch2_btree_path_traverse(trans, iter->key_cache_path,
					       BTREE_ITER_CACHED);
		if (unlikely(ret))
			return ret;

		ck = (void *) iter->key_cache_path->l[0].b;

		if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
			trace_and_count(trans->c, trans_restart_key_cache_raced, trans, _RET_IP_);
			return btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_raced);
		}

		btree_path_set_should_be_locked(iter->key_cache_path);
	}

	return 0;
}

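/*
 * bch2_trans_update() is the main entry point for queueing an update in a
 * transaction: it handles extent updates, emits snapshot whiteouts for
 * deletions where needed, and redirects updates on cached btrees to the key
 * cache:
 */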
int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
				   struct bkey_i *k, enum btree_update_flags flags)
{
	struct btree_path *path = iter->update_path ?: iter->path;
	int ret;

	if (iter->flags & BTREE_ITER_IS_EXTENTS)
		return bch2_trans_update_extent(trans, iter, k, flags);

	if (bkey_deleted(&k->k) &&
	    !(flags & BTREE_UPDATE_KEY_CACHE_RECLAIM) &&
	    (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)) {
		ret = need_whiteout_for_snapshot(trans, iter->btree_id, k->k.p);
		if (unlikely(ret < 0))
			return ret;

		if (ret)
			k->k.type = KEY_TYPE_whiteout;
	}

	/*
	 * Ensure that updates to cached btrees go to the key cache:
	 */
	if (!(flags & BTREE_UPDATE_KEY_CACHE_RECLAIM) &&
	    !path->cached &&
	    !path->level &&
	    btree_id_cached(trans->c, path->btree_id)) {
		ret = bch2_trans_update_get_key_cache(trans, iter, path);
		if (ret)
			return ret;

		path = iter->key_cache_path;
	}

	return bch2_trans_update_by_path(trans, path, k, flags, _RET_IP_);
}

/*
 * Add a transaction update for a key that has already been journaled.
 */
int __must_check bch2_trans_update_seq(struct btree_trans *trans, u64 seq,
				       struct btree_iter *iter, struct bkey_i *k,
				       enum btree_update_flags flags)
{
	trans->journal_res.seq = seq;
	return bch2_trans_update(trans, iter, k, flags|BTREE_UPDATE_NOJOURNAL|
						 BTREE_UPDATE_PREJOURNAL);
}

static noinline int bch2_btree_insert_clone_trans(struct btree_trans *trans,
						  enum btree_id btree,
						  struct bkey_i *k)
{
	struct bkey_i *n = bch2_trans_kmalloc(trans, bkey_bytes(&k->k));
	int ret = PTR_ERR_OR_ZERO(n);
	if (ret)
		return ret;

	bkey_copy(n, k);
	return bch2_btree_insert_trans(trans, btree, n, 0);
}

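/*
 * Queue an update to go through the btree write buffer: the key is buffered in
 * the transaction (coalescing repeated updates to the same position within the
 * transaction) and written out via the write buffer at commit time; during
 * journal replay we fall back to a regular btree update:
 */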
int __must_check bch2_trans_update_buffered(struct btree_trans *trans,
					    enum btree_id btree,
					    struct bkey_i *k)
{
	struct btree_write_buffered_key *i;
	int ret;

	EBUG_ON(trans->nr_wb_updates > trans->wb_updates_size);
	EBUG_ON(k->k.u64s > BTREE_WRITE_BUFERED_U64s_MAX);

	if (unlikely(trans->journal_replay_not_finished))
		return bch2_btree_insert_clone_trans(trans, btree, k);

	trans_for_each_wb_update(trans, i) {
		if (i->btree == btree && bpos_eq(i->k.k.p, k->k.p)) {
			bkey_copy(&i->k, k);
			return 0;
		}
	}

	if (!trans->wb_updates ||
	    trans->nr_wb_updates == trans->wb_updates_size) {
		struct btree_write_buffered_key *u;

		if (trans->nr_wb_updates == trans->wb_updates_size) {
			struct btree_transaction_stats *s = btree_trans_stats(trans);

			BUG_ON(trans->wb_updates_size > U8_MAX / 2);
			trans->wb_updates_size = max(1, trans->wb_updates_size * 2);
			if (s)
				s->wb_updates_size = trans->wb_updates_size;
		}

		u = bch2_trans_kmalloc_nomemzero(trans,
					trans->wb_updates_size *
					sizeof(struct btree_write_buffered_key));
		ret = PTR_ERR_OR_ZERO(u);
		if (ret)
			return ret;

		if (trans->nr_wb_updates)
			memcpy(u, trans->wb_updates, trans->nr_wb_updates *
			       sizeof(struct btree_write_buffered_key));
		trans->wb_updates = u;
	}

	trans->wb_updates[trans->nr_wb_updates] = (struct btree_write_buffered_key) {
		.btree	= btree,
	};

	bkey_copy(&trans->wb_updates[trans->nr_wb_updates].k, k);
	trans->nr_wb_updates++;

	return 0;
}

int bch2_bkey_get_empty_slot(struct btree_trans *trans, struct btree_iter *iter,
			     enum btree_id btree, struct bpos end)
{
	struct bkey_s_c k;
	int ret = 0;

	bch2_trans_iter_init(trans, iter, btree, POS_MAX, BTREE_ITER_INTENT);
	k = bch2_btree_iter_prev(iter);
	ret = bkey_err(k);
	if (ret)
		goto err;

	bch2_btree_iter_advance(iter);
	k = bch2_btree_iter_peek_slot(iter);
	ret = bkey_err(k);
	if (ret)
		goto err;

	BUG_ON(k.k->type != KEY_TYPE_deleted);

	if (bkey_gt(k.k->p, end)) {
		ret = -BCH_ERR_ENOSPC_btree_slot;
		goto err;
	}

	return 0;
err:
	bch2_trans_iter_exit(trans, iter);
	return ret;
}

void bch2_trans_commit_hook(struct btree_trans *trans,
			    struct btree_trans_commit_hook *h)
{
	h->next = trans->hooks;
	trans->hooks = h;
}

int bch2_btree_insert_nonextent(struct btree_trans *trans,
				enum btree_id btree, struct bkey_i *k,
				enum btree_update_flags flags)
{
	struct btree_iter iter;
	int ret;

	bch2_trans_iter_init(trans, &iter, btree, k->k.p,
			     BTREE_ITER_CACHED|
			     BTREE_ITER_NOT_EXTENTS|
			     BTREE_ITER_INTENT);
	ret = bch2_btree_iter_traverse(&iter) ?:
	      bch2_trans_update(trans, &iter, k, flags);
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

int bch2_btree_insert_trans(struct btree_trans *trans, enum btree_id id,
			    struct bkey_i *k, enum btree_update_flags flags)
{
	struct btree_iter iter;
	int ret;

	bch2_trans_iter_init(trans, &iter, id, bkey_start_pos(&k->k),
			     BTREE_ITER_CACHED|
			     BTREE_ITER_INTENT);
	ret = bch2_btree_iter_traverse(&iter) ?:
	      bch2_trans_update(trans, &iter, k, flags);
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

/**
 * bch2_btree_insert - insert a key into a btree, in its own transaction
 * @c:		pointer to struct bch_fs
 * @id:		btree to insert into
 * @k:		key to insert
 * @disk_res:	must be non-NULL whenever inserting or potentially
 *		splitting data extents
 * @flags:	transaction commit flags
 *
 * Returns:	0 on success, error code on failure
 */
int bch2_btree_insert(struct bch_fs *c, enum btree_id id, struct bkey_i *k,
		      struct disk_reservation *disk_res, int flags)
{
	return bch2_trans_do(c, disk_res, NULL, flags,
			     bch2_btree_insert_trans(trans, id, k, 0));
}

int bch2_btree_delete_extent_at(struct btree_trans *trans, struct btree_iter *iter,
				unsigned len, unsigned update_flags)
{
	struct bkey_i *k;

	k = bch2_trans_kmalloc(trans, sizeof(*k));
	if (IS_ERR(k))
		return PTR_ERR(k);

	bkey_init(&k->k);
	k->k.p = iter->pos;
	bch2_key_resize(&k->k, len);
	return bch2_trans_update(trans, iter, k, update_flags);
}

int bch2_btree_delete_at(struct btree_trans *trans,
			 struct btree_iter *iter, unsigned update_flags)
{
	return bch2_btree_delete_extent_at(trans, iter, 0, update_flags);
}

int bch2_btree_delete_at_buffered(struct btree_trans *trans,
				  enum btree_id btree, struct bpos pos)
{
	struct bkey_i *k;

	k = bch2_trans_kmalloc(trans, sizeof(*k));
	if (IS_ERR(k))
		return PTR_ERR(k);

	bkey_init(&k->k);
	k->k.p = pos;
	return bch2_trans_update_buffered(trans, btree, k);
}

int bch2_btree_delete(struct btree_trans *trans,
		      enum btree_id btree, struct bpos pos,
		      unsigned update_flags)
{
	struct btree_iter iter;
	int ret;

	bch2_trans_iter_init(trans, &iter, btree, pos,
			     BTREE_ITER_CACHED|
			     BTREE_ITER_INTENT);
	ret = bch2_btree_iter_traverse(&iter) ?:
	      bch2_btree_delete_at(trans, &iter, update_flags);
	bch2_trans_iter_exit(trans, &iter);

	return ret;
}

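/*
 * Delete the keys in a given range, committing after each deletion so that
 * deleting a large range doesn't overflow the transaction:
 */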
int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
				  struct bpos start, struct bpos end,
				  unsigned update_flags,
				  u64 *journal_seq)
{
	u32 restart_count = trans->restart_count;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret = 0;

	bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_INTENT);
	while ((k = bch2_btree_iter_peek_upto(&iter, end)).k) {
		struct disk_reservation disk_res =
			bch2_disk_reservation_init(trans->c, 0);
		struct bkey_i delete;

		ret = bkey_err(k);
		if (ret)
			goto err;

		bkey_init(&delete.k);

		/*
		 * This could probably be more efficient for extents:
		 */

		/*
		 * For extents, iter.pos won't necessarily be the same as
		 * bkey_start_pos(k.k) (for non extents they always will be the
		 * same). It's important that we delete starting from iter.pos
		 * because the range we want to delete could start in the middle
		 * of k.
		 *
		 * (bch2_btree_iter_peek() does guarantee that iter.pos >=
		 * bkey_start_pos(k.k)).
		 */
		delete.k.p = iter.pos;

		if (iter.flags & BTREE_ITER_IS_EXTENTS)
			bch2_key_resize(&delete.k,
					bpos_min(end, k.k->p).offset -
					iter.pos.offset);

		ret = bch2_trans_update(trans, &iter, &delete, update_flags) ?:
		      bch2_trans_commit(trans, &disk_res, journal_seq,
					BTREE_INSERT_NOFAIL);
		bch2_disk_reservation_put(trans->c, &disk_res);
err:
		/*
		 * the bch2_trans_begin() call is in a weird place because we
		 * need to call it after every transaction commit, to avoid path
		 * overflow, but don't want to call it if the delete operation
		 * is a no-op and we have no work to do:
		 */
		bch2_trans_begin(trans);

		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			ret = 0;
		if (ret)
			break;
	}
	bch2_trans_iter_exit(trans, &iter);

	return ret ?: trans_was_restarted(trans, restart_count);
}

/*
 * bch2_btree_delete_range - delete everything within a given range
 *
 * Range is a half open interval - [start, end)
 */
int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
			    struct bpos start, struct bpos end,
			    unsigned update_flags,
			    u64 *journal_seq)
{
	int ret = bch2_trans_run(c,
			bch2_btree_delete_range_trans(trans, id, start, end,
						      update_flags, journal_seq));
	if (ret == -BCH_ERR_transaction_restart_nested)
		ret = 0;
	return ret;
}

int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree,
		       struct bpos pos, bool set)
{
	struct bkey_i *k;
	int ret = 0;

	k = bch2_trans_kmalloc_nomemzero(trans, sizeof(*k));
	ret = PTR_ERR_OR_ZERO(k);
	if (unlikely(ret))
		return ret;

	bkey_init(&k->k);
	k->k.type = set ? KEY_TYPE_set : KEY_TYPE_deleted;
	k->k.p = pos;

	return bch2_trans_update_buffered(trans, btree, k);
}

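/*
 * Format a printf-style message as a BCH_JSET_ENTRY_log journal entry and
 * append it to the given list of journal entries:
 */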
__printf(2, 0)
static int __bch2_trans_log_msg(darray_u64 *entries, const char *fmt, va_list args)
{
	struct printbuf buf = PRINTBUF;
	struct jset_entry_log *l;
	unsigned u64s;
	int ret;

	prt_vprintf(&buf, fmt, args);
	ret = buf.allocation_failure ? -BCH_ERR_ENOMEM_trans_log_msg : 0;
	if (ret)
		goto err;

	u64s = DIV_ROUND_UP(buf.pos, sizeof(u64));

	ret = darray_make_room(entries, jset_u64s(u64s));
	if (ret)
		goto err;

	l = (void *) &darray_top(*entries);
	l->entry.u64s		= cpu_to_le16(u64s);
	l->entry.btree_id	= 0;
	l->entry.level		= 1;
	l->entry.type		= BCH_JSET_ENTRY_log;
	l->entry.pad[0]		= 0;
	l->entry.pad[1]		= 0;
	l->entry.pad[2]		= 0;
	memcpy(l->d, buf.buf, buf.pos);
	while (buf.pos & 7)
		l->d[buf.pos++] = '\0';

	entries->nr += jset_u64s(u64s);
err:
	printbuf_exit(&buf);
	return ret;
}

__printf(3, 0)
static int
__bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt,
		  va_list args)
{
	int ret;

	if (!test_bit(JOURNAL_STARTED, &c->journal.flags)) {
		ret = __bch2_trans_log_msg(&c->journal.early_journal_entries, fmt, args);
	} else {
		ret = bch2_trans_do(c, NULL, NULL,
				    BTREE_INSERT_LAZY_RW|commit_flags,
			__bch2_trans_log_msg(&trans->extra_journal_entries, fmt, args));
	}

	return ret;
}

__printf(2, 3)
int bch2_fs_log_msg(struct bch_fs *c, const char *fmt, ...)
{
	va_list args;
	int ret;

	va_start(args, fmt);
	ret = __bch2_fs_log_msg(c, 0, fmt, args);
	va_end(args);
	return ret;
}

/*
 * Use for logging messages during recovery to enable reserved space and avoid
 * blocking.
 */
__printf(2, 3)
int bch2_journal_log_msg(struct bch_fs *c, const char *fmt, ...)
{
	va_list args;
	int ret;

	va_start(args, fmt);
	ret = __bch2_fs_log_msg(c, BCH_WATERMARK_reclaim, fmt, args);
	va_end(args);
	return ret;
}