// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "btree_update.h"
#include "btree_iter.h"
#include "btree_journal_iter.h"
#include "btree_locking.h"
#include "buckets.h"
#include "debug.h"
#include "errcode.h"
#include "error.h"
#include "extents.h"
#include "keylist.h"
#include "snapshot.h"
#include "trace.h"

static inline int btree_insert_entry_cmp(const struct btree_insert_entry *l,
					 const struct btree_insert_entry *r)
{
	return cmp_int(l->btree_id, r->btree_id) ?:
	       cmp_int(l->cached, r->cached) ?:
	       -cmp_int(l->level, r->level) ?:
	       bpos_cmp(l->k->k.p, r->k->k.p);
}

static int __must_check
bch2_trans_update_by_path(struct btree_trans *, struct btree_path *,
			  struct bkey_i *, enum btree_update_flags,
			  unsigned long ip);

static noinline int extent_front_merge(struct btree_trans *trans,
				       struct btree_iter *iter,
				       struct bkey_s_c k,
				       struct bkey_i **insert,
				       enum btree_update_flags flags)
{
	struct bch_fs *c = trans->c;
	struct bkey_i *update;
	int ret;

	update = bch2_bkey_make_mut_noupdate(trans, k);
	ret = PTR_ERR_OR_ZERO(update);
	if (ret)
		return ret;

	if (!bch2_bkey_merge(c, bkey_i_to_s(update), bkey_i_to_s_c(*insert)))
		return 0;

	ret = bch2_key_has_snapshot_overwrites(trans, iter->btree_id, k.k->p) ?:
	      bch2_key_has_snapshot_overwrites(trans, iter->btree_id, (*insert)->k.p);
	if (ret < 0)
		return ret;
	if (ret)
		return 0;

	ret = bch2_btree_delete_at(trans, iter, flags);
	if (ret)
		return ret;

	*insert = update;
	return 0;
}

static noinline int extent_back_merge(struct btree_trans *trans,
				      struct btree_iter *iter,
				      struct bkey_i *insert,
				      struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	int ret;

	ret = bch2_key_has_snapshot_overwrites(trans, iter->btree_id, insert->k.p) ?:
	      bch2_key_has_snapshot_overwrites(trans, iter->btree_id, k.k->p);
	if (ret < 0)
		return ret;
	if (ret)
		return 0;

	bch2_bkey_merge(c, bkey_i_to_s(insert), k);
	return 0;
}

/*
 * When deleting, check if we need to emit a whiteout (because we're overwriting
 * something in an ancestor snapshot)
 */
static int need_whiteout_for_snapshot(struct btree_trans *trans,
				      enum btree_id btree_id, struct bpos pos)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	u32 snapshot = pos.snapshot;
	int ret;

	if (!bch2_snapshot_parent(trans->c, pos.snapshot))
		return 0;

	pos.snapshot++;

	for_each_btree_key_norestart(trans, iter, btree_id, pos,
				     BTREE_ITER_ALL_SNAPSHOTS|
				     BTREE_ITER_NOPRESERVE, k, ret) {
		if (!bkey_eq(k.k->p, pos))
			break;

		if (bch2_snapshot_is_ancestor(trans->c, snapshot,
					      k.k->p.snapshot)) {
			ret = !bkey_whiteout(k.k);
			break;
		}
	}
	bch2_trans_iter_exit(trans, &iter);

	return ret;
}
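
/*
 * A key is being moved from @old_pos to @new_pos and @old_pos.snapshot has
 * child snapshots: for every version of the key that exists at @old_pos in a
 * descendant snapshot (skipping snapshots already covered by one we've
 * handled), insert a KEY_TYPE_whiteout at @new_pos in that snapshot if the
 * slot there is currently empty, so the moved key doesn't become visible at
 * @new_pos in those snapshots.
 */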
int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans,
				     enum btree_id id,
				     struct bpos old_pos,
				     struct bpos new_pos)
{
	struct bch_fs *c = trans->c;
	struct btree_iter old_iter, new_iter = { NULL };
	struct bkey_s_c old_k, new_k;
	snapshot_id_list s;
	struct bkey_i *update;
	int ret = 0;

	if (!bch2_snapshot_has_children(c, old_pos.snapshot))
		return 0;

	darray_init(&s);

	bch2_trans_iter_init(trans, &old_iter, id, old_pos,
			     BTREE_ITER_NOT_EXTENTS|
			     BTREE_ITER_ALL_SNAPSHOTS);
	while ((old_k = bch2_btree_iter_prev(&old_iter)).k &&
	       !(ret = bkey_err(old_k)) &&
	       bkey_eq(old_pos, old_k.k->p)) {
		struct bpos whiteout_pos =
			SPOS(new_pos.inode, new_pos.offset, old_k.k->p.snapshot);

		if (!bch2_snapshot_is_ancestor(c, old_k.k->p.snapshot, old_pos.snapshot) ||
		    snapshot_list_has_ancestor(c, &s, old_k.k->p.snapshot))
			continue;

		new_k = bch2_bkey_get_iter(trans, &new_iter, id, whiteout_pos,
					   BTREE_ITER_NOT_EXTENTS|
					   BTREE_ITER_INTENT);
		ret = bkey_err(new_k);
		if (ret)
			break;

		if (new_k.k->type == KEY_TYPE_deleted) {
			update = bch2_trans_kmalloc(trans, sizeof(struct bkey_i));
			ret = PTR_ERR_OR_ZERO(update);
			if (ret)
				break;

			bkey_init(&update->k);
			update->k.p = whiteout_pos;
			update->k.type = KEY_TYPE_whiteout;

			ret = bch2_trans_update(trans, &new_iter, update,
						BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
		}
		bch2_trans_iter_exit(trans, &new_iter);

		ret = snapshot_list_add(c, &s, old_k.k->p.snapshot);
		if (ret)
			break;
	}
	bch2_trans_iter_exit(trans, &new_iter);
	bch2_trans_iter_exit(trans, &old_iter);
	darray_exit(&s);

	return ret;
}
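
/*
 * Overwrite the existing extent @old with @new: split off and reinsert the
 * parts of @old that extend before/after @new (front and back splits), keep
 * the overlapping middle of @old when the overwrite is in a different
 * snapshot, and, when @old does not extend past @new, replace @old with a
 * deletion or whiteout at its old position.
 */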
int bch2_trans_update_extent_overwrite(struct btree_trans *trans,
				       struct btree_iter *iter,
				       enum btree_update_flags flags,
				       struct bkey_s_c old,
				       struct bkey_s_c new)
{
	enum btree_id btree_id = iter->btree_id;
	struct bkey_i *update;
	struct bpos new_start = bkey_start_pos(new.k);
	unsigned front_split = bkey_lt(bkey_start_pos(old.k), new_start);
	unsigned back_split = bkey_gt(old.k->p, new.k->p);
	unsigned middle_split = (front_split || back_split) &&
		old.k->p.snapshot != new.k->p.snapshot;
	unsigned nr_splits = front_split + back_split + middle_split;
	int ret = 0, compressed_sectors;

	/*
	 * If we're going to be splitting a compressed extent, note it
	 * so that __bch2_trans_commit() can increase our disk
	 * reservation:
	 */
	if (nr_splits > 1 &&
	    (compressed_sectors = bch2_bkey_sectors_compressed(old)))
		trans->extra_journal_res += compressed_sectors * (nr_splits - 1);

	if (front_split) {
		update = bch2_bkey_make_mut_noupdate(trans, old);
		if ((ret = PTR_ERR_OR_ZERO(update)))
			return ret;

		bch2_cut_back(new_start, update);

		ret = bch2_insert_snapshot_whiteouts(trans, btree_id,
					old.k->p, update->k.p) ?:
		      bch2_btree_insert_nonextent(trans, btree_id, update,
					BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags);
		if (ret)
			return ret;
	}

	/* If we're overwriting in a different snapshot - middle split: */
	if (middle_split) {
		update = bch2_bkey_make_mut_noupdate(trans, old);
		if ((ret = PTR_ERR_OR_ZERO(update)))
			return ret;

		bch2_cut_front(new_start, update);
		bch2_cut_back(new.k->p, update);

		ret = bch2_insert_snapshot_whiteouts(trans, btree_id,
					old.k->p, update->k.p) ?:
		      bch2_btree_insert_nonextent(trans, btree_id, update,
					BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags);
		if (ret)
			return ret;
	}

	if (bkey_le(old.k->p, new.k->p)) {
		update = bch2_trans_kmalloc(trans, sizeof(*update));
		if ((ret = PTR_ERR_OR_ZERO(update)))
			return ret;

		bkey_init(&update->k);
		update->k.p = old.k->p;
		update->k.p.snapshot = new.k->p.snapshot;

		if (new.k->p.snapshot != old.k->p.snapshot) {
			update->k.type = KEY_TYPE_whiteout;
		} else if (btree_type_has_snapshots(btree_id)) {
			ret = need_whiteout_for_snapshot(trans, btree_id, update->k.p);
			if (ret < 0)
				return ret;
			if (ret)
				update->k.type = KEY_TYPE_whiteout;
		}

		ret = bch2_btree_insert_nonextent(trans, btree_id, update,
					BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags);
		if (ret)
			return ret;
	}

	if (back_split) {
		update = bch2_bkey_make_mut_noupdate(trans, old);
		if ((ret = PTR_ERR_OR_ZERO(update)))
			return ret;

		bch2_cut_front(new.k->p, update);

		ret = bch2_trans_update_by_path(trans, iter->path, update,
					BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
					flags, _RET_IP_);
		if (ret)
			return ret;
	}

	return 0;
}

static int bch2_trans_update_extent(struct btree_trans *trans,
				    struct btree_iter *orig_iter,
				    struct bkey_i *insert,
				    enum btree_update_flags flags)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	enum btree_id btree_id = orig_iter->btree_id;
	int ret = 0;

	bch2_trans_iter_init(trans, &iter, btree_id, bkey_start_pos(&insert->k),
			     BTREE_ITER_INTENT|
			     BTREE_ITER_WITH_UPDATES|
			     BTREE_ITER_NOT_EXTENTS);
	k = bch2_btree_iter_peek_upto(&iter, POS(insert->k.p.inode, U64_MAX));
	if ((ret = bkey_err(k)))
		goto err;
	if (!k.k)
		goto out;

	if (bkey_eq(k.k->p, bkey_start_pos(&insert->k))) {
		if (bch2_bkey_maybe_mergable(k.k, &insert->k)) {
			ret = extent_front_merge(trans, &iter, k, &insert, flags);
			if (ret)
				goto err;
		}

		goto next;
	}

	while (bkey_gt(insert->k.p, bkey_start_pos(k.k))) {
		bool done = bkey_lt(insert->k.p, k.k->p);

		ret = bch2_trans_update_extent_overwrite(trans, &iter, flags, k, bkey_i_to_s_c(insert));
		if (ret)
			goto err;

		if (done)
			goto out;
next:
		bch2_btree_iter_advance(&iter);
		k = bch2_btree_iter_peek_upto(&iter, POS(insert->k.p.inode, U64_MAX));
		if ((ret = bkey_err(k)))
			goto err;
		if (!k.k)
			goto out;
	}

	if (bch2_bkey_maybe_mergable(&insert->k, k.k)) {
		ret = extent_back_merge(trans, &iter, insert, k);
		if (ret)
			goto err;
	}
out:
	if (!bkey_deleted(&insert->k))
		ret = bch2_btree_insert_nonextent(trans, btree_id, insert, flags);
err:
	bch2_trans_iter_exit(trans, &iter);

	return ret;
}
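
/*
 * A transaction update is creating a key in the key cache that doesn't yet
 * exist in the btree itself: also queue the update against the btree, so the
 * key cache and the btree stay coherent (see the comment in
 * bch2_trans_update_by_path()).
 */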
static noinline int flush_new_cached_update(struct btree_trans *trans,
					    struct btree_path *path,
					    struct btree_insert_entry *i,
					    enum btree_update_flags flags,
					    unsigned long ip)
{
	struct btree_path *btree_path;
	struct bkey k;
	int ret;

	btree_path = bch2_path_get(trans, path->btree_id, path->pos, 1, 0,
				   BTREE_ITER_INTENT, _THIS_IP_);
	ret = bch2_btree_path_traverse(trans, btree_path, 0);
	if (ret)
		goto out;

	/*
	 * The old key in the insert entry might actually refer to an existing
	 * key in the btree that has been deleted from cache and not yet
	 * flushed. Check for this and skip the flush so we don't run triggers
	 * against a stale key.
	 */
	bch2_btree_path_peek_slot_exact(btree_path, &k);
	if (!bkey_deleted(&k))
		goto out;

	i->key_cache_already_flushed = true;
	i->flags |= BTREE_TRIGGER_NORUN;

	btree_path_set_should_be_locked(btree_path);
	ret = bch2_trans_update_by_path(trans, btree_path, i->k, flags, ip);
out:
	bch2_path_put(trans, btree_path, true);
	return ret;
}
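
/*
 * Add an update to the transaction's sorted list of pending updates: if an
 * update already exists for the same path and position, it is replaced;
 * otherwise a new entry is inserted and the key currently being overwritten
 * is recorded for use by triggers at commit time.
 */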
static int __must_check
bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path,
			  struct bkey_i *k, enum btree_update_flags flags,
			  unsigned long ip)
{
	struct bch_fs *c = trans->c;
	struct btree_insert_entry *i, n;
	u64 seq = 0;
	int cmp;

	EBUG_ON(!path->should_be_locked);
	EBUG_ON(trans->nr_updates >= BTREE_ITER_MAX);
	EBUG_ON(!bpos_eq(k->k.p, path->pos));

	/*
	 * The transaction journal res hasn't been allocated at this point.
	 * That occurs at commit time. Reuse the seq field to pass in the seq
	 * of a prejournaled key.
	 */
	if (flags & BTREE_UPDATE_PREJOURNAL)
		seq = trans->journal_res.seq;

	n = (struct btree_insert_entry) {
		.flags		= flags,
		.bkey_type	= __btree_node_type(path->level, path->btree_id),
		.btree_id	= path->btree_id,
		.level		= path->level,
		.cached		= path->cached,
		.path		= path,
		.k		= k,
		.seq		= seq,
		.ip_allocated	= ip,
	};

#ifdef CONFIG_BCACHEFS_DEBUG
	trans_for_each_update(trans, i)
		BUG_ON(i != trans->updates &&
		       btree_insert_entry_cmp(i - 1, i) >= 0);
#endif

	/*
	 * Pending updates are kept sorted: first, find position of new update,
	 * then delete/trim any updates the new update overwrites:
	 */
	trans_for_each_update(trans, i) {
		cmp = btree_insert_entry_cmp(&n, i);
		if (cmp <= 0)
			break;
	}

	if (!cmp && i < trans->updates + trans->nr_updates) {
		EBUG_ON(i->insert_trigger_run || i->overwrite_trigger_run);

		bch2_path_put(trans, i->path, true);
		i->flags	= n.flags;
		i->cached	= n.cached;
		i->k		= n.k;
		i->path		= n.path;
		i->seq		= n.seq;
		i->ip_allocated	= n.ip_allocated;
	} else {
		array_insert_item(trans->updates, trans->nr_updates,
				  i - trans->updates, n);

		i->old_v = bch2_btree_path_peek_slot_exact(path, &i->old_k).v;
		i->old_btree_u64s = !bkey_deleted(&i->old_k) ? i->old_k.u64s : 0;

		if (unlikely(trans->journal_replay_not_finished)) {
			struct bkey_i *j_k =
				bch2_journal_keys_peek_slot(c, n.btree_id, n.level, k->k.p);

			if (j_k) {
				i->old_k = j_k->k;
				i->old_v = &j_k->v;
			}
		}
	}

	__btree_path_get(i->path, true);

	/*
	 * If a key is present in the key cache, it must also exist in the
	 * btree - this is necessary for cache coherency. When iterating over
	 * a btree that's cached in the key cache, the btree iter code checks
	 * the key cache - but the key has to exist in the btree for that to
	 * work:
	 */
	if (path->cached && bkey_deleted(&i->old_k))
		return flush_new_cached_update(trans, path, i, flags, ip);

	return 0;
}

static noinline int bch2_trans_update_get_key_cache(struct btree_trans *trans,
						    struct btree_iter *iter,
						    struct btree_path *path)
{
	if (!iter->key_cache_path ||
	    !iter->key_cache_path->should_be_locked ||
	    !bpos_eq(iter->key_cache_path->pos, iter->pos)) {
		struct bkey_cached *ck;
		int ret;

		if (!iter->key_cache_path)
			iter->key_cache_path =
				bch2_path_get(trans, path->btree_id, path->pos, 1, 0,
					      BTREE_ITER_INTENT|
					      BTREE_ITER_CACHED, _THIS_IP_);

		iter->key_cache_path =
			bch2_btree_path_set_pos(trans, iter->key_cache_path, path->pos,
						iter->flags & BTREE_ITER_INTENT,
						_THIS_IP_);

		ret = bch2_btree_path_traverse(trans, iter->key_cache_path,
					       BTREE_ITER_CACHED);
		if (unlikely(ret))
			return ret;

		ck = (void *) iter->key_cache_path->l[0].b;

		if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
			trace_and_count(trans->c, trans_restart_key_cache_raced, trans, _RET_IP_);
			return btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_raced);
		}

		btree_path_set_should_be_locked(iter->key_cache_path);
	}

	return 0;
}
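
/*
 * bch2_trans_update() - queue an update in the current transaction: the
 * update is visible to later lookups in the same transaction via
 * BTREE_ITER_WITH_UPDATES, and is written out at bch2_trans_commit() time.
 *
 * A minimal usage sketch, mirroring bch2_btree_insert_nonextent() below,
 * assuming @trans, @btree and @k are already set up:
 *
 *	struct btree_iter iter;
 *	int ret;
 *
 *	bch2_trans_iter_init(trans, &iter, btree, k->k.p,
 *			     BTREE_ITER_NOT_EXTENTS|BTREE_ITER_INTENT);
 *	ret = bch2_btree_iter_traverse(&iter) ?:
 *	      bch2_trans_update(trans, &iter, k, 0);
 *	bch2_trans_iter_exit(trans, &iter);
 */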
int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
				   struct bkey_i *k, enum btree_update_flags flags)
{
	struct btree_path *path = iter->update_path ?: iter->path;
	int ret;

	if (iter->flags & BTREE_ITER_IS_EXTENTS)
		return bch2_trans_update_extent(trans, iter, k, flags);

	if (bkey_deleted(&k->k) &&
	    !(flags & BTREE_UPDATE_KEY_CACHE_RECLAIM) &&
	    (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)) {
		ret = need_whiteout_for_snapshot(trans, iter->btree_id, k->k.p);
		if (unlikely(ret < 0))
			return ret;

		if (ret)
			k->k.type = KEY_TYPE_whiteout;
	}

	/*
	 * Ensure that updates to cached btrees go to the key cache:
	 */
	if (!(flags & BTREE_UPDATE_KEY_CACHE_RECLAIM) &&
	    !path->cached &&
	    !path->level &&
	    btree_id_cached(trans->c, path->btree_id)) {
		ret = bch2_trans_update_get_key_cache(trans, iter, path);
		if (ret)
			return ret;

		path = iter->key_cache_path;
	}

	return bch2_trans_update_by_path(trans, path, k, flags, _RET_IP_);
}

/*
 * Add a transaction update for a key that has already been journaled.
 */
int __must_check bch2_trans_update_seq(struct btree_trans *trans, u64 seq,
				       struct btree_iter *iter, struct bkey_i *k,
				       enum btree_update_flags flags)
{
	trans->journal_res.seq = seq;
	return bch2_trans_update(trans, iter, k, flags|BTREE_UPDATE_NOJOURNAL|
						 BTREE_UPDATE_PREJOURNAL);
}

static noinline int bch2_btree_insert_clone_trans(struct btree_trans *trans,
						  enum btree_id btree,
						  struct bkey_i *k)
{
	struct bkey_i *n = bch2_trans_kmalloc(trans, bkey_bytes(&k->k));
	int ret = PTR_ERR_OR_ZERO(n);
	if (ret)
		return ret;

	bkey_copy(n, k);
	return bch2_btree_insert_trans(trans, btree, n, 0);
}
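
/*
 * Queue an update via the btree write buffer: an existing buffered update to
 * the same position is overwritten in place, and the per-transaction buffer
 * is grown (doubled) as needed. During journal replay this falls back to a
 * regular btree update.
 */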
int __must_check bch2_trans_update_buffered(struct btree_trans *trans,
					    enum btree_id btree,
					    struct bkey_i *k)
{
	struct btree_write_buffered_key *i;
	int ret;

	EBUG_ON(trans->nr_wb_updates > trans->wb_updates_size);
	EBUG_ON(k->k.u64s > BTREE_WRITE_BUFERED_U64s_MAX);

	if (unlikely(trans->journal_replay_not_finished))
		return bch2_btree_insert_clone_trans(trans, btree, k);

	trans_for_each_wb_update(trans, i) {
		if (i->btree == btree && bpos_eq(i->k.k.p, k->k.p)) {
			bkey_copy(&i->k, k);
			return 0;
		}
	}

	if (!trans->wb_updates ||
	    trans->nr_wb_updates == trans->wb_updates_size) {
		struct btree_write_buffered_key *u;

		if (trans->nr_wb_updates == trans->wb_updates_size) {
			struct btree_transaction_stats *s = btree_trans_stats(trans);

			BUG_ON(trans->wb_updates_size > U8_MAX / 2);
			trans->wb_updates_size = max(1, trans->wb_updates_size * 2);
			if (s)
				s->wb_updates_size = trans->wb_updates_size;
		}

		u = bch2_trans_kmalloc_nomemzero(trans,
					trans->wb_updates_size *
					sizeof(struct btree_write_buffered_key));
		ret = PTR_ERR_OR_ZERO(u);
		if (ret)
			return ret;

		if (trans->nr_wb_updates)
			memcpy(u, trans->wb_updates, trans->nr_wb_updates *
			       sizeof(struct btree_write_buffered_key));
		trans->wb_updates = u;
	}

	trans->wb_updates[trans->nr_wb_updates] = (struct btree_write_buffered_key) {
		.btree	= btree,
	};

	bkey_copy(&trans->wb_updates[trans->nr_wb_updates].k, k);
	trans->nr_wb_updates++;

	return 0;
}

int bch2_bkey_get_empty_slot(struct btree_trans *trans, struct btree_iter *iter,
			     enum btree_id btree, struct bpos end)
{
	struct bkey_s_c k;
	int ret = 0;

	bch2_trans_iter_init(trans, iter, btree, POS_MAX, BTREE_ITER_INTENT);
	k = bch2_btree_iter_prev(iter);
	ret = bkey_err(k);
	if (ret)
		goto err;

	bch2_btree_iter_advance(iter);
	k = bch2_btree_iter_peek_slot(iter);
	ret = bkey_err(k);
	if (ret)
		goto err;

	BUG_ON(k.k->type != KEY_TYPE_deleted);

	if (bkey_gt(k.k->p, end)) {
		ret = -BCH_ERR_ENOSPC_btree_slot;
		goto err;
	}

	return 0;
err:
	bch2_trans_iter_exit(trans, iter);
	return ret;
}

void bch2_trans_commit_hook(struct btree_trans *trans,
			    struct btree_trans_commit_hook *h)
{
	h->next = trans->hooks;
	trans->hooks = h;
}

int bch2_btree_insert_nonextent(struct btree_trans *trans,
				enum btree_id btree, struct bkey_i *k,
				enum btree_update_flags flags)
{
	struct btree_iter iter;
	int ret;

	bch2_trans_iter_init(trans, &iter, btree, k->k.p,
			     BTREE_ITER_CACHED|
			     BTREE_ITER_NOT_EXTENTS|
			     BTREE_ITER_INTENT);
	ret = bch2_btree_iter_traverse(&iter) ?:
	      bch2_trans_update(trans, &iter, k, flags);
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

int bch2_btree_insert_trans(struct btree_trans *trans, enum btree_id id,
			    struct bkey_i *k, enum btree_update_flags flags)
{
	struct btree_iter iter;
	int ret;

	bch2_trans_iter_init(trans, &iter, id, bkey_start_pos(&k->k),
			     BTREE_ITER_CACHED|
			     BTREE_ITER_INTENT);
	ret = bch2_btree_iter_traverse(&iter) ?:
	      bch2_trans_update(trans, &iter, k, flags);
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

/**
 * bch2_btree_insert - insert a key into a btree, running its own transaction
 * @c:			pointer to struct bch_fs
 * @id:			btree to insert into
 * @k:			key to insert
 * @disk_res:		must be non-NULL whenever inserting or potentially
 *			splitting data extents
 * @flags:		transaction commit flags
 *
 * Returns:		0 on success, error code on failure
 */
int bch2_btree_insert(struct bch_fs *c, enum btree_id id, struct bkey_i *k,
		      struct disk_reservation *disk_res, int flags)
{
	return bch2_trans_do(c, disk_res, NULL, flags,
			     bch2_btree_insert_trans(trans, id, k, 0));
}

int bch2_btree_delete_extent_at(struct btree_trans *trans, struct btree_iter *iter,
				unsigned len, unsigned update_flags)
{
	struct bkey_i *k;

	k = bch2_trans_kmalloc(trans, sizeof(*k));
	if (IS_ERR(k))
		return PTR_ERR(k);

	bkey_init(&k->k);
	k->k.p = iter->pos;
	bch2_key_resize(&k->k, len);
	return bch2_trans_update(trans, iter, k, update_flags);
}

int bch2_btree_delete_at(struct btree_trans *trans,
			 struct btree_iter *iter, unsigned update_flags)
{
	return bch2_btree_delete_extent_at(trans, iter, 0, update_flags);
}

int bch2_btree_delete_at_buffered(struct btree_trans *trans,
				  enum btree_id btree, struct bpos pos)
{
	struct bkey_i *k;

	k = bch2_trans_kmalloc(trans, sizeof(*k));
	if (IS_ERR(k))
		return PTR_ERR(k);

	bkey_init(&k->k);
	k->k.p = pos;
	return bch2_trans_update_buffered(trans, btree, k);
}

int bch2_btree_delete(struct btree_trans *trans,
		      enum btree_id btree, struct bpos pos,
		      unsigned update_flags)
{
	struct btree_iter iter;
	int ret;

	bch2_trans_iter_init(trans, &iter, btree, pos,
			     BTREE_ITER_CACHED|
			     BTREE_ITER_INTENT);
	ret = bch2_btree_iter_traverse(&iter) ?:
	      bch2_btree_delete_at(trans, &iter, update_flags);
	bch2_trans_iter_exit(trans, &iter);

	return ret;
}
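
/*
 * Delete every key in [@start, @end) from btree @id, committing after each
 * deletion so the transaction doesn't grow without bound. Transaction
 * restarts are handled internally; trans_was_restarted() tells the caller
 * whether a restart occurred.
 */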
int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
				  struct bpos start, struct bpos end,
				  unsigned update_flags,
				  u64 *journal_seq)
{
	u32 restart_count = trans->restart_count;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret = 0;

	bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_INTENT);
	while ((k = bch2_btree_iter_peek_upto(&iter, end)).k) {
		struct disk_reservation disk_res =
			bch2_disk_reservation_init(trans->c, 0);
		struct bkey_i delete;

		ret = bkey_err(k);
		if (ret)
			goto err;

		bkey_init(&delete.k);

		/*
		 * This could probably be more efficient for extents:
		 */

		/*
		 * For extents, iter.pos won't necessarily be the same as
		 * bkey_start_pos(k.k) (for non extents they always will be the
		 * same). It's important that we delete starting from iter.pos
		 * because the range we want to delete could start in the middle
		 * of k.
		 *
		 * (bch2_btree_iter_peek() does guarantee that iter.pos >=
		 * bkey_start_pos(k.k)).
		 */
		delete.k.p = iter.pos;

		if (iter.flags & BTREE_ITER_IS_EXTENTS)
			bch2_key_resize(&delete.k,
					bpos_min(end, k.k->p).offset -
					iter.pos.offset);

		ret = bch2_trans_update(trans, &iter, &delete, update_flags) ?:
		      bch2_trans_commit(trans, &disk_res, journal_seq,
					BTREE_INSERT_NOFAIL);
		bch2_disk_reservation_put(trans->c, &disk_res);
err:
		/*
		 * the bch2_trans_begin() call is in a weird place because we
		 * need to call it after every transaction commit, to avoid path
		 * overflow, but don't want to call it if the delete operation
		 * is a no-op and we have no work to do:
		 */
		bch2_trans_begin(trans);

		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			ret = 0;
		if (ret)
			break;
	}
	bch2_trans_iter_exit(trans, &iter);

	return ret ?: trans_was_restarted(trans, restart_count);
}

/*
 * bch2_btree_delete_range - delete everything within a given range
 *
 * Range is a half open interval - [start, end)
 */
int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
			    struct bpos start, struct bpos end,
			    unsigned update_flags,
			    u64 *journal_seq)
{
	int ret = bch2_trans_run(c,
			bch2_btree_delete_range_trans(trans, id, start, end,
						      update_flags, journal_seq));
	if (ret == -BCH_ERR_transaction_restart_nested)
		ret = 0;
	return ret;
}

int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree,
		       struct bpos pos, bool set)
{
	struct bkey_i *k;
	int ret = 0;

	k = bch2_trans_kmalloc_nomemzero(trans, sizeof(*k));
	ret = PTR_ERR_OR_ZERO(k);
	if (unlikely(ret))
		return ret;

	bkey_init(&k->k);
	k->k.type = set ? KEY_TYPE_set : KEY_TYPE_deleted;
	k->k.p = pos;

	return bch2_trans_update_buffered(trans, btree, k);
}

__printf(2, 0)
static int __bch2_trans_log_msg(darray_u64 *entries, const char *fmt, va_list args)
{
	struct printbuf buf = PRINTBUF;
	struct jset_entry_log *l;
	unsigned u64s;
	int ret;

	prt_vprintf(&buf, fmt, args);
	ret = buf.allocation_failure ? -BCH_ERR_ENOMEM_trans_log_msg : 0;
	if (ret)
		goto err;

	u64s = DIV_ROUND_UP(buf.pos, sizeof(u64));

	ret = darray_make_room(entries, jset_u64s(u64s));
	if (ret)
		goto err;

	l = (void *) &darray_top(*entries);
	l->entry.u64s		= cpu_to_le16(u64s);
	l->entry.btree_id	= 0;
	l->entry.level		= 1;
	l->entry.type		= BCH_JSET_ENTRY_log;
	l->entry.pad[0]		= 0;
	l->entry.pad[1]		= 0;
	l->entry.pad[2]		= 0;
	memcpy(l->d, buf.buf, buf.pos);
	while (buf.pos & 7)
		l->d[buf.pos++] = '\0';

	entries->nr += jset_u64s(u64s);
err:
	printbuf_exit(&buf);
	return ret;
}

__printf(3, 0)
static int
__bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt,
		  va_list args)
{
	int ret;

	if (!test_bit(JOURNAL_STARTED, &c->journal.flags)) {
		ret = __bch2_trans_log_msg(&c->journal.early_journal_entries, fmt, args);
	} else {
		ret = bch2_trans_do(c, NULL, NULL,
				    BTREE_INSERT_LAZY_RW|commit_flags,
			__bch2_trans_log_msg(&trans->extra_journal_entries, fmt, args));
	}

	return ret;
}
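
/*
 * Log a message to the journal; before the journal has started, the message
 * is appended to the early journal entries instead.
 */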
__printf(2, 3)
int bch2_fs_log_msg(struct bch_fs *c, const char *fmt, ...)
{
	va_list args;
	int ret;

	va_start(args, fmt);
	ret = __bch2_fs_log_msg(c, 0, fmt, args);
	va_end(args);
	return ret;
}

/*
 * Use for logging messages during recovery to enable reserved space and avoid
 * blocking.
 */
__printf(2, 3)
int bch2_journal_log_msg(struct bch_fs *c, const char *fmt, ...)
{
	va_list args;
	int ret;

	va_start(args, fmt);
	ret = __bch2_fs_log_msg(c, BCH_WATERMARK_reclaim, fmt, args);
	va_end(args);
	return ret;
}