// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "btree_update.h"
#include "btree_iter.h"
#include "btree_journal_iter.h"
#include "btree_locking.h"
#include "buckets.h"
#include "debug.h"
#include "errcode.h"
#include "error.h"
#include "extents.h"
#include "keylist.h"
#include "snapshot.h"
#include "trace.h"

static inline int btree_insert_entry_cmp(const struct btree_insert_entry *l,
					 const struct btree_insert_entry *r)
{
	return   cmp_int(l->btree_id,	r->btree_id) ?:
		 cmp_int(l->cached,	r->cached) ?:
		 -cmp_int(l->level,	r->level) ?:
		 bpos_cmp(l->k->k.p,	r->k->k.p);
}

static int __must_check
bch2_trans_update_by_path(struct btree_trans *, struct btree_path *,
			  struct bkey_i *, enum btree_update_flags,
			  unsigned long ip);

static noinline int extent_front_merge(struct btree_trans *trans,
				       struct btree_iter *iter,
				       struct bkey_s_c k,
				       struct bkey_i **insert,
				       enum btree_update_flags flags)
{
	struct bch_fs *c = trans->c;
	struct bkey_i *update;
	int ret;

	update = bch2_bkey_make_mut_noupdate(trans, k);
	ret = PTR_ERR_OR_ZERO(update);
	if (ret)
		return ret;

	if (!bch2_bkey_merge(c, bkey_i_to_s(update), bkey_i_to_s_c(*insert)))
		return 0;

	ret =   bch2_key_has_snapshot_overwrites(trans, iter->btree_id, k.k->p) ?:
		bch2_key_has_snapshot_overwrites(trans, iter->btree_id, (*insert)->k.p);
	if (ret < 0)
		return ret;
	if (ret)
		return 0;

	ret = bch2_btree_delete_at(trans, iter, flags);
	if (ret)
		return ret;

	*insert = update;
	return 0;
}

static noinline int extent_back_merge(struct btree_trans *trans,
				      struct btree_iter *iter,
				      struct bkey_i *insert,
				      struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	int ret;

	ret =   bch2_key_has_snapshot_overwrites(trans, iter->btree_id, insert->k.p) ?:
		bch2_key_has_snapshot_overwrites(trans, iter->btree_id, k.k->p);
	if (ret < 0)
		return ret;
	if (ret)
		return 0;

	bch2_bkey_merge(c, bkey_i_to_s(insert), k);
	return 0;
}

/*
 * When deleting, check if we need to emit a whiteout (because we're overwriting
 * something in an ancestor snapshot)
 */
static int need_whiteout_for_snapshot(struct btree_trans *trans,
				      enum btree_id btree_id, struct bpos pos)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	u32 snapshot = pos.snapshot;
	int ret;

	if (!bch2_snapshot_parent(trans->c, pos.snapshot))
		return 0;

	pos.snapshot++;

	for_each_btree_key_norestart(trans, iter, btree_id, pos,
			   BTREE_ITER_ALL_SNAPSHOTS|
			   BTREE_ITER_NOPRESERVE, k, ret) {
		if (!bkey_eq(k.k->p, pos))
			break;

		if (bch2_snapshot_is_ancestor(trans->c, snapshot,
					      k.k->p.snapshot)) {
			ret = !bkey_whiteout(k.k);
			break;
		}
	}
	bch2_trans_iter_exit(trans, &iter);

	return ret;
}
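
/*
 * A key at @old_pos, visible in an ancestor snapshot, is being moved (or a
 * fragment of it split off) to @new_pos. A descendant snapshot that has its
 * own key at @old_pos wasn't seeing the ancestor's key there, and shouldn't
 * start seeing it at the new position either - so, for each such snapshot,
 * insert a whiteout at @new_pos unless a key already exists there.
 */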
int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans,
				     enum btree_id id,
				     struct bpos old_pos,
				     struct bpos new_pos)
{
	struct bch_fs *c = trans->c;
	struct btree_iter old_iter, new_iter = { NULL };
	struct bkey_s_c old_k, new_k;
	snapshot_id_list s;
	struct bkey_i *update;
	int ret = 0;

	if (!bch2_snapshot_has_children(c, old_pos.snapshot))
		return 0;

	darray_init(&s);

	bch2_trans_iter_init(trans, &old_iter, id, old_pos,
			     BTREE_ITER_NOT_EXTENTS|
			     BTREE_ITER_ALL_SNAPSHOTS);
	while ((old_k = bch2_btree_iter_prev(&old_iter)).k &&
	       !(ret = bkey_err(old_k)) &&
	       bkey_eq(old_pos, old_k.k->p)) {
		struct bpos whiteout_pos =
			SPOS(new_pos.inode, new_pos.offset, old_k.k->p.snapshot);

		if (!bch2_snapshot_is_ancestor(c, old_k.k->p.snapshot, old_pos.snapshot) ||
		    snapshot_list_has_ancestor(c, &s, old_k.k->p.snapshot))
			continue;

		new_k = bch2_bkey_get_iter(trans, &new_iter, id, whiteout_pos,
					   BTREE_ITER_NOT_EXTENTS|
					   BTREE_ITER_INTENT);
		ret = bkey_err(new_k);
		if (ret)
			break;

		if (new_k.k->type == KEY_TYPE_deleted) {
			update = bch2_trans_kmalloc(trans, sizeof(struct bkey_i));
			ret = PTR_ERR_OR_ZERO(update);
			if (ret)
				break;

			bkey_init(&update->k);
			update->k.p = whiteout_pos;
			update->k.type = KEY_TYPE_whiteout;

			ret = bch2_trans_update(trans, &new_iter, update,
						BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
		}
		bch2_trans_iter_exit(trans, &new_iter);

		ret = snapshot_list_add(c, &s, old_k.k->p.snapshot);
		if (ret)
			break;
	}
	bch2_trans_iter_exit(trans, &new_iter);
	bch2_trans_iter_exit(trans, &old_iter);
	darray_exit(&s);

	return ret;
}
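
/*
 * An existing key @old overlaps the key @new being inserted: emit whatever
 * splits of @old are needed - a front split if @old starts before @new, a
 * back split if @old ends past @new - and, when @old and @new are in
 * different snapshots, the extra fragments and whiteouts required so that
 * snapshots which shouldn't see @new still see all of @old.
 */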
int bch2_trans_update_extent_overwrite(struct btree_trans *trans,
				       struct btree_iter *iter,
				       enum btree_update_flags flags,
				       struct bkey_s_c old,
				       struct bkey_s_c new)
{
	enum btree_id btree_id = iter->btree_id;
	struct bkey_i *update;
	struct bpos new_start = bkey_start_pos(new.k);
	bool front_split = bkey_lt(bkey_start_pos(old.k), new_start);
	bool back_split  = bkey_gt(old.k->p, new.k->p);
	int ret = 0, compressed_sectors;

	/*
	 * If we're going to be splitting a compressed extent, note it
	 * so that __bch2_trans_commit() can increase our disk
	 * reservation:
	 */
	if (((front_split && back_split) ||
	     ((front_split || back_split) && old.k->p.snapshot != new.k->p.snapshot)) &&
	    (compressed_sectors = bch2_bkey_sectors_compressed(old)))
		trans->extra_journal_res += compressed_sectors;

	if (front_split) {
		update = bch2_bkey_make_mut_noupdate(trans, old);
		if ((ret = PTR_ERR_OR_ZERO(update)))
			return ret;

		bch2_cut_back(new_start, update);

		ret =   bch2_insert_snapshot_whiteouts(trans, btree_id,
					old.k->p, update->k.p) ?:
			bch2_btree_insert_nonextent(trans, btree_id, update,
					BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags);
		if (ret)
			return ret;
	}

	/* If we're overwriting in a different snapshot - middle split: */
	if (old.k->p.snapshot != new.k->p.snapshot &&
	    (front_split || back_split)) {
		update = bch2_bkey_make_mut_noupdate(trans, old);
		if ((ret = PTR_ERR_OR_ZERO(update)))
			return ret;

		bch2_cut_front(new_start, update);
		bch2_cut_back(new.k->p, update);

		ret =   bch2_insert_snapshot_whiteouts(trans, btree_id,
					old.k->p, update->k.p) ?:
			bch2_btree_insert_nonextent(trans, btree_id, update,
					BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags);
		if (ret)
			return ret;
	}

	if (bkey_le(old.k->p, new.k->p)) {
		update = bch2_trans_kmalloc(trans, sizeof(*update));
		if ((ret = PTR_ERR_OR_ZERO(update)))
			return ret;

		bkey_init(&update->k);
		update->k.p = old.k->p;
		update->k.p.snapshot = new.k->p.snapshot;

		if (new.k->p.snapshot != old.k->p.snapshot) {
			update->k.type = KEY_TYPE_whiteout;
		} else if (btree_type_has_snapshots(btree_id)) {
			ret = need_whiteout_for_snapshot(trans, btree_id, update->k.p);
			if (ret < 0)
				return ret;
			if (ret)
				update->k.type = KEY_TYPE_whiteout;
		}

		ret = bch2_btree_insert_nonextent(trans, btree_id, update,
					BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags);
		if (ret)
			return ret;
	}

	if (back_split) {
		update = bch2_bkey_make_mut_noupdate(trans, old);
		if ((ret = PTR_ERR_OR_ZERO(update)))
			return ret;

		bch2_cut_front(new.k->p, update);

		ret = bch2_trans_update_by_path(trans, iter->path, update,
					  BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|
					  flags, _RET_IP_);
		if (ret)
			return ret;
	}

	return 0;
}

static int bch2_trans_update_extent(struct btree_trans *trans,
				    struct btree_iter *orig_iter,
				    struct bkey_i *insert,
				    enum btree_update_flags flags)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	enum btree_id btree_id = orig_iter->btree_id;
	int ret = 0;

	bch2_trans_iter_init(trans, &iter, btree_id, bkey_start_pos(&insert->k),
			     BTREE_ITER_INTENT|
			     BTREE_ITER_WITH_UPDATES|
			     BTREE_ITER_NOT_EXTENTS);
	k = bch2_btree_iter_peek_upto(&iter, POS(insert->k.p.inode, U64_MAX));
	if ((ret = bkey_err(k)))
		goto err;
	if (!k.k)
		goto out;

	if (bkey_eq(k.k->p, bkey_start_pos(&insert->k))) {
		if (bch2_bkey_maybe_mergable(k.k, &insert->k)) {
			ret = extent_front_merge(trans, &iter, k, &insert, flags);
			if (ret)
				goto err;
		}

		goto next;
	}

	while (bkey_gt(insert->k.p, bkey_start_pos(k.k))) {
		bool done = bkey_lt(insert->k.p, k.k->p);

		ret = bch2_trans_update_extent_overwrite(trans, &iter, flags, k, bkey_i_to_s_c(insert));
		if (ret)
			goto err;

		if (done)
			goto out;
next:
		bch2_btree_iter_advance(&iter);
		k = bch2_btree_iter_peek_upto(&iter, POS(insert->k.p.inode, U64_MAX));
		if ((ret = bkey_err(k)))
			goto err;
		if (!k.k)
			goto out;
	}

	if (bch2_bkey_maybe_mergable(&insert->k, k.k)) {
		ret = extent_back_merge(trans, &iter, insert, k);
		if (ret)
			goto err;
	}
out:
	if (!bkey_deleted(&insert->k))
		ret = bch2_btree_insert_nonextent(trans, btree_id, insert, flags);
err:
	bch2_trans_iter_exit(trans, &iter);

	return ret;
}
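
/*
 * A new key is being created via the key cache, and nothing exists in the
 * btree at that position yet: add the update to the underlying btree as
 * well, and flag the key cache update as already flushed, with its triggers
 * suppressed (the btree update will run them). This maintains the invariant
 * described in bch2_trans_update_by_path() below: a key present in the key
 * cache must also exist in the btree.
 */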
static noinline int flush_new_cached_update(struct btree_trans *trans,
					    struct btree_path *path,
					    struct btree_insert_entry *i,
					    enum btree_update_flags flags,
					    unsigned long ip)
{
	struct btree_path *btree_path;
	struct bkey k;
	int ret;

	btree_path = bch2_path_get(trans, path->btree_id, path->pos, 1, 0,
				   BTREE_ITER_INTENT, _THIS_IP_);
	ret = bch2_btree_path_traverse(trans, btree_path, 0);
	if (ret)
		goto out;

	/*
	 * The old key in the insert entry might actually refer to an existing
	 * key in the btree that has been deleted from cache and not yet
	 * flushed. Check for this and skip the flush so we don't run triggers
	 * against a stale key.
	 */
	bch2_btree_path_peek_slot_exact(btree_path, &k);
	if (!bkey_deleted(&k))
		goto out;

	i->key_cache_already_flushed = true;
	i->flags |= BTREE_TRIGGER_NORUN;

	btree_path_set_should_be_locked(btree_path);
	ret = bch2_trans_update_by_path(trans, btree_path, i->k, flags, ip);
out:
	bch2_path_put(trans, btree_path, true);
	return ret;
}

static int __must_check
bch2_trans_update_by_path(struct btree_trans *trans, struct btree_path *path,
			  struct bkey_i *k, enum btree_update_flags flags,
			  unsigned long ip)
{
	struct bch_fs *c = trans->c;
	struct btree_insert_entry *i, n;
	u64 seq = 0;
	int cmp;

	EBUG_ON(!path->should_be_locked);
	EBUG_ON(trans->nr_updates >= BTREE_ITER_MAX);
	EBUG_ON(!bpos_eq(k->k.p, path->pos));

	/*
	 * The transaction journal res hasn't been allocated at this point.
	 * That occurs at commit time. Reuse the seq field to pass in the seq
	 * of a prejournaled key.
	 */
	if (flags & BTREE_UPDATE_PREJOURNAL)
		seq = trans->journal_res.seq;

	n = (struct btree_insert_entry) {
		.flags		= flags,
		.bkey_type	= __btree_node_type(path->level, path->btree_id),
		.btree_id	= path->btree_id,
		.level		= path->level,
		.cached		= path->cached,
		.path		= path,
		.k		= k,
		.seq		= seq,
		.ip_allocated	= ip,
	};

#ifdef CONFIG_BCACHEFS_DEBUG
	trans_for_each_update(trans, i)
		BUG_ON(i != trans->updates &&
		       btree_insert_entry_cmp(i - 1, i) >= 0);
#endif

	/*
	 * Pending updates are kept sorted: first, find position of new update,
	 * then delete/trim any updates the new update overwrites:
	 */
	trans_for_each_update(trans, i) {
		cmp = btree_insert_entry_cmp(&n, i);
		if (cmp <= 0)
			break;
	}

	if (!cmp && i < trans->updates + trans->nr_updates) {
		EBUG_ON(i->insert_trigger_run || i->overwrite_trigger_run);

		bch2_path_put(trans, i->path, true);
		i->flags	= n.flags;
		i->cached	= n.cached;
		i->k		= n.k;
		i->path		= n.path;
		i->seq		= n.seq;
		i->ip_allocated	= n.ip_allocated;
	} else {
		array_insert_item(trans->updates, trans->nr_updates,
				  i - trans->updates, n);

		i->old_v = bch2_btree_path_peek_slot_exact(path, &i->old_k).v;
		i->old_btree_u64s = !bkey_deleted(&i->old_k) ? i->old_k.u64s : 0;

		if (unlikely(trans->journal_replay_not_finished)) {
			struct bkey_i *j_k =
				bch2_journal_keys_peek_slot(c, n.btree_id, n.level, k->k.p);

			if (j_k) {
				i->old_k = j_k->k;
				i->old_v = &j_k->v;
			}
		}
	}

	__btree_path_get(i->path, true);

	/*
	 * If a key is present in the key cache, it must also exist in the
	 * btree - this is necessary for cache coherency. When iterating over
	 * a btree that's cached in the key cache, the btree iter code checks
	 * the key cache - but the key has to exist in the btree for that to
	 * work:
	 */
	if (path->cached && bkey_deleted(&i->old_k))
		return flush_new_cached_update(trans, path, i, flags, ip);

	return 0;
}

static noinline int bch2_trans_update_get_key_cache(struct btree_trans *trans,
						    struct btree_iter *iter,
						    struct btree_path *path)
{
	if (!iter->key_cache_path ||
	    !iter->key_cache_path->should_be_locked ||
	    !bpos_eq(iter->key_cache_path->pos, iter->pos)) {
		struct bkey_cached *ck;
		int ret;

		if (!iter->key_cache_path)
			iter->key_cache_path =
				bch2_path_get(trans, path->btree_id, path->pos, 1, 0,
					      BTREE_ITER_INTENT|
					      BTREE_ITER_CACHED, _THIS_IP_);

		iter->key_cache_path =
			bch2_btree_path_set_pos(trans, iter->key_cache_path, path->pos,
						iter->flags & BTREE_ITER_INTENT,
						_THIS_IP_);

		ret = bch2_btree_path_traverse(trans, iter->key_cache_path,
					       BTREE_ITER_CACHED);
		if (unlikely(ret))
			return ret;

		ck = (void *) iter->key_cache_path->l[0].b;

		if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
			trace_and_count(trans->c, trans_restart_key_cache_raced, trans, _RET_IP_);
			return btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_raced);
		}

		btree_path_set_should_be_locked(iter->key_cache_path);
	}

	return 0;
}
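
/*
 * Main interface for queueing an update within a btree_trans: nothing is
 * written until bch2_trans_commit() is called. A rough sketch of the usual
 * pattern (bch2_btree_insert_nonextent() below is a real example):
 *
 *	bch2_trans_iter_init(trans, &iter, btree, k->k.p, BTREE_ITER_INTENT);
 *	ret =   bch2_btree_iter_traverse(&iter) ?:
 *		bch2_trans_update(trans, &iter, k, 0);
 *	bch2_trans_iter_exit(trans, &iter);
 *
 * Extent updates are handed off to bch2_trans_update_extent(), deletions in
 * snapshot btrees may become whiteouts, and updates to cached btrees are
 * redirected to the key cache.
 */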
int __must_check bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter,
				   struct bkey_i *k, enum btree_update_flags flags)
{
	struct btree_path *path = iter->update_path ?: iter->path;
	int ret;

	if (iter->flags & BTREE_ITER_IS_EXTENTS)
		return bch2_trans_update_extent(trans, iter, k, flags);

	if (bkey_deleted(&k->k) &&
	    !(flags & BTREE_UPDATE_KEY_CACHE_RECLAIM) &&
	    (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)) {
		ret = need_whiteout_for_snapshot(trans, iter->btree_id, k->k.p);
		if (unlikely(ret < 0))
			return ret;

		if (ret)
			k->k.type = KEY_TYPE_whiteout;
	}

	/*
	 * Ensure that updates to cached btrees go to the key cache:
	 */
	if (!(flags & BTREE_UPDATE_KEY_CACHE_RECLAIM) &&
	    !path->cached &&
	    !path->level &&
	    btree_id_cached(trans->c, path->btree_id)) {
		ret = bch2_trans_update_get_key_cache(trans, iter, path);
		if (ret)
			return ret;

		path = iter->key_cache_path;
	}

	return bch2_trans_update_by_path(trans, path, k, flags, _RET_IP_);
}

/*
 * Add a transaction update for a key that has already been journaled.
 */
int __must_check bch2_trans_update_seq(struct btree_trans *trans, u64 seq,
				       struct btree_iter *iter, struct bkey_i *k,
				       enum btree_update_flags flags)
{
	trans->journal_res.seq = seq;
	return bch2_trans_update(trans, iter, k, flags|BTREE_UPDATE_NOJOURNAL|
						 BTREE_UPDATE_PREJOURNAL);
}
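
/*
 * Queue an update to be written via the btree write buffer rather than as a
 * regular transaction update: if this transaction already has a buffered
 * update at the same position it is overwritten in place, otherwise the
 * wb_updates array is grown as needed and the key appended.
 */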
int __must_check bch2_trans_update_buffered(struct btree_trans *trans,
					    enum btree_id btree,
					    struct bkey_i *k)
{
	struct btree_write_buffered_key *i;
	int ret;

	EBUG_ON(trans->nr_wb_updates > trans->wb_updates_size);
	EBUG_ON(k->k.u64s > BTREE_WRITE_BUFERED_U64s_MAX);

	trans_for_each_wb_update(trans, i) {
		if (i->btree == btree && bpos_eq(i->k.k.p, k->k.p)) {
			bkey_copy(&i->k, k);
			return 0;
		}
	}

	if (!trans->wb_updates ||
	    trans->nr_wb_updates == trans->wb_updates_size) {
		struct btree_write_buffered_key *u;

		if (trans->nr_wb_updates == trans->wb_updates_size) {
			struct btree_transaction_stats *s = btree_trans_stats(trans);

			BUG_ON(trans->wb_updates_size > U8_MAX / 2);
			trans->wb_updates_size = max(1, trans->wb_updates_size * 2);
			if (s)
				s->wb_updates_size = trans->wb_updates_size;
		}

		u = bch2_trans_kmalloc_nomemzero(trans,
					trans->wb_updates_size *
					sizeof(struct btree_write_buffered_key));
		ret = PTR_ERR_OR_ZERO(u);
		if (ret)
			return ret;

		if (trans->nr_wb_updates)
			memcpy(u, trans->wb_updates, trans->nr_wb_updates *
			       sizeof(struct btree_write_buffered_key));
		trans->wb_updates = u;
	}

	trans->wb_updates[trans->nr_wb_updates] = (struct btree_write_buffered_key) {
		.btree	= btree,
	};

	bkey_copy(&trans->wb_updates[trans->nr_wb_updates].k, k);
	trans->nr_wb_updates++;

	return 0;
}
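
/*
 * Point @iter at the first empty slot after the last key in @btree; returns
 * -BCH_ERR_ENOSPC_btree_slot if that slot is past @end. On success the
 * caller is responsible for exiting @iter; on error it is exited here.
 */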
int bch2_bkey_get_empty_slot(struct btree_trans *trans, struct btree_iter *iter,
			     enum btree_id btree, struct bpos end)
{
	struct bkey_s_c k;
	int ret = 0;

	bch2_trans_iter_init(trans, iter, btree, POS_MAX, BTREE_ITER_INTENT);
	k = bch2_btree_iter_prev(iter);
	ret = bkey_err(k);
	if (ret)
		goto err;

	bch2_btree_iter_advance(iter);
	k = bch2_btree_iter_peek_slot(iter);
	ret = bkey_err(k);
	if (ret)
		goto err;

	BUG_ON(k.k->type != KEY_TYPE_deleted);

	if (bkey_gt(k.k->p, end)) {
		ret = -BCH_ERR_ENOSPC_btree_slot;
		goto err;
	}

	return 0;
err:
	bch2_trans_iter_exit(trans, iter);
	return ret;
}

void bch2_trans_commit_hook(struct btree_trans *trans,
			    struct btree_trans_commit_hook *h)
{
	h->next = trans->hooks;
	trans->hooks = h;
}

int bch2_btree_insert_nonextent(struct btree_trans *trans,
				enum btree_id btree, struct bkey_i *k,
				enum btree_update_flags flags)
{
	struct btree_iter iter;
	int ret;

	bch2_trans_iter_init(trans, &iter, btree, k->k.p,
			     BTREE_ITER_CACHED|
			     BTREE_ITER_NOT_EXTENTS|
			     BTREE_ITER_INTENT);
	ret =   bch2_btree_iter_traverse(&iter) ?:
		bch2_trans_update(trans, &iter, k, flags);
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

int bch2_btree_insert_trans(struct btree_trans *trans, enum btree_id id,
			    struct bkey_i *k, enum btree_update_flags flags)
{
	struct btree_iter iter;
	int ret;

	bch2_trans_iter_init(trans, &iter, id, bkey_start_pos(&k->k),
			     BTREE_ITER_CACHED|
			     BTREE_ITER_INTENT);
	ret =   bch2_btree_iter_traverse(&iter) ?:
		bch2_trans_update(trans, &iter, k, flags);
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

/**
 * bch2_btree_insert - insert a key into the btree specified by @id
 * @c:			pointer to struct bch_fs
 * @id:			btree to insert into
 * @k:			key to insert
 * @disk_res:		must be non-NULL whenever inserting or potentially
 *			splitting data extents
 * @flags:		transaction commit flags
 *
 * Returns:		0 on success, error code on failure
 */
int bch2_btree_insert(struct bch_fs *c, enum btree_id id, struct bkey_i *k,
		      struct disk_reservation *disk_res, int flags)
{
	return bch2_trans_do(c, disk_res, NULL, flags,
			     bch2_btree_insert_trans(trans, id, k, 0));
}

int bch2_btree_delete_extent_at(struct btree_trans *trans, struct btree_iter *iter,
				unsigned len, unsigned update_flags)
{
	struct bkey_i *k;

	k = bch2_trans_kmalloc(trans, sizeof(*k));
	if (IS_ERR(k))
		return PTR_ERR(k);

	bkey_init(&k->k);
	k->k.p = iter->pos;
	bch2_key_resize(&k->k, len);
	return bch2_trans_update(trans, iter, k, update_flags);
}

int bch2_btree_delete_at(struct btree_trans *trans,
			 struct btree_iter *iter, unsigned update_flags)
{
	return bch2_btree_delete_extent_at(trans, iter, 0, update_flags);
}

int bch2_btree_delete_at_buffered(struct btree_trans *trans,
				  enum btree_id btree, struct bpos pos)
{
	struct bkey_i *k;

	k = bch2_trans_kmalloc(trans, sizeof(*k));
	if (IS_ERR(k))
		return PTR_ERR(k);

	bkey_init(&k->k);
	k->k.p = pos;
	return bch2_trans_update_buffered(trans, btree, k);
}

int bch2_btree_delete(struct btree_trans *trans,
		      enum btree_id btree, struct bpos pos,
		      unsigned update_flags)
{
	struct btree_iter iter;
	int ret;

	bch2_trans_iter_init(trans, &iter, btree, pos,
			     BTREE_ITER_CACHED|
			     BTREE_ITER_INTENT);
	ret =   bch2_btree_iter_traverse(&iter) ?:
		bch2_btree_delete_at(trans, &iter, update_flags);
	bch2_trans_iter_exit(trans, &iter);

	return ret;
}
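
/*
 * Delete keys from @start to @end in btree @id, committing each deletion
 * separately so the transaction never grows too large; transaction restarts
 * from those commits are retried within the loop.
 */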
int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
				  struct bpos start, struct bpos end,
				  unsigned update_flags,
				  u64 *journal_seq)
{
	u32 restart_count = trans->restart_count;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret = 0;

	bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_INTENT);
	while ((k = bch2_btree_iter_peek_upto(&iter, end)).k) {
		struct disk_reservation disk_res =
			bch2_disk_reservation_init(trans->c, 0);
		struct bkey_i delete;

		ret = bkey_err(k);
		if (ret)
			goto err;

		bkey_init(&delete.k);

		/*
		 * This could probably be more efficient for extents:
		 */

		/*
		 * For extents, iter.pos won't necessarily be the same as
		 * bkey_start_pos(k.k) (for non extents they always will be the
		 * same). It's important that we delete starting from iter.pos
		 * because the range we want to delete could start in the middle
		 * of k.
		 *
		 * (bch2_btree_iter_peek() does guarantee that iter.pos >=
		 * bkey_start_pos(k.k)).
		 */
		delete.k.p = iter.pos;

		if (iter.flags & BTREE_ITER_IS_EXTENTS)
			bch2_key_resize(&delete.k,
					bpos_min(end, k.k->p).offset -
					iter.pos.offset);

		ret =   bch2_trans_update(trans, &iter, &delete, update_flags) ?:
			bch2_trans_commit(trans, &disk_res, journal_seq,
					  BTREE_INSERT_NOFAIL);
		bch2_disk_reservation_put(trans->c, &disk_res);
err:
		/*
		 * the bch2_trans_begin() call is in a weird place because we
		 * need to call it after every transaction commit, to avoid path
		 * overflow, but don't want to call it if the delete operation
		 * is a no-op and we have no work to do:
		 */
		bch2_trans_begin(trans);

		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			ret = 0;
		if (ret)
			break;
	}
	bch2_trans_iter_exit(trans, &iter);

	return ret ?: trans_was_restarted(trans, restart_count);
}

/*
 * bch2_btree_delete_range - delete everything within a given range
 *
 * Range is a half open interval - [start, end)
 */
int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
			    struct bpos start, struct bpos end,
			    unsigned update_flags,
			    u64 *journal_seq)
{
	int ret = bch2_trans_run(c,
			bch2_btree_delete_range_trans(trans, id, start, end,
						      update_flags, journal_seq));
	if (ret == -BCH_ERR_transaction_restart_nested)
		ret = 0;
	return ret;
}
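
/*
 * Set or clear a single "bit": write, via the btree write buffer, either a
 * KEY_TYPE_set key or a deletion at @pos, depending on @set.
 */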
int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree,
		       struct bpos pos, bool set)
{
	struct bkey_i *k;
	int ret = 0;

	k = bch2_trans_kmalloc_nomemzero(trans, sizeof(*k));
	ret = PTR_ERR_OR_ZERO(k);
	if (unlikely(ret))
		return ret;

	bkey_init(&k->k);
	k->k.type = set ? KEY_TYPE_set : KEY_TYPE_deleted;
	k->k.p = pos;

	return bch2_trans_update_buffered(trans, btree, k);
}

__printf(2, 0)
static int __bch2_trans_log_msg(darray_u64 *entries, const char *fmt, va_list args)
{
	struct printbuf buf = PRINTBUF;
	struct jset_entry_log *l;
	unsigned u64s;
	int ret;

	prt_vprintf(&buf, fmt, args);
	ret = buf.allocation_failure ? -BCH_ERR_ENOMEM_trans_log_msg : 0;
	if (ret)
		goto err;

	u64s = DIV_ROUND_UP(buf.pos, sizeof(u64));

	ret = darray_make_room(entries, jset_u64s(u64s));
	if (ret)
		goto err;

	l = (void *) &darray_top(*entries);
	l->entry.u64s		= cpu_to_le16(u64s);
	l->entry.btree_id	= 0;
	l->entry.level		= 1;
	l->entry.type		= BCH_JSET_ENTRY_log;
	l->entry.pad[0]		= 0;
	l->entry.pad[1]		= 0;
	l->entry.pad[2]		= 0;
	memcpy(l->d, buf.buf, buf.pos);
	while (buf.pos & 7)
		l->d[buf.pos++] = '\0';

	entries->nr += jset_u64s(u64s);
err:
	printbuf_exit(&buf);
	return ret;
}

__printf(3, 0)
static int
__bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt,
		  va_list args)
{
	int ret;

	if (!test_bit(JOURNAL_STARTED, &c->journal.flags)) {
		ret = __bch2_trans_log_msg(&c->journal.early_journal_entries, fmt, args);
	} else {
		ret = bch2_trans_do(c, NULL, NULL,
				    BTREE_INSERT_LAZY_RW|commit_flags,
				    __bch2_trans_log_msg(&trans->extra_journal_entries, fmt, args));
	}

	return ret;
}

__printf(2, 3)
int bch2_fs_log_msg(struct bch_fs *c, const char *fmt, ...)
{
	va_list args;
	int ret;

	va_start(args, fmt);
	ret = __bch2_fs_log_msg(c, 0, fmt, args);
	va_end(args);
	return ret;
}

/*
 * Use for logging messages during recovery to enable reserved space and avoid
 * blocking.
 */
__printf(2, 3)
int bch2_journal_log_msg(struct bch_fs *c, const char *fmt, ...)
{
	va_list args;
	int ret;

	va_start(args, fmt);
	ret = __bch2_fs_log_msg(c, BCH_WATERMARK_reclaim, fmt, args);
	va_end(args);
	return ret;
}