// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "btree_update.h"
#include "btree_iter.h"
#include "btree_journal_iter.h"
#include "btree_locking.h"
#include "buckets.h"
#include "debug.h"
#include "errcode.h"
#include "error.h"
#include "extents.h"
#include "keylist.h"
#include "snapshot.h"
#include "trace.h"

#include <linux/string_helpers.h>

static inline int btree_insert_entry_cmp(const struct btree_insert_entry *l,
					 const struct btree_insert_entry *r)
{
	return   cmp_int(l->sort_order, r->sort_order) ?:
		 cmp_int(l->cached,     r->cached) ?:
		-cmp_int(l->level,      r->level) ?:
		 bpos_cmp(l->k->k.p,    r->k->k.p);
}

static int __must_check
bch2_trans_update_by_path(struct btree_trans *, btree_path_idx_t,
			  struct bkey_i *, enum btree_iter_update_trigger_flags,
			  unsigned long ip);

static noinline int extent_front_merge(struct btree_trans *trans,
				       struct btree_iter *iter,
				       struct bkey_s_c k,
				       struct bkey_i **insert,
				       enum btree_iter_update_trigger_flags flags)
{
	struct bch_fs *c = trans->c;
	struct bkey_i *update;
	int ret;

	if (unlikely(trans->journal_replay_not_finished))
		return 0;

	update = bch2_bkey_make_mut_noupdate(trans, k);
	ret = PTR_ERR_OR_ZERO(update);
	if (ret)
		return ret;

	if (!bch2_bkey_merge(c, bkey_i_to_s(update), bkey_i_to_s_c(*insert)))
		return 0;

	ret =   bch2_key_has_snapshot_overwrites(trans, iter->btree_id, k.k->p) ?:
		bch2_key_has_snapshot_overwrites(trans, iter->btree_id, (*insert)->k.p);
	if (ret < 0)
		return ret;
	if (ret)
		return 0;

	ret = bch2_btree_delete_at(trans, iter, flags);
	if (ret)
		return ret;

	*insert = update;
	return 0;
}

static noinline int extent_back_merge(struct btree_trans *trans,
				      struct btree_iter *iter,
				      struct bkey_i *insert,
				      struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	int ret;

	if (unlikely(trans->journal_replay_not_finished))
		return 0;

	ret =   bch2_key_has_snapshot_overwrites(trans, iter->btree_id, insert->k.p) ?:
		bch2_key_has_snapshot_overwrites(trans, iter->btree_id, k.k->p);
	if (ret < 0)
		return ret;
	if (ret)
		return 0;

	bch2_bkey_merge(c, bkey_i_to_s(insert), k);
	return 0;
}

/*
 * When deleting, check if we need to emit a whiteout (because we're overwriting
 * something in an ancestor snapshot)
 */
static int need_whiteout_for_snapshot(struct btree_trans *trans,
				      enum btree_id btree_id, struct bpos pos)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	u32 snapshot = pos.snapshot;
	int ret;

	if (!bch2_snapshot_parent(trans->c, pos.snapshot))
		return 0;

	pos.snapshot++;

	for_each_btree_key_norestart(trans, iter, btree_id, pos,
				     BTREE_ITER_all_snapshots|
				     BTREE_ITER_nopreserve, k, ret) {
		if (!bkey_eq(k.k->p, pos))
			break;

		if (bch2_snapshot_is_ancestor(trans->c, snapshot,
					      k.k->p.snapshot)) {
			ret = !bkey_whiteout(k.k);
			break;
		}
	}
	bch2_trans_iter_exit(trans, &iter);

	return ret;
}
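
/*
 * Illustrative example (not from the original source): suppose snapshot 4 is a
 * child of snapshot 1 and a key exists at (inode, offset, 1). Deleting the key
 * at (inode, offset, 4) can't simply drop it - a lookup in snapshot 4 would
 * then fall through to the ancestor's key - so need_whiteout_for_snapshot()
 * above tells the caller to write a whiteout at snapshot 4 instead.
 */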

int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans,
				     enum btree_id btree, struct bpos pos,
				     snapshot_id_list *s)
{
	int ret = 0;

	darray_for_each(*s, id) {
		pos.snapshot = *id;

		struct btree_iter iter;
		struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, btree, pos,
						       BTREE_ITER_not_extents|
						       BTREE_ITER_intent);
		ret = bkey_err(k);
		if (ret)
			break;

		if (k.k->type == KEY_TYPE_deleted) {
			struct bkey_i *update = bch2_trans_kmalloc(trans, sizeof(struct bkey_i));
			ret = PTR_ERR_OR_ZERO(update);
			if (ret) {
				bch2_trans_iter_exit(trans, &iter);
				break;
			}

			bkey_init(&update->k);
			update->k.p = pos;
			update->k.type = KEY_TYPE_whiteout;

			ret = bch2_trans_update(trans, &iter, update,
						BTREE_UPDATE_internal_snapshot_node);
		}
		bch2_trans_iter_exit(trans, &iter);

		if (ret)
			break;
	}

	darray_exit(s);
	return ret;
}

int bch2_trans_update_extent_overwrite(struct btree_trans *trans,
				       struct btree_iter *iter,
				       enum btree_iter_update_trigger_flags flags,
				       struct bkey_s_c old,
				       struct bkey_s_c new)
{
	enum btree_id btree_id = iter->btree_id;
	struct bkey_i *update;
	struct bpos new_start = bkey_start_pos(new.k);
	unsigned front_split = bkey_lt(bkey_start_pos(old.k), new_start);
	unsigned back_split  = bkey_gt(old.k->p, new.k->p);
	unsigned middle_split = (front_split || back_split) &&
		old.k->p.snapshot != new.k->p.snapshot;
	unsigned nr_splits = front_split + back_split + middle_split;
	int ret = 0, compressed_sectors;

	/*
	 * If we're going to be splitting a compressed extent, note it
	 * so that __bch2_trans_commit() can increase our disk
	 * reservation:
	 */
	if (nr_splits > 1 &&
	    (compressed_sectors = bch2_bkey_sectors_compressed(old)))
		trans->extra_disk_res += compressed_sectors * (nr_splits - 1);

	if (front_split) {
		update = bch2_bkey_make_mut_noupdate(trans, old);
		if ((ret = PTR_ERR_OR_ZERO(update)))
			return ret;

		bch2_cut_back(new_start, update);

		ret =   bch2_insert_snapshot_whiteouts(trans, btree_id,
						       old.k->p, update->k.p) ?:
			bch2_btree_insert_nonextent(trans, btree_id, update,
						    BTREE_UPDATE_internal_snapshot_node|flags);
		if (ret)
			return ret;
	}

	/* If we're overwriting in a different snapshot - middle split: */
	if (middle_split) {
		update = bch2_bkey_make_mut_noupdate(trans, old);
		if ((ret = PTR_ERR_OR_ZERO(update)))
			return ret;

		bch2_cut_front(new_start, update);
		bch2_cut_back(new.k->p, update);

		ret =   bch2_insert_snapshot_whiteouts(trans, btree_id,
						       old.k->p, update->k.p) ?:
			bch2_btree_insert_nonextent(trans, btree_id, update,
						    BTREE_UPDATE_internal_snapshot_node|flags);
		if (ret)
			return ret;
	}

	if (bkey_le(old.k->p, new.k->p)) {
		update = bch2_trans_kmalloc(trans, sizeof(*update));
		if ((ret = PTR_ERR_OR_ZERO(update)))
			return ret;

		bkey_init(&update->k);
		update->k.p = old.k->p;
		update->k.p.snapshot = new.k->p.snapshot;

		if (new.k->p.snapshot != old.k->p.snapshot) {
			update->k.type = KEY_TYPE_whiteout;
		} else if (btree_type_has_snapshots(btree_id)) {
			ret = need_whiteout_for_snapshot(trans, btree_id, update->k.p);
			if (ret < 0)
				return ret;
			if (ret)
				update->k.type = KEY_TYPE_whiteout;
		}

		ret = bch2_btree_insert_nonextent(trans, btree_id, update,
						  BTREE_UPDATE_internal_snapshot_node|flags);
		if (ret)
			return ret;
	}

	if (back_split) {
		update = bch2_bkey_make_mut_noupdate(trans, old);
		if ((ret = PTR_ERR_OR_ZERO(update)))
			return ret;

		bch2_cut_front(new.k->p, update);

		ret = bch2_trans_update_by_path(trans, iter->path, update,
						BTREE_UPDATE_internal_snapshot_node|
						flags, _RET_IP_);
		if (ret)
			return ret;
	}

	return 0;
}
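
/*
 * Worked example (illustration only, not from the original source): if the new
 * extent lands in the middle of the old one and in a different snapshot, then
 * front_split, middle_split and back_split are all set, so nr_splits == 3.
 * Each resulting fragment still references the full compressed payload, so for
 * an old extent with 8 compressed sectors the function above reserves
 * 8 * (3 - 1) = 16 extra sectors ahead of the commit.
 */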

static int bch2_trans_update_extent(struct btree_trans *trans,
				    struct btree_iter *orig_iter,
				    struct bkey_i *insert,
				    enum btree_iter_update_trigger_flags flags)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	enum btree_id btree_id = orig_iter->btree_id;
	int ret = 0;

	bch2_trans_iter_init(trans, &iter, btree_id, bkey_start_pos(&insert->k),
			     BTREE_ITER_intent|
			     BTREE_ITER_with_updates|
			     BTREE_ITER_not_extents);
	k = bch2_btree_iter_peek_max(trans, &iter, POS(insert->k.p.inode, U64_MAX));
	if ((ret = bkey_err(k)))
		goto err;
	if (!k.k)
		goto out;

	if (bkey_eq(k.k->p, bkey_start_pos(&insert->k))) {
		if (bch2_bkey_maybe_mergable(k.k, &insert->k)) {
			ret = extent_front_merge(trans, &iter, k, &insert, flags);
			if (ret)
				goto err;
		}

		goto next;
	}

	while (bkey_gt(insert->k.p, bkey_start_pos(k.k))) {
		bool done = bkey_lt(insert->k.p, k.k->p);

		ret = bch2_trans_update_extent_overwrite(trans, &iter, flags, k, bkey_i_to_s_c(insert));
		if (ret)
			goto err;

		if (done)
			goto out;
next:
		bch2_btree_iter_advance(trans, &iter);
		k = bch2_btree_iter_peek_max(trans, &iter, POS(insert->k.p.inode, U64_MAX));
		if ((ret = bkey_err(k)))
			goto err;
		if (!k.k)
			goto out;
	}

	if (bch2_bkey_maybe_mergable(&insert->k, k.k)) {
		ret = extent_back_merge(trans, &iter, insert, k);
		if (ret)
			goto err;
	}
out:
	if (!bkey_deleted(&insert->k))
		ret = bch2_btree_insert_nonextent(trans, btree_id, insert, flags);
err:
	bch2_trans_iter_exit(trans, &iter);

	return ret;
}
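
/*
 * Illustrative walkthrough (not from the original source, assuming no
 * ancestor-snapshot whiteouts are needed): inserting an extent covering
 * [10, 20) over existing extents [5, 15) and [15, 25) in the same snapshot.
 * The loop above first overwrites [5, 15): a front split re-inserts [5, 10)
 * and the old key is dropped via a deletion at its position. It then
 * overwrites [15, 25): a back split trims it to [20, 25). Finally the new
 * extent [10, 20) itself is inserted via bch2_btree_insert_nonextent().
 */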

static noinline int flush_new_cached_update(struct btree_trans *trans,
					    struct btree_insert_entry *i,
					    enum btree_iter_update_trigger_flags flags,
					    unsigned long ip)
{
	struct bkey k;
	int ret;

	btree_path_idx_t path_idx =
		bch2_path_get(trans, i->btree_id, i->old_k.p, 1, 0,
			      BTREE_ITER_intent, _THIS_IP_);
	ret = bch2_btree_path_traverse(trans, path_idx, 0);
	if (ret)
		goto out;

	struct btree_path *btree_path = trans->paths + path_idx;

	/*
	 * The old key in the insert entry might actually refer to an existing
	 * key in the btree that has been deleted from cache and not yet
	 * flushed. Check for this and skip the flush so we don't run triggers
	 * against a stale key.
	 */
	bch2_btree_path_peek_slot_exact(btree_path, &k);
	if (!bkey_deleted(&k))
		goto out;

	i->key_cache_already_flushed = true;
	i->flags |= BTREE_TRIGGER_norun;

	btree_path_set_should_be_locked(trans, btree_path);
	ret = bch2_trans_update_by_path(trans, path_idx, i->k, flags, ip);
out:
	bch2_path_put(trans, path_idx, true);
	return ret;
}
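
/*
 * Illustrative note (not from the original source): bch2_trans_update_by_path()
 * below keeps trans->updates sorted by btree_insert_entry_cmp() - trigger sort
 * order, then cached vs. btree, then descending level, then key position - so,
 * for example, two updates queued at the same position collapse into a single
 * entry, with the later key replacing the earlier one.
 */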

static int __must_check
bch2_trans_update_by_path(struct btree_trans *trans, btree_path_idx_t path_idx,
			  struct bkey_i *k, enum btree_iter_update_trigger_flags flags,
			  unsigned long ip)
{
	struct bch_fs *c = trans->c;
	struct btree_insert_entry *i, n;
	int cmp = 1;

	struct btree_path *path = trans->paths + path_idx;
	EBUG_ON(!path->should_be_locked);
	EBUG_ON(trans->nr_updates >= trans->nr_paths);
	EBUG_ON(!bpos_eq(k->k.p, path->pos));

	n = (struct btree_insert_entry) {
		.flags		= flags,
		.sort_order	= btree_trigger_order(path->btree_id),
		.bkey_type	= __btree_node_type(path->level, path->btree_id),
		.btree_id	= path->btree_id,
		.level		= path->level,
		.cached		= path->cached,
		.path		= path_idx,
		.k		= k,
		.ip_allocated	= ip,
	};

#ifdef CONFIG_BCACHEFS_DEBUG
	trans_for_each_update(trans, i)
		BUG_ON(i != trans->updates &&
		       btree_insert_entry_cmp(i - 1, i) >= 0);
#endif

	/*
	 * Pending updates are kept sorted: first, find position of new update,
	 * then delete/trim any updates the new update overwrites:
	 */
	for (i = trans->updates; i < trans->updates + trans->nr_updates; i++) {
		cmp = btree_insert_entry_cmp(&n, i);
		if (cmp <= 0)
			break;
	}

	bool overwrite = !cmp && i < trans->updates + trans->nr_updates;

	if (overwrite) {
		EBUG_ON(i->insert_trigger_run || i->overwrite_trigger_run);

		bch2_path_put(trans, i->path, true);
		i->flags	= n.flags;
		i->cached	= n.cached;
		i->k		= n.k;
		i->path		= n.path;
		i->ip_allocated	= n.ip_allocated;
	} else {
		array_insert_item(trans->updates, trans->nr_updates,
				  i - trans->updates, n);

		i->old_v = bch2_btree_path_peek_slot_exact(path, &i->old_k).v;
		i->old_btree_u64s = !bkey_deleted(&i->old_k) ? i->old_k.u64s : 0;

		if (unlikely(trans->journal_replay_not_finished)) {
			struct bkey_i *j_k =
				bch2_journal_keys_peek_slot(c, n.btree_id, n.level, k->k.p);

			if (j_k) {
				i->old_k = j_k->k;
				i->old_v = &j_k->v;
			}
		}
	}

	__btree_path_get(trans, trans->paths + i->path, true);

	trace_update_by_path(trans, path, i, overwrite);

	/*
	 * If a key is present in the key cache, it must also exist in the
	 * btree - this is necessary for cache coherency. When iterating over
	 * a btree that's cached in the key cache, the btree iter code checks
	 * the key cache - but the key has to exist in the btree for that to
	 * work:
	 */
	if (path->cached && !i->old_btree_u64s)
		return flush_new_cached_update(trans, i, flags, ip);

	return 0;
}

static noinline int bch2_trans_update_get_key_cache(struct btree_trans *trans,
						    struct btree_iter *iter,
						    struct btree_path *path)
{
	struct btree_path *key_cache_path = btree_iter_key_cache_path(trans, iter);

	if (!key_cache_path ||
	    !key_cache_path->should_be_locked ||
	    !bpos_eq(key_cache_path->pos, iter->pos)) {
		struct bkey_cached *ck;
		int ret;

		if (!iter->key_cache_path)
			iter->key_cache_path =
				bch2_path_get(trans, path->btree_id, path->pos, 1, 0,
					      BTREE_ITER_intent|
					      BTREE_ITER_cached, _THIS_IP_);

		iter->key_cache_path =
			bch2_btree_path_set_pos(trans, iter->key_cache_path, path->pos,
						iter->flags & BTREE_ITER_intent,
						_THIS_IP_);

		ret = bch2_btree_path_traverse(trans, iter->key_cache_path, BTREE_ITER_cached);
		if (unlikely(ret))
			return ret;

		ck = (void *) trans->paths[iter->key_cache_path].l[0].b;

		if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
			trace_and_count(trans->c, trans_restart_key_cache_raced, trans, _RET_IP_);
			return btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_raced);
		}

		btree_path_set_should_be_locked(trans, trans->paths + iter->key_cache_path);
	}

	return 0;
}

int __must_check bch2_trans_update_ip(struct btree_trans *trans, struct btree_iter *iter,
				      struct bkey_i *k, enum btree_iter_update_trigger_flags flags,
				      unsigned long ip)
{
	kmsan_check_memory(k, bkey_bytes(&k->k));

	btree_path_idx_t path_idx = iter->update_path ?: iter->path;
	int ret;

	if (iter->flags & BTREE_ITER_is_extents)
		return bch2_trans_update_extent(trans, iter, k, flags);

	if (bkey_deleted(&k->k) &&
	    !(flags & BTREE_UPDATE_key_cache_reclaim) &&
	    (iter->flags & BTREE_ITER_filter_snapshots)) {
		ret = need_whiteout_for_snapshot(trans, iter->btree_id, k->k.p);
		if (unlikely(ret < 0))
			return ret;

		if (ret)
			k->k.type = KEY_TYPE_whiteout;
	}

	/*
	 * Ensure that updates to cached btrees go to the key cache:
	 */
	struct btree_path *path = trans->paths + path_idx;
	if (!(flags & BTREE_UPDATE_key_cache_reclaim) &&
	    !path->cached &&
	    !path->level &&
	    btree_id_cached(trans->c, path->btree_id)) {
		ret = bch2_trans_update_get_key_cache(trans, iter, path);
		if (ret)
			return ret;

		path_idx = iter->key_cache_path;
	}

	return bch2_trans_update_by_path(trans, path_idx, k, flags, ip);
}
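
/*
 * Illustrative sketch (not part of the original file): the read-modify-write
 * pattern most callers of bch2_trans_update() follow - peek a key through an
 * iterator (assumed to have been initialized with BTREE_ITER_intent), take a
 * mutable copy, modify it, then queue it against the same iterator. Nothing is
 * written until the transaction commits.
 */
static int __maybe_unused example_read_modify_write(struct btree_trans *trans,
						    struct btree_iter *iter)
{
	struct bkey_s_c k = bch2_btree_iter_peek_slot(trans, iter);
	int ret = bkey_err(k);
	if (ret)
		return ret;

	struct bkey_i *mut = bch2_bkey_make_mut_noupdate(trans, k);
	ret = PTR_ERR_OR_ZERO(mut);
	if (ret)
		return ret;

	/* ... modify mut here ... */

	return bch2_trans_update(trans, iter, mut, 0);
}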

int bch2_btree_insert_clone_trans(struct btree_trans *trans,
				  enum btree_id btree,
				  struct bkey_i *k)
{
	struct bkey_i *n = bch2_trans_kmalloc(trans, bkey_bytes(&k->k));
	int ret = PTR_ERR_OR_ZERO(n);
	if (ret)
		return ret;

	bkey_copy(n, k);
	return bch2_btree_insert_trans(trans, btree, n, 0);
}

void *__bch2_trans_subbuf_alloc(struct btree_trans *trans,
				struct btree_trans_subbuf *buf,
				unsigned u64s)
{
	unsigned new_top = buf->u64s + u64s;
	unsigned old_size = buf->size;

	if (new_top > buf->size)
		buf->size = roundup_pow_of_two(new_top);

	void *n = bch2_trans_kmalloc_nomemzero(trans, buf->size * sizeof(u64));
	if (IS_ERR(n))
		return n;

	if (buf->u64s)
		memcpy(n,
		       btree_trans_subbuf_base(trans, buf),
		       old_size * sizeof(u64));
	buf->base = (u64 *) n - (u64 *) trans->mem;

	void *p = btree_trans_subbuf_top(trans, buf);
	buf->u64s = new_top;
	return p;
}

int bch2_bkey_get_empty_slot(struct btree_trans *trans, struct btree_iter *iter,
			     enum btree_id btree, struct bpos end)
{
	bch2_trans_iter_init(trans, iter, btree, end, BTREE_ITER_intent);
	struct bkey_s_c k = bch2_btree_iter_peek_prev(trans, iter);
	int ret = bkey_err(k);
	if (ret)
		goto err;

	bch2_btree_iter_advance(trans, iter);
	k = bch2_btree_iter_peek_slot(trans, iter);
	ret = bkey_err(k);
	if (ret)
		goto err;

	BUG_ON(k.k->type != KEY_TYPE_deleted);

	if (bkey_gt(k.k->p, end)) {
		ret = bch_err_throw(trans->c, ENOSPC_btree_slot);
		goto err;
	}

	return 0;
err:
	bch2_trans_iter_exit(trans, iter);
	return ret;
}

void bch2_trans_commit_hook(struct btree_trans *trans,
			    struct btree_trans_commit_hook *h)
{
	h->next = trans->hooks;
	trans->hooks = h;
}

int bch2_btree_insert_nonextent(struct btree_trans *trans,
				enum btree_id btree, struct bkey_i *k,
				enum btree_iter_update_trigger_flags flags)
{
	struct btree_iter iter;
	int ret;

	bch2_trans_iter_init(trans, &iter, btree, k->k.p,
			     BTREE_ITER_cached|
			     BTREE_ITER_not_extents|
			     BTREE_ITER_intent);
	ret   = bch2_btree_iter_traverse(trans, &iter) ?:
		bch2_trans_update(trans, &iter, k, flags);
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

int bch2_btree_insert_trans(struct btree_trans *trans, enum btree_id id,
			    struct bkey_i *k, enum btree_iter_update_trigger_flags flags)
{
	struct btree_iter iter;
	bch2_trans_iter_init(trans, &iter, id, bkey_start_pos(&k->k),
			     BTREE_ITER_intent|flags);
	int ret = bch2_btree_iter_traverse(trans, &iter) ?:
		  bch2_trans_update(trans, &iter, k, flags);
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

/**
 * bch2_btree_insert - insert a key into the given btree
 * @c:			pointer to struct bch_fs
 * @id:			btree to insert into
 * @k:			key to insert
 * @disk_res:		must be non-NULL whenever inserting or potentially
 *			splitting data extents
 * @flags:		transaction commit flags
 * @iter_flags:		btree iter update trigger flags
 *
 * Returns: 0 on success, error code on failure
 */
int bch2_btree_insert(struct bch_fs *c, enum btree_id id, struct bkey_i *k,
		      struct disk_reservation *disk_res, int flags,
		      enum btree_iter_update_trigger_flags iter_flags)
{
	return bch2_trans_commit_do(c, disk_res, NULL, flags,
				    bch2_btree_insert_trans(trans, id, k, iter_flags));
}
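
/*
 * Illustrative sketch (not part of the original file): a minimal caller of
 * bch2_btree_insert() committing one key outside of an existing transaction.
 * The btree ID is a placeholder; no disk reservation is passed because no data
 * extents are being split.
 */
static int __maybe_unused example_insert_one_key(struct bch_fs *c, struct bkey_i *k)
{
	return bch2_btree_insert(c, BTREE_ID_xattrs, k, NULL, 0, 0);
}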

int bch2_btree_delete_at(struct btree_trans *trans,
			 struct btree_iter *iter, unsigned update_flags)
{
	struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k));
	int ret = PTR_ERR_OR_ZERO(k);
	if (ret)
		return ret;

	bkey_init(&k->k);
	k->k.p = iter->pos;
	return bch2_trans_update(trans, iter, k, update_flags);
}

int bch2_btree_delete(struct btree_trans *trans,
		      enum btree_id btree, struct bpos pos,
		      unsigned update_flags)
{
	struct btree_iter iter;
	int ret;

	bch2_trans_iter_init(trans, &iter, btree, pos,
			     BTREE_ITER_cached|
			     BTREE_ITER_intent);
	ret   = bch2_btree_iter_traverse(trans, &iter) ?:
		bch2_btree_delete_at(trans, &iter, update_flags);
	bch2_trans_iter_exit(trans, &iter);

	return ret;
}

int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
				  struct bpos start, struct bpos end,
				  unsigned update_flags,
				  u64 *journal_seq)
{
	u32 restart_count = trans->restart_count;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret = 0;

	bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_intent);
	while ((k = bch2_btree_iter_peek_max(trans, &iter, end)).k) {
		struct disk_reservation disk_res =
			bch2_disk_reservation_init(trans->c, 0);
		struct bkey_i delete;

		ret = bkey_err(k);
		if (ret)
			goto err;

		bkey_init(&delete.k);

		/*
		 * This could probably be more efficient for extents:
		 */

		/*
		 * For extents, iter.pos won't necessarily be the same as
		 * bkey_start_pos(k.k) (for non extents they always will be the
		 * same). It's important that we delete starting from iter.pos
		 * because the range we want to delete could start in the middle
		 * of k.
		 *
		 * (bch2_btree_iter_peek() does guarantee that iter.pos >=
		 * bkey_start_pos(k.k)).
		 */
		delete.k.p = iter.pos;

		if (iter.flags & BTREE_ITER_is_extents)
			bch2_key_resize(&delete.k,
					bpos_min(end, k.k->p).offset -
					iter.pos.offset);

		ret   = bch2_trans_update(trans, &iter, &delete, update_flags) ?:
			bch2_trans_commit(trans, &disk_res, journal_seq,
					  BCH_TRANS_COMMIT_no_enospc);
		bch2_disk_reservation_put(trans->c, &disk_res);
err:
		/*
		 * the bch2_trans_begin() call is in a weird place because we
		 * need to call it after every transaction commit, to avoid path
		 * overflow, but don't want to call it if the delete operation
		 * is a no-op and we have no work to do:
		 */
		bch2_trans_begin(trans);

		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			ret = 0;
		if (ret)
			break;
	}
	bch2_trans_iter_exit(trans, &iter);

	return ret ?: trans_was_restarted(trans, restart_count);
}

/*
 * bch2_btree_delete_range - delete everything within a given range
 *
 * Range is a half open interval - [start, end)
 */
int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
			    struct bpos start, struct bpos end,
			    unsigned update_flags,
			    u64 *journal_seq)
{
	int ret = bch2_trans_run(c,
			bch2_btree_delete_range_trans(trans, id, start, end,
						      update_flags, journal_seq));
	if (ret == -BCH_ERR_transaction_restart_nested)
		ret = 0;
	return ret;
}
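
/*
 * Illustrative sketch (not part of the original file): using the half-open
 * interval convention above to delete every key belonging to one inode.
 * The btree ID, inode number and snapshot are placeholders for the example.
 */
static int __maybe_unused example_delete_inode_keys(struct bch_fs *c, u64 inum, u32 snapshot)
{
	return bch2_btree_delete_range(c, BTREE_ID_xattrs,
				       SPOS(inum, 0, snapshot),
				       SPOS(inum + 1, 0, snapshot),
				       0, NULL);
}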

int bch2_btree_bit_mod_iter(struct btree_trans *trans, struct btree_iter *iter, bool set)
{
	struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k));
	int ret = PTR_ERR_OR_ZERO(k);
	if (ret)
		return ret;

	bkey_init(&k->k);
	k->k.type = set ? KEY_TYPE_set : KEY_TYPE_deleted;
	k->k.p = iter->pos;
	if (iter->flags & BTREE_ITER_is_extents)
		bch2_key_resize(&k->k, 1);

	return bch2_trans_update(trans, iter, k, 0);
}

int bch2_btree_bit_mod(struct btree_trans *trans, enum btree_id btree,
		       struct bpos pos, bool set)
{
	struct btree_iter iter;
	bch2_trans_iter_init(trans, &iter, btree, pos, BTREE_ITER_intent);

	int ret = bch2_btree_iter_traverse(trans, &iter) ?:
		  bch2_btree_bit_mod_iter(trans, &iter, set);
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

int bch2_btree_bit_mod_buffered(struct btree_trans *trans, enum btree_id btree,
				struct bpos pos, bool set)
{
	struct bkey_i k;

	bkey_init(&k.k);
	k.k.type = set ? KEY_TYPE_set : KEY_TYPE_deleted;
	k.k.p = pos;

	return bch2_trans_update_buffered(trans, btree, &k);
}

static int __bch2_trans_log_str(struct btree_trans *trans, const char *str, unsigned len)
{
	unsigned u64s = DIV_ROUND_UP(len, sizeof(u64));

	struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, jset_u64s(u64s));
	int ret = PTR_ERR_OR_ZERO(e);
	if (ret)
		return ret;

	struct jset_entry_log *l = container_of(e, struct jset_entry_log, entry);
	journal_entry_init(e, BCH_JSET_ENTRY_log, 0, 1, u64s);
	memcpy_and_pad(l->d, u64s * sizeof(u64), str, len, 0);
	return 0;
}

int bch2_trans_log_str(struct btree_trans *trans, const char *str)
{
	return __bch2_trans_log_str(trans, str, strlen(str));
}

int bch2_trans_log_msg(struct btree_trans *trans, struct printbuf *buf)
{
	int ret = buf->allocation_failure ? -BCH_ERR_ENOMEM_trans_log_msg : 0;
	if (ret)
		return ret;

	return __bch2_trans_log_str(trans, buf->buf, buf->pos);
}

int bch2_trans_log_bkey(struct btree_trans *trans, enum btree_id btree,
			unsigned level, struct bkey_i *k)
{
	struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, jset_u64s(k->k.u64s));
	int ret = PTR_ERR_OR_ZERO(e);
	if (ret)
		return ret;

	journal_entry_init(e, BCH_JSET_ENTRY_log_bkey, btree, level, k->k.u64s);
	bkey_copy(e->start, k);
	return 0;
}
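
/*
 * Illustrative sketch (not part of the original file): attaching a fixed log
 * string to its own transaction commit, mirroring what __bch2_fs_log_msg()
 * below does for formatted messages once the journal is running.
 */
static int __maybe_unused example_log_str(struct bch_fs *c)
{
	return bch2_trans_commit_do(c, NULL, NULL, 0,
			bch2_trans_log_str(trans, "example log message"));
}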

__printf(3, 0)
static int
__bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt,
		  va_list args)
{
	struct printbuf buf = PRINTBUF;
	prt_vprintf(&buf, fmt, args);

	unsigned u64s = DIV_ROUND_UP(buf.pos, sizeof(u64));

	int ret = buf.allocation_failure ? -BCH_ERR_ENOMEM_trans_log_msg : 0;
	if (ret)
		goto err;

	if (!test_bit(JOURNAL_running, &c->journal.flags)) {
		ret = darray_make_room(&c->journal.early_journal_entries, jset_u64s(u64s));
		if (ret)
			goto err;

		struct jset_entry_log *l = (void *) &darray_top(c->journal.early_journal_entries);
		journal_entry_init(&l->entry, BCH_JSET_ENTRY_log, 0, 1, u64s);
		memcpy_and_pad(l->d, u64s * sizeof(u64), buf.buf, buf.pos, 0);
		c->journal.early_journal_entries.nr += jset_u64s(u64s);
	} else {
		ret = bch2_trans_commit_do(c, NULL, NULL, commit_flags,
					   bch2_trans_log_msg(trans, &buf));
	}
err:
	printbuf_exit(&buf);
	return ret;
}

__printf(2, 3)
int bch2_fs_log_msg(struct bch_fs *c, const char *fmt, ...)
{
	va_list args;
	int ret;

	va_start(args, fmt);
	ret = __bch2_fs_log_msg(c, 0, fmt, args);
	va_end(args);
	return ret;
}

/*
 * Use for logging messages during recovery to enable reserved space and avoid
 * blocking.
 */
__printf(2, 3)
int bch2_journal_log_msg(struct bch_fs *c, const char *fmt, ...)
{
	va_list args;
	int ret;

	va_start(args, fmt);
	ret = __bch2_fs_log_msg(c, BCH_WATERMARK_reclaim, fmt, args);
	va_end(args);
	return ret;
}
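
/*
 * Illustrative sketch (not part of the original file): a recovery-time caller
 * using bch2_journal_log_msg() so the message can be written with the reclaim
 * watermark instead of blocking on journal space.
 */
static int __maybe_unused example_log_recovery_progress(struct bch_fs *c, u64 seq)
{
	return bch2_journal_log_msg(c, "example: replayed up to journal seq %llu", seq);
}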