1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #ifndef _BCACHEFS_BTREE_UPDATE_H 3 #define _BCACHEFS_BTREE_UPDATE_H 4 5 #include "btree_iter.h" 6 #include "journal.h" 7 #include "snapshot.h" 8 9 struct bch_fs; 10 struct btree; 11 12 void bch2_btree_node_prep_for_write(struct btree_trans *, 13 struct btree_path *, struct btree *); 14 bool bch2_btree_bset_insert_key(struct btree_trans *, struct btree_path *, 15 struct btree *, struct btree_node_iter *, 16 struct bkey_i *); 17 18 int bch2_btree_node_flush0(struct journal *, struct journal_entry_pin *, u64); 19 int bch2_btree_node_flush1(struct journal *, struct journal_entry_pin *, u64); 20 void bch2_btree_add_journal_pin(struct bch_fs *, struct btree *, u64); 21 22 void bch2_btree_insert_key_leaf(struct btree_trans *, struct btree_path *, 23 struct bkey_i *, u64); 24 25 #define BCH_TRANS_COMMIT_FLAGS() \ 26 x(no_enospc, "don't check for enospc") \ 27 x(no_check_rw, "don't attempt to take a ref on c->writes") \ 28 x(no_journal_res, "don't take a journal reservation, instead " \ 29 "pin journal entry referred to by trans->journal_res.seq") \ 30 x(journal_reclaim, "operation required for journal reclaim; may return error" \ 31 "instead of deadlocking if BCH_WATERMARK_reclaim not specified")\ 32 x(skip_accounting_apply, "we're in journal replay - accounting updates have already been applied") 33 34 enum __bch_trans_commit_flags { 35 /* First bits for bch_watermark: */ 36 __BCH_TRANS_COMMIT_FLAGS_START = BCH_WATERMARK_BITS, 37 #define x(n, ...) __BCH_TRANS_COMMIT_##n, 38 BCH_TRANS_COMMIT_FLAGS() 39 #undef x 40 }; 41 42 enum bch_trans_commit_flags { 43 #define x(n, ...) BCH_TRANS_COMMIT_##n = BIT(__BCH_TRANS_COMMIT_##n), 44 BCH_TRANS_COMMIT_FLAGS() 45 #undef x 46 }; 47 48 void bch2_trans_commit_flags_to_text(struct printbuf *, enum bch_trans_commit_flags); 49 50 int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, unsigned); 51 int bch2_btree_delete(struct btree_trans *, enum btree_id, struct bpos, unsigned); 52 53 int bch2_btree_insert_nonextent(struct btree_trans *, enum btree_id, 54 struct bkey_i *, enum btree_iter_update_trigger_flags); 55 56 int bch2_btree_insert_trans(struct btree_trans *, enum btree_id, struct bkey_i *, 57 enum btree_iter_update_trigger_flags); 58 int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *, struct 59 disk_reservation *, int flags, enum 60 btree_iter_update_trigger_flags iter_flags); 61 62 int bch2_btree_delete_range_trans(struct btree_trans *, enum btree_id, 63 struct bpos, struct bpos, unsigned, u64 *); 64 int bch2_btree_delete_range(struct bch_fs *, enum btree_id, 65 struct bpos, struct bpos, unsigned, u64 *); 66 67 int bch2_btree_bit_mod_iter(struct btree_trans *, struct btree_iter *, bool); 68 int bch2_btree_bit_mod(struct btree_trans *, enum btree_id, struct bpos, bool); 69 int bch2_btree_bit_mod_buffered(struct btree_trans *, enum btree_id, struct bpos, bool); 70 71 static inline int bch2_btree_delete_at_buffered(struct btree_trans *trans, 72 enum btree_id btree, struct bpos pos) 73 { 74 return bch2_btree_bit_mod_buffered(trans, btree, pos, false); 75 } 76 77 int __bch2_insert_snapshot_whiteouts(struct btree_trans *, enum btree_id, 78 struct bpos, snapshot_id_list *); 79 80 /* 81 * For use when splitting extents in existing snapshots: 82 * 83 * If @old_pos is an interior snapshot node, iterate over descendent snapshot 84 * nodes: for every descendent snapshot in whiche @old_pos is overwritten and 85 * not visible, emit a whiteout at @new_pos. 86 */ 87 static inline int bch2_insert_snapshot_whiteouts(struct btree_trans *trans, 88 enum btree_id btree, 89 struct bpos old_pos, 90 struct bpos new_pos) 91 { 92 BUG_ON(old_pos.snapshot != new_pos.snapshot); 93 94 if (!btree_type_has_snapshots(btree) || 95 bkey_eq(old_pos, new_pos)) 96 return 0; 97 98 snapshot_id_list s; 99 int ret = bch2_get_snapshot_overwrites(trans, btree, old_pos, &s); 100 if (ret) 101 return ret; 102 103 return s.nr 104 ? __bch2_insert_snapshot_whiteouts(trans, btree, new_pos, &s) 105 : 0; 106 } 107 108 int bch2_trans_update_extent_overwrite(struct btree_trans *, struct btree_iter *, 109 enum btree_iter_update_trigger_flags, 110 struct bkey_s_c, struct bkey_s_c); 111 112 int bch2_bkey_get_empty_slot(struct btree_trans *, struct btree_iter *, 113 enum btree_id, struct bpos); 114 115 int __must_check bch2_trans_update_ip(struct btree_trans *, struct btree_iter *, 116 struct bkey_i *, enum btree_iter_update_trigger_flags, 117 unsigned long); 118 119 static inline int __must_check 120 bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter, 121 struct bkey_i *k, enum btree_iter_update_trigger_flags flags) 122 { 123 return bch2_trans_update_ip(trans, iter, k, flags, _THIS_IP_); 124 } 125 126 static inline void *btree_trans_subbuf_base(struct btree_trans *trans, 127 struct btree_trans_subbuf *buf) 128 { 129 return (u64 *) trans->mem + buf->base; 130 } 131 132 static inline void *btree_trans_subbuf_top(struct btree_trans *trans, 133 struct btree_trans_subbuf *buf) 134 { 135 return (u64 *) trans->mem + buf->base + buf->u64s; 136 } 137 138 void *__bch2_trans_subbuf_alloc(struct btree_trans *, 139 struct btree_trans_subbuf *, 140 unsigned); 141 142 static inline void * 143 bch2_trans_subbuf_alloc(struct btree_trans *trans, 144 struct btree_trans_subbuf *buf, 145 unsigned u64s) 146 { 147 if (buf->u64s + u64s > buf->size) 148 return __bch2_trans_subbuf_alloc(trans, buf, u64s); 149 150 void *p = btree_trans_subbuf_top(trans, buf); 151 buf->u64s += u64s; 152 return p; 153 } 154 155 static inline struct jset_entry *btree_trans_journal_entries_start(struct btree_trans *trans) 156 { 157 return btree_trans_subbuf_base(trans, &trans->journal_entries); 158 } 159 160 static inline struct jset_entry *btree_trans_journal_entries_top(struct btree_trans *trans) 161 { 162 return btree_trans_subbuf_top(trans, &trans->journal_entries); 163 } 164 165 static inline struct jset_entry * 166 bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsigned u64s) 167 { 168 return bch2_trans_subbuf_alloc(trans, &trans->journal_entries, u64s); 169 } 170 171 int bch2_btree_insert_clone_trans(struct btree_trans *, enum btree_id, struct bkey_i *); 172 173 int bch2_btree_write_buffer_insert_err(struct bch_fs *, enum btree_id, struct bkey_i *); 174 175 static inline int __must_check bch2_trans_update_buffered(struct btree_trans *trans, 176 enum btree_id btree, 177 struct bkey_i *k) 178 { 179 kmsan_check_memory(k, bkey_bytes(&k->k)); 180 181 EBUG_ON(k->k.u64s > BTREE_WRITE_BUFERED_U64s_MAX); 182 183 if (unlikely(!btree_type_uses_write_buffer(btree))) { 184 int ret = bch2_btree_write_buffer_insert_err(trans->c, btree, k); 185 dump_stack(); 186 return ret; 187 } 188 /* 189 * Most updates skip the btree write buffer until journal replay is 190 * finished because synchronization with journal replay relies on having 191 * a btree node locked - if we're overwriting a key in the journal that 192 * journal replay hasn't yet replayed, we have to mark it as 193 * overwritten. 194 * 195 * But accounting updates don't overwrite, they're deltas, and they have 196 * to be flushed to the btree strictly in order for journal replay to be 197 * able to tell which updates need to be applied: 198 */ 199 if (k->k.type != KEY_TYPE_accounting && 200 unlikely(trans->journal_replay_not_finished)) 201 return bch2_btree_insert_clone_trans(trans, btree, k); 202 203 struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, jset_u64s(k->k.u64s)); 204 int ret = PTR_ERR_OR_ZERO(e); 205 if (ret) 206 return ret; 207 208 journal_entry_init(e, BCH_JSET_ENTRY_write_buffer_keys, btree, 0, k->k.u64s); 209 bkey_copy(e->start, k); 210 return 0; 211 } 212 213 void bch2_trans_commit_hook(struct btree_trans *, 214 struct btree_trans_commit_hook *); 215 int __bch2_trans_commit(struct btree_trans *, unsigned); 216 217 int bch2_trans_log_str(struct btree_trans *, const char *); 218 int bch2_trans_log_msg(struct btree_trans *, struct printbuf *); 219 int bch2_trans_log_bkey(struct btree_trans *, enum btree_id, unsigned, struct bkey_i *); 220 221 __printf(2, 3) int bch2_fs_log_msg(struct bch_fs *, const char *, ...); 222 __printf(2, 3) int bch2_journal_log_msg(struct bch_fs *, const char *, ...); 223 224 /** 225 * bch2_trans_commit - insert keys at given iterator positions 226 * 227 * This is main entry point for btree updates. 228 * 229 * Return values: 230 * -EROFS: filesystem read only 231 * -EIO: journal or btree node IO error 232 */ 233 static inline int bch2_trans_commit(struct btree_trans *trans, 234 struct disk_reservation *disk_res, 235 u64 *journal_seq, 236 unsigned flags) 237 { 238 trans->disk_res = disk_res; 239 trans->journal_seq = journal_seq; 240 241 return __bch2_trans_commit(trans, flags); 242 } 243 244 #define commit_do(_trans, _disk_res, _journal_seq, _flags, _do) \ 245 lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\ 246 (_journal_seq), (_flags))) 247 248 #define nested_commit_do(_trans, _disk_res, _journal_seq, _flags, _do) \ 249 nested_lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\ 250 (_journal_seq), (_flags))) 251 252 #define bch2_trans_commit_do(_c, _disk_res, _journal_seq, _flags, _do) \ 253 bch2_trans_run(_c, commit_do(trans, _disk_res, _journal_seq, _flags, _do)) 254 255 #define trans_for_each_update(_trans, _i) \ 256 for (struct btree_insert_entry *_i = (_trans)->updates; \ 257 (_i) < (_trans)->updates + (_trans)->nr_updates; \ 258 (_i)++) 259 260 static inline void bch2_trans_reset_updates(struct btree_trans *trans) 261 { 262 trans_for_each_update(trans, i) 263 bch2_path_put(trans, i->path, true); 264 265 trans->nr_updates = 0; 266 trans->journal_entries.u64s = 0; 267 trans->journal_entries.size = 0; 268 trans->accounting.u64s = 0; 269 trans->accounting.size = 0; 270 trans->hooks = NULL; 271 trans->extra_disk_res = 0; 272 } 273 274 static __always_inline struct bkey_i *__bch2_bkey_make_mut_noupdate(struct btree_trans *trans, struct bkey_s_c k, 275 unsigned type, unsigned min_bytes) 276 { 277 unsigned bytes = max_t(unsigned, min_bytes, bkey_bytes(k.k)); 278 struct bkey_i *mut; 279 280 if (type && k.k->type != type) 281 return ERR_PTR(-ENOENT); 282 283 /* extra padding for varint_decode_fast... */ 284 mut = bch2_trans_kmalloc_nomemzero(trans, bytes + 8); 285 if (!IS_ERR(mut)) { 286 bkey_reassemble(mut, k); 287 288 if (unlikely(bytes > bkey_bytes(k.k))) { 289 memset((void *) mut + bkey_bytes(k.k), 0, 290 bytes - bkey_bytes(k.k)); 291 mut->k.u64s = DIV_ROUND_UP(bytes, sizeof(u64)); 292 } 293 } 294 return mut; 295 } 296 297 static __always_inline struct bkey_i *bch2_bkey_make_mut_noupdate(struct btree_trans *trans, struct bkey_s_c k) 298 { 299 return __bch2_bkey_make_mut_noupdate(trans, k, 0, 0); 300 } 301 302 #define bch2_bkey_make_mut_noupdate_typed(_trans, _k, _type) \ 303 bkey_i_to_##_type(__bch2_bkey_make_mut_noupdate(_trans, _k, \ 304 KEY_TYPE_##_type, sizeof(struct bkey_i_##_type))) 305 306 static inline struct bkey_i *__bch2_bkey_make_mut(struct btree_trans *trans, struct btree_iter *iter, 307 struct bkey_s_c *k, 308 enum btree_iter_update_trigger_flags flags, 309 unsigned type, unsigned min_bytes) 310 { 311 struct bkey_i *mut = __bch2_bkey_make_mut_noupdate(trans, *k, type, min_bytes); 312 int ret; 313 314 if (IS_ERR(mut)) 315 return mut; 316 317 ret = bch2_trans_update(trans, iter, mut, flags); 318 if (ret) 319 return ERR_PTR(ret); 320 321 *k = bkey_i_to_s_c(mut); 322 return mut; 323 } 324 325 static inline struct bkey_i *bch2_bkey_make_mut(struct btree_trans *trans, 326 struct btree_iter *iter, struct bkey_s_c *k, 327 enum btree_iter_update_trigger_flags flags) 328 { 329 return __bch2_bkey_make_mut(trans, iter, k, flags, 0, 0); 330 } 331 332 #define bch2_bkey_make_mut_typed(_trans, _iter, _k, _flags, _type) \ 333 bkey_i_to_##_type(__bch2_bkey_make_mut(_trans, _iter, _k, _flags,\ 334 KEY_TYPE_##_type, sizeof(struct bkey_i_##_type))) 335 336 static inline struct bkey_i *__bch2_bkey_get_mut_noupdate(struct btree_trans *trans, 337 struct btree_iter *iter, 338 unsigned btree_id, struct bpos pos, 339 enum btree_iter_update_trigger_flags flags, 340 unsigned type, unsigned min_bytes) 341 { 342 struct bkey_s_c k = __bch2_bkey_get_iter(trans, iter, 343 btree_id, pos, flags|BTREE_ITER_intent, type); 344 struct bkey_i *ret = IS_ERR(k.k) 345 ? ERR_CAST(k.k) 346 : __bch2_bkey_make_mut_noupdate(trans, k, 0, min_bytes); 347 if (IS_ERR(ret)) 348 bch2_trans_iter_exit(trans, iter); 349 return ret; 350 } 351 352 static inline struct bkey_i *bch2_bkey_get_mut_noupdate(struct btree_trans *trans, 353 struct btree_iter *iter, 354 unsigned btree_id, struct bpos pos, 355 enum btree_iter_update_trigger_flags flags) 356 { 357 return __bch2_bkey_get_mut_noupdate(trans, iter, btree_id, pos, flags, 0, 0); 358 } 359 360 static inline struct bkey_i *__bch2_bkey_get_mut(struct btree_trans *trans, 361 struct btree_iter *iter, 362 unsigned btree_id, struct bpos pos, 363 enum btree_iter_update_trigger_flags flags, 364 unsigned type, unsigned min_bytes) 365 { 366 struct bkey_i *mut = __bch2_bkey_get_mut_noupdate(trans, iter, 367 btree_id, pos, flags|BTREE_ITER_intent, type, min_bytes); 368 int ret; 369 370 if (IS_ERR(mut)) 371 return mut; 372 373 ret = bch2_trans_update(trans, iter, mut, flags); 374 if (ret) { 375 bch2_trans_iter_exit(trans, iter); 376 return ERR_PTR(ret); 377 } 378 379 return mut; 380 } 381 382 static inline struct bkey_i *bch2_bkey_get_mut_minsize(struct btree_trans *trans, 383 struct btree_iter *iter, 384 unsigned btree_id, struct bpos pos, 385 enum btree_iter_update_trigger_flags flags, 386 unsigned min_bytes) 387 { 388 return __bch2_bkey_get_mut(trans, iter, btree_id, pos, flags, 0, min_bytes); 389 } 390 391 static inline struct bkey_i *bch2_bkey_get_mut(struct btree_trans *trans, 392 struct btree_iter *iter, 393 unsigned btree_id, struct bpos pos, 394 enum btree_iter_update_trigger_flags flags) 395 { 396 return __bch2_bkey_get_mut(trans, iter, btree_id, pos, flags, 0, 0); 397 } 398 399 #define bch2_bkey_get_mut_typed(_trans, _iter, _btree_id, _pos, _flags, _type)\ 400 bkey_i_to_##_type(__bch2_bkey_get_mut(_trans, _iter, \ 401 _btree_id, _pos, _flags, \ 402 KEY_TYPE_##_type, sizeof(struct bkey_i_##_type))) 403 404 static inline struct bkey_i *__bch2_bkey_alloc(struct btree_trans *trans, struct btree_iter *iter, 405 enum btree_iter_update_trigger_flags flags, 406 unsigned type, unsigned val_size) 407 { 408 struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k) + val_size); 409 int ret; 410 411 if (IS_ERR(k)) 412 return k; 413 414 bkey_init(&k->k); 415 k->k.p = iter->pos; 416 k->k.type = type; 417 set_bkey_val_bytes(&k->k, val_size); 418 419 ret = bch2_trans_update(trans, iter, k, flags); 420 if (unlikely(ret)) 421 return ERR_PTR(ret); 422 return k; 423 } 424 425 #define bch2_bkey_alloc(_trans, _iter, _flags, _type) \ 426 bkey_i_to_##_type(__bch2_bkey_alloc(_trans, _iter, _flags, \ 427 KEY_TYPE_##_type, sizeof(struct bch_##_type))) 428 429 #endif /* _BCACHEFS_BTREE_UPDATE_H */ 430