xref: /linux/fs/bcachefs/btree_update.h (revision 70d7f7dbd98a4d499b46ec9ef2bd1f2698facf2b)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef _BCACHEFS_BTREE_UPDATE_H
3 #define _BCACHEFS_BTREE_UPDATE_H
4 
5 #include "btree_iter.h"
6 #include "journal.h"
7 
8 struct bch_fs;
9 struct btree;
10 
/*
 * Low-level btree node update helpers. Only the prototypes are visible in
 * this header and the parameters are unnamed, so the role of each argument
 * has to be read from the definitions.
 */
11 void bch2_btree_node_prep_for_write(struct btree_trans *,
12 				    struct btree_path *, struct btree *);
13 bool bch2_btree_bset_insert_key(struct btree_trans *, struct btree_path *,
14 				struct btree *, struct btree_node_iter *,
15 				struct bkey_i *);
16 
/*
 * NOTE(review): flush0/flush1 share the (journal, journal_entry_pin, u64)
 * signature, which looks like a journal pin flush callback - confirm against
 * the definitions.
 */
17 int bch2_btree_node_flush0(struct journal *, struct journal_entry_pin *, u64);
18 int bch2_btree_node_flush1(struct journal *, struct journal_entry_pin *, u64);
19 void bch2_btree_add_journal_pin(struct bch_fs *, struct btree *, u64);
20 
/* NOTE(review): the trailing u64 is presumably a journal sequence number - confirm. */
21 void bch2_btree_insert_key_leaf(struct btree_trans *, struct btree_path *,
22 				struct bkey_i *, u64);
23 
/*
 * X-macro table of transaction commit flags. Each entry is x(name, description)
 * and is expanded by whatever definition of x() is in effect at the point of
 * use.
 *
 * A description may be split over several adjacent string literals, which the
 * compiler joins into one string; every fragment that continues on the next
 * line therefore has to end with a space.
 */
#define BCH_TRANS_COMMIT_FLAGS()							\
	x(no_enospc,	"don't check for enospc")					\
	x(no_check_rw,	"don't attempt to take a ref on c->writes")			\
	x(lazy_rw,	"go read-write if we haven't yet - only for use in recovery")	\
	x(no_journal_res, "don't take a journal reservation, instead "			\
			"pin journal entry referred to by trans->journal_res.seq")	\
	x(journal_reclaim, "operation required for journal reclaim; may return error "	\
			"instead of deadlocking if BCH_WATERMARK_reclaim not specified")\
	x(skip_accounting_apply, "we're in journal replay - accounting updates have already been applied")
33 
/*
 * Bit numbers of the commit flags, generated from BCH_TRANS_COMMIT_FLAGS().
 * x() keeps only the flag name here; the description argument is discarded.
 * The flag enumerators follow __BCH_TRANS_COMMIT_FLAGS_START, so the first
 * flag's bit number is BCH_WATERMARK_BITS + 1.
 */
34 enum __bch_trans_commit_flags {
35 	/* First bits for bch_watermark: */
36 	__BCH_TRANS_COMMIT_FLAGS_START = BCH_WATERMARK_BITS,
37 #define x(n, ...)	__BCH_TRANS_COMMIT_##n,
38 	BCH_TRANS_COMMIT_FLAGS()
39 #undef x
40 };
41 
/*
 * Flag masks: for every entry of BCH_TRANS_COMMIT_FLAGS(),
 * BCH_TRANS_COMMIT_<name> is BIT() of the matching __BCH_TRANS_COMMIT_<name>
 * bit number.
 */
42 enum bch_trans_commit_flags {
43 #define x(n, ...)	BCH_TRANS_COMMIT_##n = BIT(__BCH_TRANS_COMMIT_##n),
44 	BCH_TRANS_COMMIT_FLAGS()
45 #undef x
46 };
47 
/*
 * Takes a printbuf destination and a commit flags value; presumably prints the
 * names of the flags that are set - confirm against the definition.
 */
48 void bch2_trans_commit_flags_to_text(struct printbuf *, enum bch_trans_commit_flags);
49 
/*
 * Key deletion entry points. NOTE(review): the trailing unsigned arguments are
 * unnamed here; they look like update flags (plus a length for the extent
 * variant) - confirm against the definitions.
 */
50 int bch2_btree_delete_extent_at(struct btree_trans *, struct btree_iter *,
51 				unsigned, unsigned);
52 int bch2_btree_delete_at(struct btree_trans *, struct btree_iter *, unsigned);
53 int bch2_btree_delete(struct btree_trans *, enum btree_id, struct bpos, unsigned);
54 
/*
 * Key insertion entry points. The *_trans and *_nonextent variants operate
 * inside a caller-supplied btree_trans; bch2_btree_insert() takes the
 * filesystem instead, together with a disk reservation and commit flags.
 */
int bch2_btree_insert_nonextent(struct btree_trans *, enum btree_id,
				struct bkey_i *,
				enum btree_iter_update_trigger_flags);

int bch2_btree_insert_trans(struct btree_trans *, enum btree_id,
			    struct bkey_i *,
			    enum btree_iter_update_trigger_flags);
int bch2_btree_insert(struct bch_fs *, enum btree_id, struct bkey_i *,
		      struct disk_reservation *, int flags,
		      enum btree_iter_update_trigger_flags iter_flags);
63 
/*
 * Range deletion, in a caller-supplied transaction (_trans) or given only the
 * filesystem. NOTE(review): the two bpos arguments are presumably the start
 * and end of the range and the u64 pointer a journal sequence output -
 * confirm against the definitions.
 */
64 int bch2_btree_delete_range_trans(struct btree_trans *, enum btree_id,
65 				  struct bpos, struct bpos, unsigned, u64 *);
66 int bch2_btree_delete_range(struct bch_fs *, enum btree_id,
67 			    struct bpos, struct bpos, unsigned, u64 *);
68 
/*
 * Bit-style btree updates at a single position; the bool selects the new
 * state. The _buffered variant is what bch2_btree_delete_at_buffered() calls
 * with false.
 */
69 int bch2_btree_bit_mod(struct btree_trans *, enum btree_id, struct bpos, bool);
70 int bch2_btree_bit_mod_buffered(struct btree_trans *, enum btree_id, struct bpos, bool);
71 
72 static inline int bch2_btree_delete_at_buffered(struct btree_trans *trans,
73 						enum btree_id btree, struct bpos pos)
74 {
75 	return bch2_btree_bit_mod_buffered(trans, btree, pos, false);
76 }
77 
/*
 * Out-of-line part of bch2_insert_snapshot_whiteouts(), which calls it with
 * (trans, btree, old_pos, new_pos) once its inline early-out checks have
 * passed.
 */
78 int __bch2_insert_snapshot_whiteouts(struct btree_trans *, enum btree_id,
79 				     struct bpos, struct bpos);
80 
81 /*
82  * For use when splitting extents in existing snapshots:
83  *
84  * If @old_pos is an interior snapshot node, iterate over descendent snapshot
85  * nodes: for every descendent snapshot in whiche @old_pos is overwritten and
86  * not visible, emit a whiteout at @new_pos.
87  */
88 static inline int bch2_insert_snapshot_whiteouts(struct btree_trans *trans,
89 						 enum btree_id btree,
90 						 struct bpos old_pos,
91 						 struct bpos new_pos)
92 {
93 	if (!btree_type_has_snapshots(btree) ||
94 	    bkey_eq(old_pos, new_pos))
95 		return 0;
96 
97 	return __bch2_insert_snapshot_whiteouts(trans, btree, old_pos, new_pos);
98 }
99 
/*
 * NOTE(review): the two bkey_s_c arguments are presumably the old and the new
 * key - confirm against the definition.
 */
100 int bch2_trans_update_extent_overwrite(struct btree_trans *, struct btree_iter *,
101 				       enum btree_iter_update_trigger_flags,
102 				       struct bkey_s_c, struct bkey_s_c);
103 
104 int bch2_bkey_get_empty_slot(struct btree_trans *, struct btree_iter *,
105 			     enum btree_id, struct bpos);
106 
/*
 * Queues an update in the transaction; the inline helpers in this header call
 * it as (trans, iter, key, flags). __must_check: the return value must not be
 * ignored.
 */
107 int __must_check bch2_trans_update(struct btree_trans *, struct btree_iter *,
108 				   struct bkey_i *, enum btree_iter_update_trigger_flags);
109 
/*
 * Out-of-line path of bch2_trans_jset_entry_alloc(): called with
 * (trans, u64s) when there is no journal entry buffer yet or the request does
 * not fit in it.
 */
110 struct jset_entry *__bch2_trans_jset_entry_alloc(struct btree_trans *, unsigned);
111 
112 static inline struct jset_entry *btree_trans_journal_entries_top(struct btree_trans *trans)
113 {
114 	return (void *) ((u64 *) trans->journal_entries + trans->journal_entries_u64s);
115 }
116 
117 static inline struct jset_entry *
118 bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsigned u64s)
119 {
120 	if (!trans->journal_entries ||
121 	    trans->journal_entries_u64s + u64s > trans->journal_entries_size)
122 		return __bch2_trans_jset_entry_alloc(trans, u64s);
123 
124 	struct jset_entry *e = btree_trans_journal_entries_top(trans);
125 	trans->journal_entries_u64s += u64s;
126 	return e;
127 }
128 
/*
 * Used by bch2_trans_update_buffered() as the non-write-buffer path while
 * journal replay has not finished; called as (trans, btree, key).
 */
129 int bch2_btree_insert_clone_trans(struct btree_trans *, enum btree_id, struct bkey_i *);
130 
131 static inline int __must_check bch2_trans_update_buffered(struct btree_trans *trans,
132 					    enum btree_id btree,
133 					    struct bkey_i *k)
134 {
135 	/*
136 	 * Most updates skip the btree write buffer until journal replay is
137 	 * finished because synchronization with journal replay relies on having
138 	 * a btree node locked - if we're overwriting a key in the journal that
139 	 * journal replay hasn't yet replayed, we have to mark it as
140 	 * overwritten.
141 	 *
142 	 * But accounting updates don't overwrite, they're deltas, and they have
143 	 * to be flushed to the btree strictly in order for journal replay to be
144 	 * able to tell which updates need to be applied:
145 	 */
146 	if (k->k.type != KEY_TYPE_accounting &&
147 	    unlikely(trans->journal_replay_not_finished))
148 		return bch2_btree_insert_clone_trans(trans, btree, k);
149 
150 	struct jset_entry *e = bch2_trans_jset_entry_alloc(trans, jset_u64s(k->k.u64s));
151 	int ret = PTR_ERR_OR_ZERO(e);
152 	if (ret)
153 		return ret;
154 
155 	journal_entry_init(e, BCH_JSET_ENTRY_write_buffer_keys, btree, 0, k->k.u64s);
156 	bkey_copy(e->start, k);
157 	return 0;
158 }
159 
160 void bch2_trans_commit_hook(struct btree_trans *,
161 			    struct btree_trans_commit_hook *);
/*
 * Does the actual commit; bch2_trans_commit() calls it as (trans, flags) after
 * storing the disk reservation and journal sequence pointers in the
 * transaction.
 */
162 int __bch2_trans_commit(struct btree_trans *, unsigned);
163 
/*
 * printf-style logging helpers: __printf(2, 3) marks argument 2 as the format
 * string and argument 3 as the first variadic argument, so the compiler can
 * check format/argument mismatches.
 */
164 __printf(2, 3) int bch2_fs_log_msg(struct bch_fs *, const char *, ...);
165 __printf(2, 3) int bch2_journal_log_msg(struct bch_fs *, const char *, ...);
166 
167 /**
168  * bch2_trans_commit - insert keys at given iterator positions
169  *
170  * This is main entry point for btree updates.
171  *
172  * Return values:
173  * -EROFS: filesystem read only
174  * -EIO: journal or btree node IO error
175  */
176 static inline int bch2_trans_commit(struct btree_trans *trans,
177 				    struct disk_reservation *disk_res,
178 				    u64 *journal_seq,
179 				    unsigned flags)
180 {
181 	trans->disk_res		= disk_res;
182 	trans->journal_seq	= journal_seq;
183 
184 	return __bch2_trans_commit(trans, flags);
185 }
186 
/*
 * commit_do - evaluate @_do and, if it yields 0, commit the transaction
 *
 * The expression handed to lockrestart_do() is
 *	(_do) ?: bch2_trans_commit(...)
 * i.e. GNU "?:" with an omitted middle operand: a non-zero result of @_do is
 * returned as is, and bch2_trans_commit() is only called when @_do evaluated
 * to 0.
 *
 * @_do is parenthesized so that an argument containing a low-precedence
 * operator (e.g. "cond ? 0 : err") is treated as one operand of "?:" instead
 * of swallowing the commit into its last branch.
 */
#define commit_do(_trans, _disk_res, _journal_seq, _flags, _do)	\
	lockrestart_do(_trans, (_do) ?: bch2_trans_commit(_trans, (_disk_res),\
					(_journal_seq), (_flags)))

/* Same as commit_do(), but wrapped in nested_lockrestart_do(). */
#define nested_commit_do(_trans, _disk_res, _journal_seq, _flags, _do)	\
	nested_lockrestart_do(_trans, (_do) ?: bch2_trans_commit(_trans, (_disk_res),\
					(_journal_seq), (_flags)))

/*
 * commit_do() run through bch2_trans_run() on filesystem @_c.
 *
 * Note that "trans" is not a macro parameter: @_do and commit_do() refer to an
 * identifier named trans that has to be in scope inside bch2_trans_run()
 * (presumably declared by that macro - confirm in btree_iter.h).
 */
#define bch2_trans_do(_c, _disk_res, _journal_seq, _flags, _do)		\
	bch2_trans_run(_c, commit_do(trans, _disk_res, _journal_seq, _flags, _do))
197 
/*
 * Iterate over the first (_trans)->nr_updates elements of (_trans)->updates.
 * @_i is declared by the macro as a struct btree_insert_entry pointer whose
 * scope is the loop itself.
 */
#define trans_for_each_update(_trans, _i)				\
	for (struct btree_insert_entry *_i = &(_trans)->updates[0];	\
	     (_i) < &(_trans)->updates[(_trans)->nr_updates];		\
	     ++(_i))
202 
203 static inline void bch2_trans_reset_updates(struct btree_trans *trans)
204 {
205 	trans_for_each_update(trans, i)
206 		bch2_path_put(trans, i->path, true);
207 
208 	trans->nr_updates		= 0;
209 	trans->journal_entries_u64s	= 0;
210 	trans->hooks			= NULL;
211 	trans->extra_disk_res		= 0;
212 }
213 
214 static inline struct bkey_i *__bch2_bkey_make_mut_noupdate(struct btree_trans *trans, struct bkey_s_c k,
215 						  unsigned type, unsigned min_bytes)
216 {
217 	unsigned bytes = max_t(unsigned, min_bytes, bkey_bytes(k.k));
218 	struct bkey_i *mut;
219 
220 	if (type && k.k->type != type)
221 		return ERR_PTR(-ENOENT);
222 
223 	/* extra padding for varint_decode_fast... */
224 	mut = bch2_trans_kmalloc_nomemzero(trans, bytes + 8);
225 	if (!IS_ERR(mut)) {
226 		bkey_reassemble(mut, k);
227 
228 		if (unlikely(bytes > bkey_bytes(k.k))) {
229 			memset((void *) mut + bkey_bytes(k.k), 0,
230 			       bytes - bkey_bytes(k.k));
231 			mut->k.u64s = DIV_ROUND_UP(bytes, sizeof(u64));
232 		}
233 	}
234 	return mut;
235 }
236 
237 static inline struct bkey_i *bch2_bkey_make_mut_noupdate(struct btree_trans *trans, struct bkey_s_c k)
238 {
239 	return __bch2_bkey_make_mut_noupdate(trans, k, 0, 0);
240 }
241 
/*
 * Typed variant of __bch2_bkey_make_mut_noupdate(): passes KEY_TYPE_<_type> as
 * the required key type and sizeof(struct bkey_i_<_type>) as the minimum size,
 * then converts the result with bkey_i_to_<_type>().
 * NOTE(review): the helper can return an ERR_PTR; how bkey_i_to_<_type>()
 * treats error pointers is not visible here - confirm before relying on it.
 */
242 #define bch2_bkey_make_mut_noupdate_typed(_trans, _k, _type)		\
243 	bkey_i_to_##_type(__bch2_bkey_make_mut_noupdate(_trans, _k,	\
244 				KEY_TYPE_##_type, sizeof(struct bkey_i_##_type)))
245 
246 static inline struct bkey_i *__bch2_bkey_make_mut(struct btree_trans *trans, struct btree_iter *iter,
247 					struct bkey_s_c *k, unsigned flags,
248 					unsigned type, unsigned min_bytes)
249 {
250 	struct bkey_i *mut = __bch2_bkey_make_mut_noupdate(trans, *k, type, min_bytes);
251 	int ret;
252 
253 	if (IS_ERR(mut))
254 		return mut;
255 
256 	ret = bch2_trans_update(trans, iter, mut, flags);
257 	if (ret)
258 		return ERR_PTR(ret);
259 
260 	*k = bkey_i_to_s_c(mut);
261 	return mut;
262 }
263 
/*
 * __bch2_bkey_make_mut() with no key type requirement and no minimum size
 * (type == 0, min_bytes == 0).
 */
static inline struct bkey_i *
bch2_bkey_make_mut(struct btree_trans *trans, struct btree_iter *iter,
		   struct bkey_s_c *k, unsigned flags)
{
	return __bch2_bkey_make_mut(trans, iter, k, flags, 0, 0);
}
269 
/*
 * Typed variant of __bch2_bkey_make_mut(): passes KEY_TYPE_<_type> as the
 * required key type and sizeof(struct bkey_i_<_type>) as the minimum size,
 * then converts the result with bkey_i_to_<_type>().
 * NOTE(review): the helper can return an ERR_PTR; how bkey_i_to_<_type>()
 * treats error pointers is not visible here - confirm before relying on it.
 */
270 #define bch2_bkey_make_mut_typed(_trans, _iter, _k, _flags, _type)	\
271 	bkey_i_to_##_type(__bch2_bkey_make_mut(_trans, _iter, _k, _flags,\
272 				KEY_TYPE_##_type, sizeof(struct bkey_i_##_type)))
273 
274 static inline struct bkey_i *__bch2_bkey_get_mut_noupdate(struct btree_trans *trans,
275 					 struct btree_iter *iter,
276 					 unsigned btree_id, struct bpos pos,
277 					 unsigned flags, unsigned type, unsigned min_bytes)
278 {
279 	struct bkey_s_c k = __bch2_bkey_get_iter(trans, iter,
280 				btree_id, pos, flags|BTREE_ITER_intent, type);
281 	struct bkey_i *ret = IS_ERR(k.k)
282 		? ERR_CAST(k.k)
283 		: __bch2_bkey_make_mut_noupdate(trans, k, 0, min_bytes);
284 	if (IS_ERR(ret))
285 		bch2_trans_iter_exit(trans, iter);
286 	return ret;
287 }
288 
289 static inline struct bkey_i *bch2_bkey_get_mut_noupdate(struct btree_trans *trans,
290 					       struct btree_iter *iter,
291 					       unsigned btree_id, struct bpos pos,
292 					       unsigned flags)
293 {
294 	return __bch2_bkey_get_mut_noupdate(trans, iter, btree_id, pos, flags, 0, 0);
295 }
296 
297 static inline struct bkey_i *__bch2_bkey_get_mut(struct btree_trans *trans,
298 					 struct btree_iter *iter,
299 					 unsigned btree_id, struct bpos pos,
300 					 unsigned flags, unsigned type, unsigned min_bytes)
301 {
302 	struct bkey_i *mut = __bch2_bkey_get_mut_noupdate(trans, iter,
303 				btree_id, pos, flags|BTREE_ITER_intent, type, min_bytes);
304 	int ret;
305 
306 	if (IS_ERR(mut))
307 		return mut;
308 
309 	ret = bch2_trans_update(trans, iter, mut, flags);
310 	if (ret) {
311 		bch2_trans_iter_exit(trans, iter);
312 		return ERR_PTR(ret);
313 	}
314 
315 	return mut;
316 }
317 
318 static inline struct bkey_i *bch2_bkey_get_mut_minsize(struct btree_trans *trans,
319 						       struct btree_iter *iter,
320 						       unsigned btree_id, struct bpos pos,
321 						       unsigned flags, unsigned min_bytes)
322 {
323 	return __bch2_bkey_get_mut(trans, iter, btree_id, pos, flags, 0, min_bytes);
324 }
325 
326 static inline struct bkey_i *bch2_bkey_get_mut(struct btree_trans *trans,
327 					       struct btree_iter *iter,
328 					       unsigned btree_id, struct bpos pos,
329 					       unsigned flags)
330 {
331 	return __bch2_bkey_get_mut(trans, iter, btree_id, pos, flags, 0, 0);
332 }
333 
/*
 * Typed variant of __bch2_bkey_get_mut(): passes KEY_TYPE_<_type> as the key
 * type and sizeof(struct bkey_i_<_type>) as the minimum size, then converts
 * the result with bkey_i_to_<_type>().
 * NOTE(review): the helper can return an ERR_PTR; how bkey_i_to_<_type>()
 * treats error pointers is not visible here - confirm before relying on it.
 */
334 #define bch2_bkey_get_mut_typed(_trans, _iter, _btree_id, _pos, _flags, _type)\
335 	bkey_i_to_##_type(__bch2_bkey_get_mut(_trans, _iter,		\
336 			_btree_id, _pos, _flags,			\
337 			KEY_TYPE_##_type, sizeof(struct bkey_i_##_type)))
338 
339 static inline struct bkey_i *__bch2_bkey_alloc(struct btree_trans *trans, struct btree_iter *iter,
340 					       unsigned flags, unsigned type, unsigned val_size)
341 {
342 	struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k) + val_size);
343 	int ret;
344 
345 	if (IS_ERR(k))
346 		return k;
347 
348 	bkey_init(&k->k);
349 	k->k.p = iter->pos;
350 	k->k.type = type;
351 	set_bkey_val_bytes(&k->k, val_size);
352 
353 	ret = bch2_trans_update(trans, iter, k, flags);
354 	if (unlikely(ret))
355 		return ERR_PTR(ret);
356 	return k;
357 }
358 
/*
 * Typed front end for __bch2_bkey_alloc(): the key type is KEY_TYPE_<_type>,
 * the value size is sizeof(struct bch_<_type>), and the result is converted
 * with bkey_i_to_<_type>().
 * NOTE(review): the helper can return an ERR_PTR; how bkey_i_to_<_type>()
 * treats error pointers is not visible here - confirm before relying on it.
 */
359 #define bch2_bkey_alloc(_trans, _iter, _flags, _type)			\
360 	bkey_i_to_##_type(__bch2_bkey_alloc(_trans, _iter, _flags,	\
361 				KEY_TYPE_##_type, sizeof(struct bch_##_type)))
362 
363 #endif /* _BCACHEFS_BTREE_UPDATE_H */
364