xref: /linux/fs/bcachefs/subvolume.c (revision 031fba65fc202abf1f193e321be7a2c274fd88ba)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 #include "bcachefs.h"
4 #include "btree_key_cache.h"
5 #include "btree_update.h"
6 #include "errcode.h"
7 #include "error.h"
8 #include "fs.h"
9 #include "snapshot.h"
10 #include "subvolume.h"
11 
12 #include <linux/random.h>
13 
14 static int bch2_subvolume_delete(struct btree_trans *, u32);
15 
16 static int check_subvol(struct btree_trans *trans,
17 			struct btree_iter *iter,
18 			struct bkey_s_c k)
19 {
20 	struct bch_fs *c = trans->c;
21 	struct bkey_s_c_subvolume subvol;
22 	struct bch_snapshot snapshot;
23 	unsigned snapid;
24 	int ret = 0;
25 
26 	if (k.k->type != KEY_TYPE_subvolume)
27 		return 0;
28 
29 	subvol = bkey_s_c_to_subvolume(k);
30 	snapid = le32_to_cpu(subvol.v->snapshot);
31 	ret = bch2_snapshot_lookup(trans, snapid, &snapshot);
32 
33 	if (bch2_err_matches(ret, ENOENT))
34 		bch_err(c, "subvolume %llu points to nonexistent snapshot %u",
35 			k.k->p.offset, snapid);
36 	if (ret)
37 		return ret;
38 
39 	if (BCH_SUBVOLUME_UNLINKED(subvol.v)) {
40 		bch2_fs_lazy_rw(c);
41 
42 		ret = bch2_subvolume_delete(trans, iter->pos.offset);
43 		if (ret)
44 			bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset);
45 		return ret ?: -BCH_ERR_transaction_restart_nested;
46 	}
47 
48 	if (!BCH_SUBVOLUME_SNAP(subvol.v)) {
49 		u32 snapshot_root = bch2_snapshot_root(c, le32_to_cpu(subvol.v->snapshot));
50 		u32 snapshot_tree;
51 		struct bch_snapshot_tree st;
52 
53 		rcu_read_lock();
54 		snapshot_tree = snapshot_t(c, snapshot_root)->tree;
55 		rcu_read_unlock();
56 
57 		ret = bch2_snapshot_tree_lookup(trans, snapshot_tree, &st);
58 
59 		bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
60 				"%s: snapshot tree %u not found", __func__, snapshot_tree);
61 
62 		if (ret)
63 			return ret;
64 
65 		if (fsck_err_on(le32_to_cpu(st.master_subvol) != subvol.k->p.offset, c,
66 				"subvolume %llu is not set as snapshot but is not master subvolume",
67 				k.k->p.offset)) {
68 			struct bkey_i_subvolume *s =
69 				bch2_bkey_make_mut_typed(trans, iter, &subvol.s_c, 0, subvolume);
70 			ret = PTR_ERR_OR_ZERO(s);
71 			if (ret)
72 				return ret;
73 
74 			SET_BCH_SUBVOLUME_SNAP(&s->v, true);
75 		}
76 	}
77 
78 fsck_err:
79 	return ret;
80 }
81 
82 int bch2_check_subvols(struct bch_fs *c)
83 {
84 	struct btree_iter iter;
85 	struct bkey_s_c k;
86 	int ret;
87 
88 	ret = bch2_trans_run(c,
89 		for_each_btree_key_commit(trans, iter,
90 			BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k,
91 			NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
92 		check_subvol(trans, &iter, k)));
93 	if (ret)
94 		bch_err_fn(c, ret);
95 	return ret;
96 }
97 
98 /* Subvolumes: */
99 
100 int bch2_subvolume_invalid(const struct bch_fs *c, struct bkey_s_c k,
101 			   enum bkey_invalid_flags flags, struct printbuf *err)
102 {
103 	if (bkey_lt(k.k->p, SUBVOL_POS_MIN) ||
104 	    bkey_gt(k.k->p, SUBVOL_POS_MAX)) {
105 		prt_printf(err, "invalid pos");
106 		return -BCH_ERR_invalid_bkey;
107 	}
108 
109 	return 0;
110 }
111 
112 void bch2_subvolume_to_text(struct printbuf *out, struct bch_fs *c,
113 			    struct bkey_s_c k)
114 {
115 	struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k);
116 
117 	prt_printf(out, "root %llu snapshot id %u",
118 		   le64_to_cpu(s.v->inode),
119 		   le32_to_cpu(s.v->snapshot));
120 
121 	if (bkey_val_bytes(s.k) > offsetof(struct bch_subvolume, parent))
122 		prt_printf(out, " parent %u", le32_to_cpu(s.v->parent));
123 }
124 
125 static __always_inline int
126 bch2_subvolume_get_inlined(struct btree_trans *trans, unsigned subvol,
127 			   bool inconsistent_if_not_found,
128 			   int iter_flags,
129 			   struct bch_subvolume *s)
130 {
131 	int ret = bch2_bkey_get_val_typed(trans, BTREE_ID_subvolumes, POS(0, subvol),
132 					  iter_flags, subvolume, s);
133 	bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT) &&
134 				inconsistent_if_not_found,
135 				trans->c, "missing subvolume %u", subvol);
136 	return ret;
137 }
138 
139 int bch2_subvolume_get(struct btree_trans *trans, unsigned subvol,
140 		       bool inconsistent_if_not_found,
141 		       int iter_flags,
142 		       struct bch_subvolume *s)
143 {
144 	return bch2_subvolume_get_inlined(trans, subvol, inconsistent_if_not_found, iter_flags, s);
145 }
146 
147 int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot,
148 			     struct bch_subvolume *subvol)
149 {
150 	struct bch_snapshot snap;
151 
152 	return  bch2_snapshot_lookup(trans, snapshot, &snap) ?:
153 		bch2_subvolume_get(trans, le32_to_cpu(snap.subvol), true, 0, subvol);
154 }
155 
156 int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid,
157 				u32 *snapid)
158 {
159 	struct btree_iter iter;
160 	struct bkey_s_c_subvolume subvol;
161 	int ret;
162 
163 	subvol = bch2_bkey_get_iter_typed(trans, &iter,
164 					  BTREE_ID_subvolumes, POS(0, subvolid),
165 					  BTREE_ITER_CACHED|BTREE_ITER_WITH_UPDATES,
166 					  subvolume);
167 	ret = bkey_err(subvol);
168 	bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c,
169 				"missing subvolume %u", subvolid);
170 
171 	if (likely(!ret))
172 		*snapid = le32_to_cpu(subvol.v->snapshot);
173 	bch2_trans_iter_exit(trans, &iter);
174 	return ret;
175 }
176 
177 static int bch2_subvolume_reparent(struct btree_trans *trans,
178 				   struct btree_iter *iter,
179 				   struct bkey_s_c k,
180 				   u32 old_parent, u32 new_parent)
181 {
182 	struct bkey_i_subvolume *s;
183 	int ret;
184 
185 	if (k.k->type != KEY_TYPE_subvolume)
186 		return 0;
187 
188 	if (bkey_val_bytes(k.k) > offsetof(struct bch_subvolume, parent) &&
189 	    le32_to_cpu(bkey_s_c_to_subvolume(k).v->parent) != old_parent)
190 		return 0;
191 
192 	s = bch2_bkey_make_mut_typed(trans, iter, &k, 0, subvolume);
193 	ret = PTR_ERR_OR_ZERO(s);
194 	if (ret)
195 		return ret;
196 
197 	s->v.parent = cpu_to_le32(new_parent);
198 	return 0;
199 }
200 
201 /*
202  * Separate from the snapshot tree in the snapshots btree, we record the tree
203  * structure of how snapshot subvolumes were created - the parent subvolume of
204  * each snapshot subvolume.
205  *
206  * When a subvolume is deleted, we scan for child subvolumes and reparant them,
207  * to avoid dangling references:
208  */
209 static int bch2_subvolumes_reparent(struct btree_trans *trans, u32 subvolid_to_delete)
210 {
211 	struct btree_iter iter;
212 	struct bkey_s_c k;
213 	struct bch_subvolume s;
214 
215 	return lockrestart_do(trans,
216 			bch2_subvolume_get(trans, subvolid_to_delete, true,
217 				   BTREE_ITER_CACHED, &s)) ?:
218 		for_each_btree_key_commit(trans, iter,
219 				BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k,
220 				NULL, NULL, BTREE_INSERT_NOFAIL,
221 			bch2_subvolume_reparent(trans, &iter, k,
222 					subvolid_to_delete, le32_to_cpu(s.parent)));
223 }
224 
225 /*
226  * Delete subvolume, mark snapshot ID as deleted, queue up snapshot
227  * deletion/cleanup:
228  */
229 static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid)
230 {
231 	struct btree_iter iter;
232 	struct bkey_s_c_subvolume subvol;
233 	struct btree_trans_commit_hook *h;
234 	u32 snapid;
235 	int ret = 0;
236 
237 	subvol = bch2_bkey_get_iter_typed(trans, &iter,
238 				BTREE_ID_subvolumes, POS(0, subvolid),
239 				BTREE_ITER_CACHED|BTREE_ITER_INTENT,
240 				subvolume);
241 	ret = bkey_err(subvol);
242 	bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c,
243 				"missing subvolume %u", subvolid);
244 	if (ret)
245 		return ret;
246 
247 	snapid = le32_to_cpu(subvol.v->snapshot);
248 
249 	ret = bch2_btree_delete_at(trans, &iter, 0);
250 	if (ret)
251 		goto err;
252 
253 	ret = bch2_snapshot_node_set_deleted(trans, snapid);
254 	if (ret)
255 		goto err;
256 
257 	h = bch2_trans_kmalloc(trans, sizeof(*h));
258 	ret = PTR_ERR_OR_ZERO(h);
259 	if (ret)
260 		goto err;
261 
262 	h->fn = bch2_delete_dead_snapshots_hook;
263 	bch2_trans_commit_hook(trans, h);
264 err:
265 	bch2_trans_iter_exit(trans, &iter);
266 	return ret;
267 }
268 
269 static int bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid)
270 {
271 	return bch2_subvolumes_reparent(trans, subvolid) ?:
272 		commit_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL,
273 			  __bch2_subvolume_delete(trans, subvolid));
274 }
275 
276 static void bch2_subvolume_wait_for_pagecache_and_delete(struct work_struct *work)
277 {
278 	struct bch_fs *c = container_of(work, struct bch_fs,
279 				snapshot_wait_for_pagecache_and_delete_work);
280 	snapshot_id_list s;
281 	u32 *id;
282 	int ret = 0;
283 
284 	while (!ret) {
285 		mutex_lock(&c->snapshots_unlinked_lock);
286 		s = c->snapshots_unlinked;
287 		darray_init(&c->snapshots_unlinked);
288 		mutex_unlock(&c->snapshots_unlinked_lock);
289 
290 		if (!s.nr)
291 			break;
292 
293 		bch2_evict_subvolume_inodes(c, &s);
294 
295 		for (id = s.data; id < s.data + s.nr; id++) {
296 			ret = bch2_trans_run(c, bch2_subvolume_delete(trans, *id));
297 			if (ret) {
298 				bch_err_msg(c, ret, "deleting subvolume %u", *id);
299 				break;
300 			}
301 		}
302 
303 		darray_exit(&s);
304 	}
305 
306 	bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache);
307 }
308 
309 struct subvolume_unlink_hook {
310 	struct btree_trans_commit_hook	h;
311 	u32				subvol;
312 };
313 
314 static int bch2_subvolume_wait_for_pagecache_and_delete_hook(struct btree_trans *trans,
315 						      struct btree_trans_commit_hook *_h)
316 {
317 	struct subvolume_unlink_hook *h = container_of(_h, struct subvolume_unlink_hook, h);
318 	struct bch_fs *c = trans->c;
319 	int ret = 0;
320 
321 	mutex_lock(&c->snapshots_unlinked_lock);
322 	if (!snapshot_list_has_id(&c->snapshots_unlinked, h->subvol))
323 		ret = snapshot_list_add(c, &c->snapshots_unlinked, h->subvol);
324 	mutex_unlock(&c->snapshots_unlinked_lock);
325 
326 	if (ret)
327 		return ret;
328 
329 	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_snapshot_delete_pagecache))
330 		return -EROFS;
331 
332 	if (!queue_work(c->write_ref_wq, &c->snapshot_wait_for_pagecache_and_delete_work))
333 		bch2_write_ref_put(c, BCH_WRITE_REF_snapshot_delete_pagecache);
334 	return 0;
335 }
336 
337 int bch2_subvolume_unlink(struct btree_trans *trans, u32 subvolid)
338 {
339 	struct btree_iter iter;
340 	struct bkey_i_subvolume *n;
341 	struct subvolume_unlink_hook *h;
342 	int ret = 0;
343 
344 	h = bch2_trans_kmalloc(trans, sizeof(*h));
345 	ret = PTR_ERR_OR_ZERO(h);
346 	if (ret)
347 		return ret;
348 
349 	h->h.fn		= bch2_subvolume_wait_for_pagecache_and_delete_hook;
350 	h->subvol	= subvolid;
351 	bch2_trans_commit_hook(trans, &h->h);
352 
353 	n = bch2_bkey_get_mut_typed(trans, &iter,
354 			BTREE_ID_subvolumes, POS(0, subvolid),
355 			BTREE_ITER_CACHED, subvolume);
356 	ret = PTR_ERR_OR_ZERO(n);
357 	if (unlikely(ret)) {
358 		bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c,
359 					"missing subvolume %u", subvolid);
360 		return ret;
361 	}
362 
363 	SET_BCH_SUBVOLUME_UNLINKED(&n->v, true);
364 	bch2_trans_iter_exit(trans, &iter);
365 	return ret;
366 }
367 
368 int bch2_subvolume_create(struct btree_trans *trans, u64 inode,
369 			  u32 src_subvolid,
370 			  u32 *new_subvolid,
371 			  u32 *new_snapshotid,
372 			  bool ro)
373 {
374 	struct bch_fs *c = trans->c;
375 	struct btree_iter dst_iter, src_iter = (struct btree_iter) { NULL };
376 	struct bkey_i_subvolume *new_subvol = NULL;
377 	struct bkey_i_subvolume *src_subvol = NULL;
378 	u32 parent = 0, new_nodes[2], snapshot_subvols[2];
379 	int ret = 0;
380 
381 	ret = bch2_bkey_get_empty_slot(trans, &dst_iter,
382 				BTREE_ID_subvolumes, POS(0, U32_MAX));
383 	if (ret == -BCH_ERR_ENOSPC_btree_slot)
384 		ret = -BCH_ERR_ENOSPC_subvolume_create;
385 	if (ret)
386 		return ret;
387 
388 	snapshot_subvols[0] = dst_iter.pos.offset;
389 	snapshot_subvols[1] = src_subvolid;
390 
391 	if (src_subvolid) {
392 		/* Creating a snapshot: */
393 
394 		src_subvol = bch2_bkey_get_mut_typed(trans, &src_iter,
395 				BTREE_ID_subvolumes, POS(0, src_subvolid),
396 				BTREE_ITER_CACHED, subvolume);
397 		ret = PTR_ERR_OR_ZERO(src_subvol);
398 		if (unlikely(ret)) {
399 			bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), c,
400 						"subvolume %u not found", src_subvolid);
401 			goto err;
402 		}
403 
404 		parent = le32_to_cpu(src_subvol->v.snapshot);
405 	}
406 
407 	ret = bch2_snapshot_node_create(trans, parent, new_nodes,
408 					snapshot_subvols,
409 					src_subvolid ? 2 : 1);
410 	if (ret)
411 		goto err;
412 
413 	if (src_subvolid) {
414 		src_subvol->v.snapshot = cpu_to_le32(new_nodes[1]);
415 		ret = bch2_trans_update(trans, &src_iter, &src_subvol->k_i, 0);
416 		if (ret)
417 			goto err;
418 	}
419 
420 	new_subvol = bch2_bkey_alloc(trans, &dst_iter, 0, subvolume);
421 	ret = PTR_ERR_OR_ZERO(new_subvol);
422 	if (ret)
423 		goto err;
424 
425 	new_subvol->v.flags	= 0;
426 	new_subvol->v.snapshot	= cpu_to_le32(new_nodes[0]);
427 	new_subvol->v.inode	= cpu_to_le64(inode);
428 	new_subvol->v.parent	= cpu_to_le32(src_subvolid);
429 	new_subvol->v.otime.lo	= cpu_to_le64(bch2_current_time(c));
430 	new_subvol->v.otime.hi	= 0;
431 
432 	SET_BCH_SUBVOLUME_RO(&new_subvol->v, ro);
433 	SET_BCH_SUBVOLUME_SNAP(&new_subvol->v, src_subvolid != 0);
434 
435 	*new_subvolid	= new_subvol->k.p.offset;
436 	*new_snapshotid	= new_nodes[0];
437 err:
438 	bch2_trans_iter_exit(trans, &src_iter);
439 	bch2_trans_iter_exit(trans, &dst_iter);
440 	return ret;
441 }
442 
443 int bch2_fs_subvolumes_init(struct bch_fs *c)
444 {
445 	INIT_WORK(&c->snapshot_delete_work, bch2_delete_dead_snapshots_work);
446 	INIT_WORK(&c->snapshot_wait_for_pagecache_and_delete_work,
447 		  bch2_subvolume_wait_for_pagecache_and_delete);
448 	mutex_init(&c->snapshots_unlinked_lock);
449 	return 0;
450 }
451