xref: /linux/fs/bcachefs/namei.c (revision 6f2a71a99ebd5dfaa7948a2e9c59eae94b741bd8)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 #include "bcachefs.h"
4 #include "acl.h"
5 #include "btree_update.h"
6 #include "dirent.h"
7 #include "inode.h"
8 #include "namei.h"
9 #include "subvolume.h"
10 #include "xattr.h"
11 
12 #include <linux/posix_acl.h>
13 
/*
 * Build the subvol_inum of @inode's parent directory.
 *
 * The inode number is taken from the inode's bi_dir field. The subvolume is
 * the inode's bi_parent_subvol when that is nonzero, and the subvolume of
 * @inum otherwise.
 */
static inline subvol_inum parent_inum(subvol_inum inum, struct bch_inode_unpacked *inode)
{
	subvol_inum parent = {
		.subvol	= inum.subvol,
		.inum	= inode->bi_dir,
	};

	if (inode->bi_parent_subvol)
		parent.subvol = inode->bi_parent_subvol;

	return parent;
}
21 
is_subdir_for_nlink(struct bch_inode_unpacked * inode)22 static inline int is_subdir_for_nlink(struct bch_inode_unpacked *inode)
23 {
24 	return S_ISDIR(inode->bi_mode) && !inode->bi_subvol;
25 }
26 
/*
 * Queue, within @trans, the updates needed to create @name in directory @dir.
 *
 * @dir_u:		filled in from the directory inode, then updated
 * @new_inode:		inode being created; for BCH_CREATE_SNAPSHOT it is
 *			instead filled in from the root inode of @snapshot_src
 * @default_acl, @acl:	optional ACLs, applied only when not snapshotting
 * @snapshot_src:	subvolume (and optionally root inode) to snapshot;
 *			only consulted when BCH_CREATE_SNAPSHOT is set
 * @flags:		BCH_CREATE_* flags:
 *			  BCH_CREATE_SNAPSHOT - snapshot an existing subvolume
 *			    instead of allocating a new inode (implies
 *			    BCH_CREATE_SUBVOL)
 *			  BCH_CREATE_SUBVOL - create a subvolume and make the
 *			    dirent point at it
 *			  BCH_CREATE_SNAPSHOT_RO - passed to
 *			    bch2_subvolume_create() as its final boolean
 *			  BCH_CREATE_TMPFILE - mark the inode unlinked and do
 *			    not create a dirent
 *
 * Returns 0 or the first error encountered; both iterators are released on
 * every exit path.
 */
int bch2_create_trans(struct btree_trans *trans,
		      subvol_inum dir,
		      struct bch_inode_unpacked *dir_u,
		      struct bch_inode_unpacked *new_inode,
		      const struct qstr *name,
		      uid_t uid, gid_t gid, umode_t mode, dev_t rdev,
		      struct posix_acl *default_acl,
		      struct posix_acl *acl,
		      subvol_inum snapshot_src,
		      unsigned flags)
{
	struct bch_fs *c = trans->c;
	struct btree_iter dir_iter = {};
	struct btree_iter inode_iter = {};
	subvol_inum new_inum = dir;
	u64 timestamp = bch2_current_time(c);
	u64 cpu_id = raw_smp_processor_id();
	u64 target;
	u32 snapshot;
	unsigned d_type = mode_to_type(mode);
	/* Neither of these two bits is modified below: */
	const bool is_snapshot = (flags & BCH_CREATE_SNAPSHOT) != 0;
	const bool is_tmpfile = (flags & BCH_CREATE_TMPFILE) != 0;
	int ret;

	ret =   bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot) ?:
		bch2_inode_peek(trans, &dir_iter, dir_u, dir,
				BTREE_ITER_intent|BTREE_ITER_with_updates);
	if (ret)
		goto err;

	if (is_snapshot) {
		/*
		 * Snapshot creation: no new inode is allocated. Instead, look
		 * up the root inode of the subvolume being snapshotted; it is
		 * rewritten (in the new snapshot) at the end of this function.
		 */
		if (!snapshot_src.inum) {
			/* Caller only named the subvolume - find its root inode: */
			struct bch_subvolume src_subvol;

			ret = bch2_subvolume_get(trans, snapshot_src.subvol, true, &src_subvol);
			if (ret)
				goto err;

			snapshot_src.inum = le64_to_cpu(src_subvol.inode);
		}

		ret = bch2_inode_peek(trans, &inode_iter, new_inode, snapshot_src,
				      BTREE_ITER_intent);
		if (ret)
			goto err;

		/* Only a subvolume root may be snapshotted: */
		if (new_inode->bi_subvol != snapshot_src.subvol) {
			ret = -EINVAL;
			goto err;
		}

		/* A non-root user must own the subvolume being snapshotted: */
		if (uid && new_inode->bi_uid != uid) {
			ret = -EPERM;
			goto err;
		}

		flags |= BCH_CREATE_SUBVOL;
	} else {
		/* Ordinary create: initialize and allocate a brand new inode. */
		bch2_inode_init_late(c, new_inode, timestamp, uid, gid, mode, rdev, dir_u);

		if (is_tmpfile)
			new_inode->bi_flags |= BCH_INODE_unlinked;

		ret = bch2_inode_create(trans, &inode_iter, new_inode, snapshot, cpu_id);
		if (ret)
			goto err;

		snapshot_src = (subvol_inum) { 0 };
	}

	/* By default the dirent points straight at the inode: */
	target		= new_inode->bi_inum;
	new_inum.inum	= new_inode->bi_inum;

	if (flags & BCH_CREATE_SUBVOL) {
		u32 new_subvol, dir_snapshot;

		ret = bch2_subvolume_create(trans, new_inode->bi_inum,
					    dir.subvol,
					    snapshot_src.subvol,
					    &new_subvol, &snapshot,
					    (flags & BCH_CREATE_SNAPSHOT_RO) != 0);
		if (ret)
			goto err;

		/* The dirent now refers to the subvolume, not the inode: */
		d_type				= DT_SUBVOL;
		target				= new_subvol;
		new_inum.subvol			= new_subvol;
		new_inode->bi_subvol		= new_subvol;
		new_inode->bi_parent_subvol	= dir.subvol;

		/* Re-resolve the directory's snapshot and re-traverse its iterator: */
		ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &dir_snapshot);
		if (ret)
			goto err;

		bch2_btree_iter_set_snapshot(trans, &dir_iter, dir_snapshot);
		ret = bch2_btree_iter_traverse(trans, &dir_iter);
		if (ret)
			goto err;
	}

	if (!is_snapshot && default_acl) {
		ret = bch2_set_acl_trans(trans, new_inum, new_inode,
					 default_acl, ACL_TYPE_DEFAULT);
		if (ret)
			goto err;
	}

	if (!is_snapshot && acl) {
		ret = bch2_set_acl_trans(trans, new_inum, new_inode,
					 acl, ACL_TYPE_ACCESS);
		if (ret)
			goto err;
	}

	if (!is_tmpfile) {
		struct bch_hash_info dir_hash = bch2_hash_info_init(c, dir_u);
		u64 dir_offset;

		dir_u->bi_nlink += is_subdir_for_nlink(new_inode);
		dir_u->bi_ctime = timestamp;
		dir_u->bi_mtime = timestamp;

		ret = bch2_dirent_create(trans, dir, &dir_hash,
					 d_type,
					 name,
					 target,
					 &dir_offset,
					 STR_HASH_must_create|BTREE_ITER_with_updates);
		if (ret)
			goto err;

		ret = bch2_inode_write(trans, &dir_iter, dir_u);
		if (ret)
			goto err;

		/* Record where the new dirent lives in the inode: */
		new_inode->bi_dir		= dir_u->bi_inum;
		new_inode->bi_dir_offset	= dir_offset;
	}

	if (S_ISDIR(mode)) {
		subvol_inum self = {
			new_inode->bi_subvol ?: dir.subvol,
			new_inode->bi_inum
		};

		ret = bch2_maybe_propagate_has_case_insensitive(trans, self, new_inode);
		if (ret)
			goto err;

		if (!new_inode->bi_subvol)
			new_inode->bi_depth = dir_u->bi_depth + 1;
	}

	/* Point the inode iterator at the (possibly new) snapshot and write: */
	inode_iter.flags &= ~BTREE_ITER_all_snapshots;
	bch2_btree_iter_set_snapshot(trans, &inode_iter, snapshot);

	ret = bch2_btree_iter_traverse(trans, &inode_iter);
	if (!ret)
		ret = bch2_inode_write(trans, &inode_iter, new_inode);
err:
	bch2_trans_iter_exit(trans, &inode_iter);
	bch2_trans_iter_exit(trans, &dir_iter);
	return ret;
}
202 
/*
 * Queue, within @trans, the updates for a new link @name in directory @dir
 * pointing at the existing inode @inum.
 *
 * @dir_u and @inode_u are filled in from the btree and then updated.
 *
 * Returns -EXDEV when @dir and @inum are in different subvolumes, or when
 * bch2_reinherit_attrs() reports that linking into @dir would change options
 * on the inode; otherwise 0 or the error from the failing step.
 */
int bch2_link_trans(struct btree_trans *trans,
		    subvol_inum dir,  struct bch_inode_unpacked *dir_u,
		    subvol_inum inum, struct bch_inode_unpacked *inode_u,
		    const struct qstr *name)
{
	struct bch_fs *c = trans->c;
	struct btree_iter dir_iter = {};
	struct btree_iter inode_iter = {};
	struct bch_hash_info hash;
	u64 timestamp = bch2_current_time(c);
	u64 new_offset = 0;
	int ret;

	if (dir.subvol != inum.subvol)
		return -EXDEV;

	ret = bch2_inode_peek(trans, &inode_iter, inode_u, inum, BTREE_ITER_intent);
	if (ret)
		return ret;

	inode_u->bi_ctime = timestamp;

	ret = bch2_inode_nlink_inc(inode_u);
	if (ret)
		goto err;

	ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir, BTREE_ITER_intent);
	if (ret)
		goto err;

	/* Refuse the link if it would change the inode's inherited options: */
	if (bch2_reinherit_attrs(inode_u, dir_u)) {
		ret = -EXDEV;
		goto err;
	}

	dir_u->bi_ctime = timestamp;
	dir_u->bi_mtime = timestamp;

	hash = bch2_hash_info_init(c, dir_u);

	ret = bch2_dirent_create(trans, dir, &hash,
				 mode_to_type(inode_u->bi_mode),
				 name, inum.inum,
				 &new_offset,
				 STR_HASH_must_create);
	if (ret)
		goto err;

	/* Point the inode at the dirent that was just created: */
	inode_u->bi_dir		= dir.inum;
	inode_u->bi_dir_offset	= new_offset;

	ret = bch2_inode_write(trans, &dir_iter, dir_u);
	if (!ret)
		ret = bch2_inode_write(trans, &inode_iter, inode_u);
err:
	bch2_trans_iter_exit(trans, &dir_iter);
	bch2_trans_iter_exit(trans, &inode_iter);
	return ret;
}
259 
/*
 * Queue, within @trans, the updates that remove the entry @name from
 * directory @dir.
 *
 * @dir_u and @inode_u are filled in from the directory inode and from the
 * inode the dirent resolves to, then updated.
 *
 * @deleting_subvol: the caller is deleting a subvolume. In that case the
 * target must have a nonzero bi_subvol (else ENOENT_not_subvol is thrown)
 * and the directory-must-be-empty check is skipped.
 *
 * Returns 0 or the error from the first failing step; all iterators are
 * released on every exit path.
 */
int bch2_unlink_trans(struct btree_trans *trans,
		      subvol_inum dir,
		      struct bch_inode_unpacked *dir_u,
		      struct bch_inode_unpacked *inode_u,
		      const struct qstr *name,
		      bool deleting_subvol)
{
	struct bch_fs *c = trans->c;
	struct btree_iter dir_iter = {};
	struct btree_iter dirent_iter = {};
	struct btree_iter inode_iter = {};
	struct bch_hash_info hash;
	subvol_inum target_inum;
	u64 timestamp = bch2_current_time(c);
	int ret;

	ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir, BTREE_ITER_intent);
	if (ret)
		goto err;

	hash = bch2_hash_info_init(c, dir_u);

	ret = bch2_dirent_lookup_trans(trans, &dirent_iter, dir, &hash,
				       name, &target_inum, BTREE_ITER_intent);
	if (ret)
		goto err;

	ret = bch2_inode_peek(trans, &inode_iter, inode_u, target_inum,
			      BTREE_ITER_intent);
	if (ret)
		goto err;

	if (deleting_subvol) {
		if (!inode_u->bi_subvol) {
			ret = bch_err_throw(c, ENOENT_not_subvol);
			goto err;
		}
	} else if (S_ISDIR(inode_u->bi_mode)) {
		ret = bch2_empty_dir_trans(trans, target_inum);
		if (ret)
			goto err;
	}

	if (inode_u->bi_subvol) {
		/* Recursive subvolume destroy not allowed (yet?) */
		ret = bch2_subvol_has_children(trans, inode_u->bi_subvol);
		if (ret)
			goto err;
	}

	if (!(deleting_subvol || inode_u->bi_subvol)) {
		bch2_inode_nlink_dec(trans, inode_u);
	} else {
		struct bkey_s_c k;

		ret = bch2_subvolume_unlink(trans, inode_u->bi_subvol);
		if (ret)
			goto err;

		k = bch2_btree_iter_peek_slot(trans, &dirent_iter);
		ret = bkey_err(k);
		if (ret)
			goto err;

		/*
		 * If we're deleting a subvolume, we need to really delete the
		 * dirent, not just emit a whiteout in the current snapshot:
		 */
		bch2_btree_iter_set_snapshot(trans, &dirent_iter, k.k->p.snapshot);
		ret = bch2_btree_iter_traverse(trans, &dirent_iter);
		if (ret)
			goto err;
	}

	/* Clear the inode's backpointer if it referred to this dirent: */
	if (inode_u->bi_dir		== dirent_iter.pos.inode &&
	    inode_u->bi_dir_offset	== dirent_iter.pos.offset) {
		inode_u->bi_dir		= 0;
		inode_u->bi_dir_offset	= 0;
	}

	inode_u->bi_ctime	= timestamp;
	dir_u->bi_ctime		= timestamp;
	dir_u->bi_mtime		= timestamp;

	if (is_subdir_for_nlink(inode_u))
		dir_u->bi_nlink--;

	ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
				  &hash, &dirent_iter,
				  BTREE_UPDATE_internal_snapshot_node);
	if (ret)
		goto err;

	ret = bch2_inode_write(trans, &dir_iter, dir_u);
	if (ret)
		goto err;

	ret = bch2_inode_write(trans, &inode_iter, inode_u);
err:
	bch2_trans_iter_exit(trans, &inode_iter);
	bch2_trans_iter_exit(trans, &dirent_iter);
	bch2_trans_iter_exit(trans, &dir_iter);
	return ret;
}
353 
bch2_reinherit_attrs(struct bch_inode_unpacked * dst_u,struct bch_inode_unpacked * src_u)354 bool bch2_reinherit_attrs(struct bch_inode_unpacked *dst_u,
355 			  struct bch_inode_unpacked *src_u)
356 {
357 	u64 src, dst;
358 	unsigned id;
359 	bool ret = false;
360 
361 	for (id = 0; id < Inode_opt_nr; id++) {
362 		if (!S_ISDIR(dst_u->bi_mode) && id == Inode_opt_casefold)
363 			continue;
364 
365 		/* Skip attributes that were explicitly set on this inode */
366 		if (dst_u->bi_fields_set & (1 << id))
367 			continue;
368 
369 		src = bch2_inode_opt_get(src_u, id);
370 		dst = bch2_inode_opt_get(dst_u, id);
371 
372 		if (src == dst)
373 			continue;
374 
375 		bch2_inode_opt_set(dst_u, id, src);
376 		ret = true;
377 	}
378 
379 	return ret;
380 }
381 
/*
 * Set the fs_path_parent field of subvolume @subvol to @new_parent.
 *
 * The subvolume key is fetched in mutable form from the subvolumes btree;
 * returns the error from that lookup, or 0 once the field has been updated.
 */
static int subvol_update_parent(struct btree_trans *trans, u32 subvol, u32 new_parent)
{
	struct btree_iter iter;
	struct bkey_i_subvolume *sv;

	sv = bch2_bkey_get_mut_typed(trans, &iter,
				     BTREE_ID_subvolumes, POS(0, subvol),
				     BTREE_ITER_cached, subvolume);
	if (IS_ERR(sv))
		return PTR_ERR(sv);

	sv->v.fs_path_parent = cpu_to_le32(new_parent);

	bch2_trans_iter_exit(trans, &iter);
	return 0;
}
397 
/*
 * Queue, within @trans, the updates for renaming @src_name in @src_dir to
 * @dst_name in @dst_dir.
 *
 * @src_dir_u, @dst_dir_u:	filled in from the two directory inodes. When
 *				both directories are the same subvol_inum,
 *				only @src_dir_u is filled in and used.
 * @src_inode_u:		filled in from the inode being renamed
 * @dst_inode_u:		filled in from the inode currently at the
 *				destination name, if bch2_dirent_rename()
 *				reports one (nonzero dst_inum.inum)
 * @mode:			BCH_RENAME_OVERWRITE and BCH_RENAME_EXCHANGE
 *				get extra handling below; any other mode only
 *				moves the source
 *
 * Returns -EXDEV for a cross-subvolume move of anything that is not a
 * subvolume root, and for a cross-directory move of a directory whose
 * inherited options would change; -ENOTDIR when overwriting and exactly one
 * of source/destination is a directory; otherwise 0 or the error from the
 * failing step. All iterators are released on every exit path.
 */
int bch2_rename_trans(struct btree_trans *trans,
		      subvol_inum src_dir, struct bch_inode_unpacked *src_dir_u,
		      subvol_inum dst_dir, struct bch_inode_unpacked *dst_dir_u,
		      struct bch_inode_unpacked *src_inode_u,
		      struct bch_inode_unpacked *dst_inode_u,
		      const struct qstr *src_name,
		      const struct qstr *dst_name,
		      enum bch_rename_mode mode)
{
	struct bch_fs *c = trans->c;
	struct btree_iter src_dir_iter = {};
	struct btree_iter dst_dir_iter = {};
	struct btree_iter src_inode_iter = {};
	struct btree_iter dst_inode_iter = {};
	struct bch_hash_info src_hash, dst_hash;
	subvol_inum src_inum, dst_inum;
	u64 src_offset, dst_offset;
	u64 now = bch2_current_time(c);
	/*
	 * The destination directory is a distinct inode - with its own
	 * iterator and unpacked copy - exactly when the two subvol_inums
	 * differ. Every later "is there a separate destination directory?"
	 * decision must use this same predicate: comparing inode numbers alone
	 * would skip updating and writing the destination directory when the
	 * two directories share an inode number in different subvolumes.
	 */
	const bool same_dir = subvol_inum_eq(dst_dir, src_dir);
	int ret;

	ret = bch2_inode_peek(trans, &src_dir_iter, src_dir_u, src_dir,
			      BTREE_ITER_intent);
	if (ret)
		goto err;

	src_hash = bch2_hash_info_init(c, src_dir_u);

	if (!same_dir) {
		ret = bch2_inode_peek(trans, &dst_dir_iter, dst_dir_u, dst_dir,
				      BTREE_ITER_intent);
		if (ret)
			goto err;

		dst_hash = bch2_hash_info_init(c, dst_dir_u);
	} else {
		/* Same directory: alias the destination to the source copy */
		dst_dir_u = src_dir_u;
		dst_hash = src_hash;
	}

	ret = bch2_dirent_rename(trans,
				 src_dir, &src_hash,
				 dst_dir, &dst_hash,
				 src_name, &src_inum, &src_offset,
				 dst_name, &dst_inum, &dst_offset,
				 mode);
	if (ret)
		goto err;

	ret = bch2_inode_peek(trans, &src_inode_iter, src_inode_u, src_inum,
			      BTREE_ITER_intent);
	if (ret)
		goto err;

	/* dst_inum.inum is nonzero only when something exists at the destination */
	if (dst_inum.inum) {
		ret = bch2_inode_peek(trans, &dst_inode_iter, dst_inode_u, dst_inum,
				      BTREE_ITER_intent);
		if (ret)
			goto err;
	}

	/* A subvolume root that changes parent subvolume needs its subvolume key updated: */
	if (src_inode_u->bi_subvol &&
	    dst_dir.subvol != src_inode_u->bi_parent_subvol) {
		ret = subvol_update_parent(trans, src_inode_u->bi_subvol, dst_dir.subvol);
		if (ret)
			goto err;
	}

	if (mode == BCH_RENAME_EXCHANGE &&
	    dst_inode_u->bi_subvol &&
	    src_dir.subvol != dst_inode_u->bi_parent_subvol) {
		ret = subvol_update_parent(trans, dst_inode_u->bi_subvol, src_dir.subvol);
		if (ret)
			goto err;
	}

	/* Can't move across subvolumes, unless it's a subvolume root: */
	if (src_dir.subvol != dst_dir.subvol &&
	    (!src_inode_u->bi_subvol ||
	     (dst_inum.inum && !dst_inode_u->bi_subvol))) {
		ret = -EXDEV;
		goto err;
	}

	if (src_inode_u->bi_parent_subvol)
		src_inode_u->bi_parent_subvol = dst_dir.subvol;

	if ((mode == BCH_RENAME_EXCHANGE) &&
	    dst_inode_u->bi_parent_subvol)
		dst_inode_u->bi_parent_subvol = src_dir.subvol;

	/* Update the inode -> dirent backpointers: */
	src_inode_u->bi_dir		= dst_dir_u->bi_inum;
	src_inode_u->bi_dir_offset	= dst_offset;

	if (mode == BCH_RENAME_EXCHANGE) {
		dst_inode_u->bi_dir		= src_dir_u->bi_inum;
		dst_inode_u->bi_dir_offset	= src_offset;
	}

	/* The overwritten inode loses its backpointer if it pointed at this dirent: */
	if (mode == BCH_RENAME_OVERWRITE &&
	    dst_inode_u->bi_dir		== dst_dir_u->bi_inum &&
	    dst_inode_u->bi_dir_offset	== src_offset) {
		dst_inode_u->bi_dir		= 0;
		dst_inode_u->bi_dir_offset	= 0;
	}

	if (mode == BCH_RENAME_OVERWRITE) {
		if (S_ISDIR(src_inode_u->bi_mode) !=
		    S_ISDIR(dst_inode_u->bi_mode)) {
			ret = -ENOTDIR;
			goto err;
		}

		/* Only an empty directory may be overwritten: */
		if (S_ISDIR(dst_inode_u->bi_mode)) {
			ret = bch2_empty_dir_trans(trans, dst_inum);
			if (ret)
				goto err;
		}
	}

	if (!same_dir) {
		/*
		 * Moving between directories re-inherits options; a directory
		 * whose options would change is refused with -EXDEV.
		 */
		if (bch2_reinherit_attrs(src_inode_u, dst_dir_u) &&
		    S_ISDIR(src_inode_u->bi_mode)) {
			ret = -EXDEV;
			goto err;
		}

		if (mode == BCH_RENAME_EXCHANGE &&
		    bch2_reinherit_attrs(dst_inode_u, src_dir_u) &&
		    S_ISDIR(dst_inode_u->bi_mode)) {
			ret = -EXDEV;
			goto err;
		}

		ret =   bch2_maybe_propagate_has_case_insensitive(trans, src_inum, src_inode_u) ?:
			(mode == BCH_RENAME_EXCHANGE
			 ? bch2_maybe_propagate_has_case_insensitive(trans, dst_inum, dst_inode_u)
			 : 0);
		if (ret)
			goto err;

		if (is_subdir_for_nlink(src_inode_u)) {
			src_dir_u->bi_nlink--;
			dst_dir_u->bi_nlink++;
		}

		if (S_ISDIR(src_inode_u->bi_mode) &&
		    !src_inode_u->bi_subvol)
			src_inode_u->bi_depth = dst_dir_u->bi_depth + 1;

		if (mode == BCH_RENAME_EXCHANGE &&
		    S_ISDIR(dst_inode_u->bi_mode) &&
		    !dst_inode_u->bi_subvol)
			dst_inode_u->bi_depth = src_dir_u->bi_depth + 1;
	}

	if (dst_inum.inum && is_subdir_for_nlink(dst_inode_u)) {
		dst_dir_u->bi_nlink--;
		src_dir_u->bi_nlink += mode == BCH_RENAME_EXCHANGE;
	}

	if (mode == BCH_RENAME_OVERWRITE)
		bch2_inode_nlink_dec(trans, dst_inode_u);

	src_dir_u->bi_mtime		= now;
	src_dir_u->bi_ctime		= now;

	if (!same_dir) {
		dst_dir_u->bi_mtime	= now;
		dst_dir_u->bi_ctime	= now;
	}

	src_inode_u->bi_ctime		= now;

	if (dst_inum.inum)
		dst_inode_u->bi_ctime	= now;

	/* dst_dir_iter is only initialized when the directories differ: */
	ret =   bch2_inode_write(trans, &src_dir_iter, src_dir_u) ?:
		(!same_dir
		 ? bch2_inode_write(trans, &dst_dir_iter, dst_dir_u)
		 : 0) ?:
		bch2_inode_write(trans, &src_inode_iter, src_inode_u) ?:
		(dst_inum.inum
		 ? bch2_inode_write(trans, &dst_inode_iter, dst_inode_u)
		 : 0);
err:
	bch2_trans_iter_exit(trans, &dst_inode_iter);
	bch2_trans_iter_exit(trans, &src_inode_iter);
	bch2_trans_iter_exit(trans, &dst_dir_iter);
	bch2_trans_iter_exit(trans, &src_dir_iter);
	return ret;
}
589 
590 /* inum_to_path */
591 
prt_bytes_reversed(struct printbuf * out,const void * b,unsigned n)592 static inline void prt_bytes_reversed(struct printbuf *out, const void *b, unsigned n)
593 {
594 	bch2_printbuf_make_room(out, n);
595 
596 	unsigned can_print = min(n, printbuf_remaining(out));
597 
598 	b += n;
599 
600 	for (unsigned i = 0; i < can_print; i++)
601 		out->buf[out->pos++] = *((char *) --b);
602 
603 	printbuf_nul_terminate(out);
604 }
605 
prt_str_reversed(struct printbuf * out,const char * s)606 static inline void prt_str_reversed(struct printbuf *out, const char *s)
607 {
608 	prt_bytes_reversed(out, s, strlen(s));
609 }
610 
/* Reverse, in place, the @n bytes starting at @b. */
static inline void reverse_bytes(void *b, size_t n)
{
	char *lo = b;
	char *hi = lo + n;

	/* Walk inwards from both ends, exchanging bytes until the ends meet */
	for (; lo < hi; lo++) {
		hi--;
		swap(*lo, *hi);
	}
}
621 
/*
 * Append the path of inode @inum to @path.
 *
 * The inode is identified either by @subvol, or - when @snapshot is nonzero -
 * directly by @snapshot. With @snapshot zero it is looked up from @subvol.
 *
 * The path is built backwards: starting at the inode, each dirent name found
 * via the inode's bi_dir/bi_dir_offset is appended reversed, followed by '/'.
 * Once the walk stops, everything appended is reversed in place, which yields
 * the path in forward order.
 *
 * The walk stops at the root inode of the root subvolume, or with "(loop)"
 * when a (subvol-or-snapshot, inum) pair is visited twice. A lookup failure
 * that is not a transaction restart ends the walk with "(disconnected)" and
 * is not reported as an error.
 *
 * Returns 0 on success, -ENOMEM if @path recorded an allocation failure, the
 * error from darray_push(), or a transaction restart error.
 */
static int __bch2_inum_to_path(struct btree_trans *trans,
			       u32 subvol, u64 inum, u32 snapshot,
			       struct printbuf *path)
{
	unsigned start = path->pos;
	DARRAY(subvol_inum) visited = {};
	int ret = 0;

	if (!snapshot) {
		ret = bch2_subvolume_get_snapshot(trans, subvol, &snapshot);
		if (ret)
			goto disconnected;
	}

	for (;;) {
		subvol_inum cur = { .subvol = subvol ?: snapshot, .inum = inum };

		/* Seen this inode before? Then the directory structure loops: */
		if (darray_find_p(visited, it, it->subvol == cur.subvol && it->inum == cur.inum)) {
			prt_str_reversed(path, "(loop)");
			break;
		}

		ret = darray_push(&visited, cur);
		if (ret)
			goto err;

		struct bch_inode_unpacked inode;
		ret = bch2_inode_find_by_inum_snapshot(trans, inum, snapshot, &inode, 0);
		if (ret)
			goto disconnected;

		/* Reached the filesystem root - done: */
		if (inode.bi_inum == BCACHEFS_ROOT_INO &&
		    inode.bi_subvol == BCACHEFS_ROOT_SUBVOL)
			break;

		if (!(inode.bi_dir || inode.bi_dir_offset)) {
			ret = bch_err_throw(trans->c, ENOENT_inode_no_backpointer);
			goto disconnected;
		}

		/* Step to the parent; crossing a subvolume boundary changes the snapshot: */
		inum = inode.bi_dir;
		if (inode.bi_parent_subvol) {
			subvol = inode.bi_parent_subvol;
			ret = bch2_subvolume_get_snapshot(trans, inode.bi_parent_subvol, &snapshot);
			if (ret)
				goto disconnected;
		}

		struct btree_iter dirent_iter;
		struct bkey_s_c_dirent d = bch2_bkey_get_iter_typed(trans, &dirent_iter,
				BTREE_ID_dirents, SPOS(inode.bi_dir, inode.bi_dir_offset, snapshot),
				0, dirent);
		ret = bkey_err(d.s_c);
		if (ret)
			goto disconnected;

		struct qstr component = bch2_dirent_get_name(d);

		prt_bytes_reversed(path, component.name, component.len);
		prt_char(path, '/');

		bch2_trans_iter_exit(trans, &dirent_iter);
	}

	/* Nothing was appended, i.e. this is the root itself: */
	if (path->pos == start)
		prt_char(path, '/');
out:
	if (path->allocation_failure) {
		ret = -ENOMEM;
	} else {
		ret = 0;
		/* Flip the backwards-built text into forward order: */
		reverse_bytes(path->buf + start, path->pos - start);
	}
err:
	darray_exit(&visited);
	return ret;
disconnected:
	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
		goto err;

	prt_str_reversed(path, "(disconnected)");
	goto out;
}
707 
/*
 * Append the path of @inum to @path, identifying the inode by subvolume.
 *
 * A snapshot of 0 is passed down, which makes __bch2_inum_to_path() look the
 * snapshot up from the subvolume.
 */
int bch2_inum_to_path(struct btree_trans *trans,
		      subvol_inum inum,
		      struct printbuf *path)
{
	const u32 snapshot = 0;

	return __bch2_inum_to_path(trans, inum.subvol, inum.inum, snapshot, path);
}
714 
/*
 * Append the path of inode @inum, as seen in @snapshot, to @path.
 *
 * A subvolume of 0 is passed down, so the inode is identified by its snapshot
 * ID. @snapshot_overwrites is accepted but not used by this function.
 */
int bch2_inum_snapshot_to_path(struct btree_trans *trans, u64 inum, u32 snapshot,
			       snapshot_id_list *snapshot_overwrites,
			       struct printbuf *path)
{
	const u32 subvol = 0;

	return __bch2_inum_to_path(trans, subvol, inum, snapshot, path);
}
721 
722 /* fsck */
723 
/*
 * Check that inode @target's backpointer (bi_dir, bi_dir_offset) agrees with
 * dirent @d, which points at it, and repair the mismatch where possible.
 *
 * Cases handled:
 *  - the inode already points back at @d: nothing to do
 *  - the inode has no backpointer: set it to @d and clear
 *    BCH_INODE_unlinked
 *  - the backpointer refers to a dirent that does not exist: repoint it at @d
 *  - the backpointer refers to a different, existing dirent: for directories
 *    and subvolumes this is "multiple links" (@d is removed when @in_fsck,
 *    otherwise the filesystem is flagged inconsistent); for other inodes it
 *    is a hardlink, and only an nlink of 0 is repaired
 *
 * Returns 0 or an error; a nonzero return is reported through bch_err_fn().
 */
static int bch2_check_dirent_inode_dirent(struct btree_trans *trans,
					  struct bkey_s_c_dirent d,
					  struct bch_inode_unpacked *target,
					  bool in_fsck)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;
	struct btree_iter bp_iter = {};
	int ret = 0;

	if (inode_points_to_dirent(target, d))
		return 0;

	if (!bch2_inode_has_backpointer(target)) {
		fsck_err_on(S_ISDIR(target->bi_mode),
			    trans, inode_dir_missing_backpointer,
			    "directory with missing backpointer\n%s",
			    (printbuf_reset(&buf),
			     bch2_bkey_val_to_text(&buf, c, d.s_c),
			     prt_printf(&buf, "\n"),
			     bch2_inode_unpacked_to_text(&buf, target),
			     buf.buf));

		fsck_err_on(target->bi_flags & BCH_INODE_unlinked,
			    trans, inode_unlinked_but_has_dirent,
			    "inode unlinked but has dirent\n%s",
			    (printbuf_reset(&buf),
			     bch2_bkey_val_to_text(&buf, c, d.s_c),
			     prt_printf(&buf, "\n"),
			     bch2_inode_unpacked_to_text(&buf, target),
			     buf.buf));

		target->bi_flags &= ~BCH_INODE_unlinked;
		target->bi_dir		= d.k->p.inode;
		target->bi_dir_offset	= d.k->p.offset;

		/*
		 * The fsck messages above may have written into buf, so leave
		 * through the common exit path to release it instead of
		 * returning directly:
		 */
		ret = __bch2_fsck_write_inode(trans, target);
		goto out;
	}

	/* Look up the dirent the inode currently claims as its backpointer: */
	struct bkey_s_c_dirent bp_dirent =
		bch2_bkey_get_iter_typed(trans, &bp_iter, BTREE_ID_dirents,
			      SPOS(target->bi_dir, target->bi_dir_offset, target->bi_snapshot),
			      0, dirent);
	ret = bkey_err(bp_dirent);
	if (ret && !bch2_err_matches(ret, ENOENT))
		goto err;

	/* ENOENT just means the backpointer is dangling; it is not an error here */
	bool backpointer_exists = !ret;
	ret = 0;

	if (!backpointer_exists) {
		if (fsck_err(trans, inode_wrong_backpointer,
			     "inode %llu:%u has wrong backpointer:\n"
			     "got       %llu:%llu\n"
			     "should be %llu:%llu",
			     target->bi_inum, target->bi_snapshot,
			     target->bi_dir,
			     target->bi_dir_offset,
			     d.k->p.inode,
			     d.k->p.offset)) {
			target->bi_dir		= d.k->p.inode;
			target->bi_dir_offset	= d.k->p.offset;
			ret = __bch2_fsck_write_inode(trans, target);
		}
	} else {
		printbuf_reset(&buf);
		bch2_bkey_val_to_text(&buf, c, d.s_c);
		prt_newline(&buf);
		bch2_bkey_val_to_text(&buf, c, bp_dirent.s_c);

		if (S_ISDIR(target->bi_mode) || target->bi_subvol) {
			/*
			 * XXX: verify connectivity of the other dirent
			 * up to the root before removing this one
			 *
			 * Additionally, bch2_lookup would need to cope with the
			 * dirent it found being removed - or should we remove
			 * the other one, even though the inode points to it?
			 */
			if (in_fsck) {
				if (fsck_err(trans, inode_dir_multiple_links,
					     "%s %llu:%u with multiple links\n%s",
					     S_ISDIR(target->bi_mode) ? "directory" : "subvolume",
					     target->bi_inum, target->bi_snapshot, buf.buf))
					ret = bch2_fsck_remove_dirent(trans, d.k->p);
			} else {
				bch2_fs_inconsistent(c,
						"%s %llu:%u with multiple links\n%s",
						S_ISDIR(target->bi_mode) ? "directory" : "subvolume",
						target->bi_inum, target->bi_snapshot, buf.buf);
			}

			goto out;
		} else {
			/*
			 * hardlinked file with nlink 0:
			 * We're just adjusting nlink here so check_nlinks() will pick
			 * it up, it ignores inodes with nlink 0
			 */
			if (fsck_err_on(!target->bi_nlink,
					trans, inode_multiple_links_but_nlink_0,
					"inode %llu:%u type %s has multiple links but i_nlink 0\n%s",
					target->bi_inum, target->bi_snapshot, bch2_d_types[d.v->d_type], buf.buf)) {
				target->bi_nlink++;
				target->bi_flags &= ~BCH_INODE_unlinked;
				ret = __bch2_fsck_write_inode(trans, target);
				if (ret)
					goto err;
			}
		}
	}
out:
err:
fsck_err:
	bch2_trans_iter_exit(trans, &bp_iter);
	printbuf_exit(&buf);
	bch_err_fn(c, ret);
	return ret;
}
842 
/*
 * Check dirent @d against the inode @target it points to.
 *
 * First the inode's backpointer is checked via
 * bch2_check_dirent_inode_dirent(). Then, if the dirent's d_type disagrees
 * with inode_d_type(@target) and the fsck error is to be fixed, a corrected
 * copy of the dirent is queued through @dirent_iter with the right d_type and
 * matching target fields (parent/child subvolume for DT_SUBVOL, inode number
 * otherwise).
 *
 * Returns 0 or an error; a nonzero return is reported through bch_err_fn().
 */
int __bch2_check_dirent_target(struct btree_trans *trans,
			       struct btree_iter *dirent_iter,
			       struct bkey_s_c_dirent d,
			       struct bch_inode_unpacked *target,
			       bool in_fsck)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;
	int ret;

	ret = bch2_check_dirent_inode_dirent(trans, d, target, in_fsck);
	if (ret)
		goto err;

	/* Nothing more to do unless d_type is wrong and is to be repaired: */
	if (!fsck_err_on(d.v->d_type != inode_d_type(target),
			 trans, dirent_d_type_wrong,
			 "incorrect d_type: got %s, should be %s:\n%s",
			 bch2_d_type_str(d.v->d_type),
			 bch2_d_type_str(inode_d_type(target)),
			 (printbuf_reset(&buf),
			  bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)))
		goto out;

	struct bkey_i_dirent *fixed = bch2_trans_kmalloc(trans, bkey_bytes(d.k));
	ret = PTR_ERR_OR_ZERO(fixed);
	if (ret)
		goto err;

	/* Start from a copy of the existing dirent, then correct it: */
	bkey_reassemble(&fixed->k_i, d.s_c);
	fixed->v.d_type = inode_d_type(target);

	if (fixed->v.d_type != DT_SUBVOL) {
		fixed->v.d_inum = cpu_to_le64(target->bi_inum);
	} else {
		fixed->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol);
		fixed->v.d_child_subvol = cpu_to_le32(target->bi_subvol);
	}

	ret = bch2_trans_update(trans, dirent_iter, &fixed->k_i,
				BTREE_UPDATE_internal_snapshot_node);
out:
err:
fsck_err:
	printbuf_exit(&buf);
	bch_err_fn(c, ret);
	return ret;
}
889 
890 /*
891  * BCH_INODE_has_case_insensitive:
892  * We have to track whether directories have any descendent directory that is
893  * casefolded - for overlayfs:
894  */
895 
/*
 * Starting at @inum and following parent_inum() upwards, set
 * BCH_INODE_has_case_insensitive on each inode.
 *
 * The walk ends at the first inode that already has the flag set, after the
 * root inode of the root subvolume has been updated, or on the first error.
 */
static int bch2_propagate_has_case_insensitive(struct btree_trans *trans, subvol_inum inum)
{
	struct btree_iter iter = {};
	int ret;

	for (;;) {
		struct bch_inode_unpacked inode;

		ret = bch2_inode_peek(trans, &iter, &inode, inum,
				      BTREE_ITER_intent|BTREE_ITER_with_updates);
		/* Stop on error, or once an ancestor is already flagged: */
		if (ret || (inode.bi_flags & BCH_INODE_has_case_insensitive))
			break;

		inode.bi_flags |= BCH_INODE_has_case_insensitive;

		ret = bch2_inode_write(trans, &iter, &inode);
		if (ret)
			break;

		bch2_trans_iter_exit(trans, &iter);

		if (subvol_inum_eq(inum, BCACHEFS_ROOT_SUBVOL_INUM))
			break;

		inum = parent_inum(inum, &inode);
	}

	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
926 
/*
 * If bch2_inode_casefold() reports @inode as casefolded, set
 * BCH_INODE_has_case_insensitive on it (in memory only - the caller writes
 * the inode) and propagate the flag to its ancestors, starting at its parent.
 *
 * Returns 0 when the inode is not casefolded, otherwise the result of the
 * propagation.
 */
int bch2_maybe_propagate_has_case_insensitive(struct btree_trans *trans, subvol_inum inum,
					      struct bch_inode_unpacked *inode)
{
	if (bch2_inode_casefold(trans->c, inode)) {
		inode->bi_flags |= BCH_INODE_has_case_insensitive;

		return bch2_propagate_has_case_insensitive(trans, parent_inum(inum, inode));
	}

	return 0;
}
937 
/*
 * fsck check for BCH_INODE_has_case_insensitive on @inode.
 *
 * - Non-directories: a set bi_casefold is cleared and *@do_update is set.
 * - Filesystems older than
 *   bcachefs_metadata_version_inode_has_case_insensitive: nothing is checked.
 * - A casefolded directory without the flag: reported, and if the fix is
 *   accepted the flag is set in @inode and *@do_update is set (the caller is
 *   expected to write @inode).
 * - If @inode has the flag, its parent directory is checked for the flag too
 *   and, if the fix is accepted, repaired and written here.
 *
 * Returns 0 or an error.
 */
int bch2_check_inode_has_case_insensitive(struct btree_trans *trans,
					  struct bch_inode_unpacked *inode,
					  snapshot_id_list *snapshot_overwrites,
					  bool *do_update)
{
	struct printbuf buf = PRINTBUF;
	/*
	 * NOTE(review): nothing in this function ever sets repairing_parents
	 * to true, so the loop below only examines the immediate parent and
	 * the commit/restart at the end is never reached. Confirm whether the
	 * parent repair was meant to set it.
	 */
	bool repairing_parents = false;
	int ret = 0;

	if (!S_ISDIR(inode->bi_mode)) {
		/*
		 * Old versions set bi_casefold for non dirs, but that's
		 * unnecessary and wasteful
		 */
		if (inode->bi_casefold) {
			inode->bi_casefold = 0;
			*do_update = true;
		}
		return 0;
	}

	if (trans->c->sb.version < bcachefs_metadata_version_inode_has_case_insensitive)
		return 0;

	if (bch2_inode_casefold(trans->c, inode) &&
	    !(inode->bi_flags & BCH_INODE_has_case_insensitive)) {
		prt_printf(&buf, "casefolded dir with has_case_insensitive not set\ninum %llu:%u ",
			   inode->bi_inum, inode->bi_snapshot);

		ret = bch2_inum_snapshot_to_path(trans, inode->bi_inum, inode->bi_snapshot,
						 snapshot_overwrites, &buf);
		if (ret)
			goto err;

		if (fsck_err(trans, inode_has_case_insensitive_not_set, "%s", buf.buf)) {
			inode->bi_flags |= BCH_INODE_has_case_insensitive;
			*do_update = true;
		}
	}

	/* Parents only need checking if this directory carries the flag: */
	if (!(inode->bi_flags & BCH_INODE_has_case_insensitive))
		goto out;

	struct bch_inode_unpacked dir = *inode;
	u32 snapshot = dir.bi_snapshot;

	while (!(dir.bi_inum	== BCACHEFS_ROOT_INO &&
		 dir.bi_subvol	== BCACHEFS_ROOT_SUBVOL)) {
		/* Crossing into the parent subvolume changes the snapshot: */
		if (dir.bi_parent_subvol) {
			ret = bch2_subvolume_get_snapshot(trans, dir.bi_parent_subvol, &snapshot);
			if (ret)
				goto err;

			snapshot_overwrites = NULL;
		}

		/* Replace dir with its parent: */
		ret = bch2_inode_find_by_inum_snapshot(trans, dir.bi_dir, snapshot, &dir, 0);
		if (ret)
			goto err;

		if (!(dir.bi_flags & BCH_INODE_has_case_insensitive)) {
			/*
			 * buf may still hold the message built for @inode
			 * above; start from an empty buffer so the two reports
			 * do not run together:
			 */
			printbuf_reset(&buf);
			prt_printf(&buf, "parent of casefolded dir with has_case_insensitive not set\n");

			ret = bch2_inum_snapshot_to_path(trans, dir.bi_inum, dir.bi_snapshot,
							 snapshot_overwrites, &buf);
			if (ret)
				goto err;

			if (fsck_err(trans, inode_parent_has_case_insensitive_not_set, "%s", buf.buf)) {
				dir.bi_flags |= BCH_INODE_has_case_insensitive;
				ret = __bch2_fsck_write_inode(trans, &dir);
				if (ret)
					goto err;
			}
		}

		/*
		 * We only need to check the first parent, unless we find an
		 * inconsistency
		 */
		if (!repairing_parents)
			break;
	}
out:
err:
fsck_err:
	printbuf_exit(&buf);
	if (ret)
		return ret;

	if (repairing_parents) {
		return bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
			-BCH_ERR_transaction_restart_nested;
	}

	return 0;
}
1035