1 // SPDX-License-Identifier: GPL-2.0
2
3 #include "bcachefs.h"
4 #include "acl.h"
5 #include "btree_update.h"
6 #include "dirent.h"
7 #include "inode.h"
8 #include "namei.h"
9 #include "subvolume.h"
10 #include "xattr.h"
11
12 #include <linux/posix_acl.h>
13
/*
 * Build the subvol_inum of @inode's parent directory.
 *
 * The inode number is the inode's bi_dir.  The subvolume is bi_parent_subvol
 * when that is nonzero, otherwise the subvolume of @inum.
 */
static inline subvol_inum parent_inum(subvol_inum inum, struct bch_inode_unpacked *inode)
{
	subvol_inum parent = { .inum = inode->bi_dir };

	if (inode->bi_parent_subvol)
		parent.subvol = inode->bi_parent_subvol;
	else
		parent.subvol = inum.subvol;

	return parent;
}
21
is_subdir_for_nlink(struct bch_inode_unpacked * inode)22 static inline int is_subdir_for_nlink(struct bch_inode_unpacked *inode)
23 {
24 return S_ISDIR(inode->bi_mode) && !inode->bi_subvol;
25 }
26
/*
 * Create the directory entry @name in directory @dir, within @trans.
 *
 * Without BCH_CREATE_SNAPSHOT a new inode is initialised and allocated into
 * @new_inode (and marked BCH_INODE_unlinked for BCH_CREATE_TMPFILE).  With
 * BCH_CREATE_SNAPSHOT no inode is allocated: the root inode of the subvolume
 * named by @snapshot_src is looked up instead and BCH_CREATE_SUBVOL is implied.
 * BCH_CREATE_SUBVOL creates a subvolume and turns the dirent into a DT_SUBVOL
 * entry that targets it.  BCH_CREATE_TMPFILE skips the dirent and the parent
 * directory update.
 *
 * @dir_u is filled with the unpacked parent directory inode.  @default_acl and
 * @acl may be NULL and are only applied on the non-snapshot path.
 *
 * Returns 0, or the error of the first step that failed; both iterators are
 * released on every path.
 */
int bch2_create_trans(struct btree_trans *trans,
		      subvol_inum dir,
		      struct bch_inode_unpacked *dir_u,
		      struct bch_inode_unpacked *new_inode,
		      const struct qstr *name,
		      uid_t uid, gid_t gid, umode_t mode, dev_t rdev,
		      struct posix_acl *default_acl,
		      struct posix_acl *acl,
		      subvol_inum snapshot_src,
		      unsigned flags)
{
	struct bch_fs *c = trans->c;
	struct btree_iter dir_iter = {};
	struct btree_iter inode_iter = {};
	subvol_inum new_inum = dir;
	u64 timestamp = bch2_current_time(c);
	u64 this_cpu = raw_smp_processor_id();
	unsigned dirent_type = mode_to_type(mode);
	u64 dirent_target;
	u32 snapshot;
	int ret;

	ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot);
	if (ret)
		goto err;

	ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir,
			      BTREE_ITER_intent|BTREE_ITER_with_updates);
	if (ret)
		goto err;

	if (flags & BCH_CREATE_SNAPSHOT) {
		/*
		 * Snapshot creation: nothing is allocated, but the root inode
		 * of the source subvolume has to be found so it can be updated
		 * in the new snapshot.
		 */
		if (!snapshot_src.inum) {
			/* Caller only named the subvolume; look up its root inode: */
			struct bch_subvolume src_subvol;

			ret = bch2_subvolume_get(trans, snapshot_src.subvol, true, &src_subvol);
			if (ret)
				goto err;

			snapshot_src.inum = le64_to_cpu(src_subvol.inode);
		}

		ret = bch2_inode_peek(trans, &inode_iter, new_inode, snapshot_src,
				      BTREE_ITER_intent);
		if (ret)
			goto err;

		/* Only a subvolume root can be snapshotted: */
		if (new_inode->bi_subvol != snapshot_src.subvol) {
			ret = -EINVAL;
			goto err;
		}

		/* A non-root caller must own the subvolume being snapshotted: */
		if (uid && new_inode->bi_uid != uid) {
			ret = -EPERM;
			goto err;
		}

		flags |= BCH_CREATE_SUBVOL;
	} else {
		/* Ordinary create: initialise and allocate a new inode. */
		bch2_inode_init_late(c, new_inode, timestamp, uid, gid, mode, rdev, dir_u);

		if (flags & BCH_CREATE_TMPFILE)
			new_inode->bi_flags |= BCH_INODE_unlinked;

		ret = bch2_inode_create(trans, &inode_iter, new_inode, snapshot, this_cpu);
		if (ret)
			goto err;

		snapshot_src = (subvol_inum) { 0 };
	}

	new_inum.inum = new_inode->bi_inum;
	dirent_target = new_inode->bi_inum;

	if (flags & BCH_CREATE_SUBVOL) {
		u32 new_subvol, dir_snapshot;

		ret = bch2_subvolume_create(trans, new_inode->bi_inum,
					    dir.subvol,
					    snapshot_src.subvol,
					    &new_subvol, &snapshot,
					    !!(flags & BCH_CREATE_SNAPSHOT_RO));
		if (ret)
			goto err;

		/* The dirent now refers to the subvolume rather than the inode: */
		new_inode->bi_parent_subvol = dir.subvol;
		new_inode->bi_subvol = new_subvol;
		new_inum.subvol = new_subvol;
		dirent_target = new_subvol;
		dirent_type = DT_SUBVOL;

		/* Re-read the directory's snapshot ID and reposition its iterator: */
		ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &dir_snapshot);
		if (ret)
			goto err;

		bch2_btree_iter_set_snapshot(trans, &dir_iter, dir_snapshot);
		ret = bch2_btree_iter_traverse(trans, &dir_iter);
		if (ret)
			goto err;
	}

	/* ACLs are only set when a new inode was created (not for snapshots): */
	if (default_acl && !(flags & BCH_CREATE_SNAPSHOT)) {
		ret = bch2_set_acl_trans(trans, new_inum, new_inode,
					 default_acl, ACL_TYPE_DEFAULT);
		if (ret)
			goto err;
	}

	if (acl && !(flags & BCH_CREATE_SNAPSHOT)) {
		ret = bch2_set_acl_trans(trans, new_inum, new_inode,
					 acl, ACL_TYPE_ACCESS);
		if (ret)
			goto err;
	}

	if (!(flags & BCH_CREATE_TMPFILE)) {
		struct bch_hash_info dir_hash = bch2_hash_info_init(c, dir_u);
		u64 dir_offset;

		dir_u->bi_nlink += is_subdir_for_nlink(new_inode);
		dir_u->bi_ctime = timestamp;
		dir_u->bi_mtime = timestamp;

		ret = bch2_dirent_create(trans, dir, &dir_hash,
					 dirent_type,
					 name,
					 dirent_target,
					 &dir_offset,
					 STR_HASH_must_create|BTREE_ITER_with_updates);
		if (ret)
			goto err;

		ret = bch2_inode_write(trans, &dir_iter, dir_u);
		if (ret)
			goto err;

		/* Record the backpointer to the dirent just created: */
		new_inode->bi_dir = dir_u->bi_inum;
		new_inode->bi_dir_offset = dir_offset;
	}

	if (S_ISDIR(mode)) {
		ret = bch2_maybe_propagate_has_case_insensitive(trans,
				(subvol_inum) {
					new_inode->bi_subvol ?: dir.subvol,
					new_inode->bi_inum },
				new_inode);
		if (ret)
			goto err;

		if (!new_inode->bi_subvol)
			new_inode->bi_depth = dir_u->bi_depth + 1;
	}

	/* Point the inode iterator at the (possibly new) snapshot and write: */
	inode_iter.flags &= ~BTREE_ITER_all_snapshots;
	bch2_btree_iter_set_snapshot(trans, &inode_iter, snapshot);

	ret = bch2_btree_iter_traverse(trans, &inode_iter);
	if (!ret)
		ret = bch2_inode_write(trans, &inode_iter, new_inode);
err:
	bch2_trans_iter_exit(trans, &inode_iter);
	bch2_trans_iter_exit(trans, &dir_iter);
	return ret;
}
202
/*
 * Add a hard link @name in directory @dir to the existing inode @inum.
 *
 * Fails with -EXDEV when @dir and @inum are in different subvolumes, or when
 * bch2_reinherit_attrs() reports that the inode's inherited options would
 * change under the new directory.  On success @inode_u has its link count and
 * ctime bumped and its backpointer set to the new dirent; @dir_u has its
 * mtime/ctime updated.
 *
 * Returns 0 or an error code.
 */
int bch2_link_trans(struct btree_trans *trans,
		    subvol_inum dir,  struct bch_inode_unpacked *dir_u,
		    subvol_inum inum, struct bch_inode_unpacked *inode_u,
		    const struct qstr *name)
{
	struct bch_fs *c = trans->c;
	struct btree_iter dir_iter = {};
	struct btree_iter inode_iter = {};
	struct bch_hash_info hash;
	u64 timestamp = bch2_current_time(c);
	u64 new_offset = 0;
	int ret;

	/* Hard links cannot cross subvolumes: */
	if (dir.subvol != inum.subvol)
		return -EXDEV;

	ret = bch2_inode_peek(trans, &inode_iter, inode_u, inum, BTREE_ITER_intent);
	if (ret)
		return ret;

	inode_u->bi_ctime = timestamp;
	ret = bch2_inode_nlink_inc(inode_u);
	if (ret)
		goto err;

	ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir, BTREE_ITER_intent);
	if (ret)
		goto err;

	/* Refuse the link if it would change the inode's inherited options: */
	if (bch2_reinherit_attrs(inode_u, dir_u)) {
		ret = -EXDEV;
		goto err;
	}

	dir_u->bi_ctime = timestamp;
	dir_u->bi_mtime = timestamp;

	hash = bch2_hash_info_init(c, dir_u);

	ret = bch2_dirent_create(trans, dir, &hash,
				 mode_to_type(inode_u->bi_mode),
				 name, inum.inum,
				 &new_offset,
				 STR_HASH_must_create);
	if (ret)
		goto err;

	/* The inode's backpointer follows the newest link: */
	inode_u->bi_dir = dir.inum;
	inode_u->bi_dir_offset = new_offset;

	ret = bch2_inode_write(trans, &dir_iter, dir_u);
	if (!ret)
		ret = bch2_inode_write(trans, &inode_iter, inode_u);
err:
	bch2_trans_iter_exit(trans, &dir_iter);
	bch2_trans_iter_exit(trans, &inode_iter);
	return ret;
}
259
/*
 * Remove the directory entry @name from directory @dir.
 *
 * @dir_u and @inode_u are filled with the parent directory and the target
 * inode.  Directories must be empty unless @deleting_subvol is set; with
 * @deleting_subvol the target must be a subvolume root, and a subvolume with
 * children is rejected.  For subvolumes the dirent is deleted in the snapshot
 * where it actually lives instead of being whited out; for everything else the
 * inode's link count is dropped.
 *
 * Returns 0 or an error code; all three iterators are released on every path.
 */
int bch2_unlink_trans(struct btree_trans *trans,
		      subvol_inum dir,
		      struct bch_inode_unpacked *dir_u,
		      struct bch_inode_unpacked *inode_u,
		      const struct qstr *name,
		      bool deleting_subvol)
{
	struct bch_fs *c = trans->c;
	struct btree_iter dir_iter = {};
	struct btree_iter dirent_iter = {};
	struct btree_iter inode_iter = {};
	struct bch_hash_info hash;
	subvol_inum target;
	u64 timestamp = bch2_current_time(c);
	int ret;

	ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir, BTREE_ITER_intent);
	if (ret)
		goto err;

	hash = bch2_hash_info_init(c, dir_u);

	ret = bch2_dirent_lookup_trans(trans, &dirent_iter, dir, &hash,
				       name, &target, BTREE_ITER_intent);
	if (ret)
		goto err;

	ret = bch2_inode_peek(trans, &inode_iter, inode_u, target,
			      BTREE_ITER_intent);
	if (ret)
		goto err;

	/* Plain rmdir: the directory has to be empty. */
	if (!deleting_subvol && S_ISDIR(inode_u->bi_mode)) {
		ret = bch2_empty_dir_trans(trans, target);
		if (ret)
			goto err;
	}

	/* Asked to delete a subvolume, but the target is not one: */
	if (deleting_subvol && !inode_u->bi_subvol) {
		ret = bch_err_throw(c, ENOENT_not_subvol);
		goto err;
	}

	if (inode_u->bi_subvol) {
		/* Recursive subvolume destroy is not allowed (yet?) */
		ret = bch2_subvol_has_children(trans, inode_u->bi_subvol);
		if (ret)
			goto err;
	}

	if (!deleting_subvol && !inode_u->bi_subvol) {
		bch2_inode_nlink_dec(trans, inode_u);
	} else {
		struct bkey_s_c k;

		ret = bch2_subvolume_unlink(trans, inode_u->bi_subvol);
		if (ret)
			goto err;

		k = bch2_btree_iter_peek_slot(trans, &dirent_iter);
		ret = bkey_err(k);
		if (ret)
			goto err;

		/*
		 * A subvolume's dirent has to be really deleted, not just
		 * whited out in the current snapshot - so move the iterator to
		 * the snapshot the key was found in:
		 */
		bch2_btree_iter_set_snapshot(trans, &dirent_iter, k.k->p.snapshot);
		ret = bch2_btree_iter_traverse(trans, &dirent_iter);
		if (ret)
			goto err;
	}

	/* Clear the backpointer if it referred to the dirent being removed: */
	if (inode_u->bi_dir == dirent_iter.pos.inode &&
	    inode_u->bi_dir_offset == dirent_iter.pos.offset) {
		inode_u->bi_dir = 0;
		inode_u->bi_dir_offset = 0;
	}

	inode_u->bi_ctime = timestamp;
	dir_u->bi_ctime = timestamp;
	dir_u->bi_mtime = timestamp;

	if (is_subdir_for_nlink(inode_u))
		dir_u->bi_nlink--;

	ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
				  &hash, &dirent_iter,
				  BTREE_UPDATE_internal_snapshot_node);
	if (!ret)
		ret = bch2_inode_write(trans, &dir_iter, dir_u);
	if (!ret)
		ret = bch2_inode_write(trans, &inode_iter, inode_u);
err:
	bch2_trans_iter_exit(trans, &inode_iter);
	bch2_trans_iter_exit(trans, &dirent_iter);
	bch2_trans_iter_exit(trans, &dir_iter);
	return ret;
}
353
bch2_reinherit_attrs(struct bch_inode_unpacked * dst_u,struct bch_inode_unpacked * src_u)354 bool bch2_reinherit_attrs(struct bch_inode_unpacked *dst_u,
355 struct bch_inode_unpacked *src_u)
356 {
357 u64 src, dst;
358 unsigned id;
359 bool ret = false;
360
361 for (id = 0; id < Inode_opt_nr; id++) {
362 if (!S_ISDIR(dst_u->bi_mode) && id == Inode_opt_casefold)
363 continue;
364
365 /* Skip attributes that were explicitly set on this inode */
366 if (dst_u->bi_fields_set & (1 << id))
367 continue;
368
369 src = bch2_inode_opt_get(src_u, id);
370 dst = bch2_inode_opt_get(dst_u, id);
371
372 if (src == dst)
373 continue;
374
375 bch2_inode_opt_set(dst_u, id, src);
376 ret = true;
377 }
378
379 return ret;
380 }
381
/*
 * Set fs_path_parent of subvolume @subvol to @new_parent, using a mutable
 * copy of the subvolume key obtained within @trans.
 *
 * Returns 0, or the error encoded in the pointer returned by the lookup.
 */
static int subvol_update_parent(struct btree_trans *trans, u32 subvol, u32 new_parent)
{
	struct btree_iter iter;
	struct bkey_i_subvolume *sv;
	int ret;

	sv = bch2_bkey_get_mut_typed(trans, &iter,
			BTREE_ID_subvolumes, POS(0, subvol),
			BTREE_ITER_cached, subvolume);
	ret = PTR_ERR_OR_ZERO(sv);
	if (!ret) {
		sv->v.fs_path_parent = cpu_to_le32(new_parent);
		bch2_trans_iter_exit(trans, &iter);
	}

	return ret;
}
397
/*
 * Rename @src_name in @src_dir to @dst_name in @dst_dir, according to @mode
 * (the code distinguishes BCH_RENAME_EXCHANGE and BCH_RENAME_OVERWRITE from
 * any other mode).
 *
 * The four *_u arguments are filled with the unpacked inodes involved;
 * @dst_inode_u is only looked up when the destination dirent had a target.
 * When source and destination directory are the same, the source directory
 * inode is used for both.
 *
 * Moves across subvolumes are only permitted for subvolume roots (-EXDEV
 * otherwise), and -EXDEV is also returned when moving a directory would change
 * its inherited options.  An overwrite requires source and destination to be
 * both directories or both non-directories (-ENOTDIR), and a destination
 * directory must be empty.
 *
 * Returns 0 or an error code; all iterators are released on every path.
 */
int bch2_rename_trans(struct btree_trans *trans,
		      subvol_inum src_dir, struct bch_inode_unpacked *src_dir_u,
		      subvol_inum dst_dir, struct bch_inode_unpacked *dst_dir_u,
		      struct bch_inode_unpacked *src_inode_u,
		      struct bch_inode_unpacked *dst_inode_u,
		      const struct qstr *src_name,
		      const struct qstr *dst_name,
		      enum bch_rename_mode mode)
{
	struct bch_fs *c = trans->c;
	struct btree_iter src_dir_iter = {};
	struct btree_iter dst_dir_iter = {};
	struct btree_iter src_inode_iter = {};
	struct btree_iter dst_inode_iter = {};
	struct bch_hash_info src_hash, dst_hash;
	subvol_inum src_inum, dst_inum;
	u64 src_offset, dst_offset;
	u64 timestamp = bch2_current_time(c);
	const bool exchange = mode == BCH_RENAME_EXCHANGE;
	const bool overwrite = mode == BCH_RENAME_OVERWRITE;
	const bool cross_dir = !subvol_inum_eq(dst_dir, src_dir);
	int ret;

	ret = bch2_inode_peek(trans, &src_dir_iter, src_dir_u, src_dir,
			      BTREE_ITER_intent);
	if (ret)
		goto err;

	src_hash = bch2_hash_info_init(c, src_dir_u);

	if (cross_dir) {
		ret = bch2_inode_peek(trans, &dst_dir_iter, dst_dir_u, dst_dir,
				      BTREE_ITER_intent);
		if (ret)
			goto err;

		dst_hash = bch2_hash_info_init(c, dst_dir_u);
	} else {
		/* Same directory: share the inode and the hash info. */
		dst_dir_u = src_dir_u;
		dst_hash = src_hash;
	}

	ret = bch2_dirent_rename(trans,
				 src_dir, &src_hash,
				 dst_dir, &dst_hash,
				 src_name, &src_inum, &src_offset,
				 dst_name, &dst_inum, &dst_offset,
				 mode);
	if (ret)
		goto err;

	ret = bch2_inode_peek(trans, &src_inode_iter, src_inode_u, src_inum,
			      BTREE_ITER_intent);
	if (ret)
		goto err;

	if (dst_inum.inum) {
		ret = bch2_inode_peek(trans, &dst_inode_iter, dst_inode_u, dst_inum,
				      BTREE_ITER_intent);
		if (ret)
			goto err;
	}

	/* Subvolume roots changing parent subvolume: update the subvolume keys. */
	if (src_inode_u->bi_subvol &&
	    dst_dir.subvol != src_inode_u->bi_parent_subvol) {
		ret = subvol_update_parent(trans, src_inode_u->bi_subvol, dst_dir.subvol);
		if (ret)
			goto err;
	}

	if (exchange &&
	    dst_inode_u->bi_subvol &&
	    src_dir.subvol != dst_inode_u->bi_parent_subvol) {
		ret = subvol_update_parent(trans, dst_inode_u->bi_subvol, src_dir.subvol);
		if (ret)
			goto err;
	}

	/* Can't move across subvolumes, unless it's a subvolume root: */
	if (src_dir.subvol != dst_dir.subvol &&
	    (!src_inode_u->bi_subvol ||
	     (dst_inum.inum && !dst_inode_u->bi_subvol))) {
		ret = -EXDEV;
		goto err;
	}

	if (src_inode_u->bi_parent_subvol)
		src_inode_u->bi_parent_subvol = dst_dir.subvol;

	if (exchange && dst_inode_u->bi_parent_subvol)
		dst_inode_u->bi_parent_subvol = src_dir.subvol;

	/* Re-point the backpointers at the new dirent positions: */
	src_inode_u->bi_dir = dst_dir_u->bi_inum;
	src_inode_u->bi_dir_offset = dst_offset;

	if (exchange) {
		dst_inode_u->bi_dir = src_dir_u->bi_inum;
		dst_inode_u->bi_dir_offset = src_offset;
	}

	if (overwrite) {
		/* Clear the overwritten inode's backpointer if it matches: */
		if (dst_inode_u->bi_dir == dst_dir_u->bi_inum &&
		    dst_inode_u->bi_dir_offset == src_offset) {
			dst_inode_u->bi_dir = 0;
			dst_inode_u->bi_dir_offset = 0;
		}

		if (S_ISDIR(src_inode_u->bi_mode) !=
		    S_ISDIR(dst_inode_u->bi_mode)) {
			ret = -ENOTDIR;
			goto err;
		}

		if (S_ISDIR(dst_inode_u->bi_mode)) {
			ret = bch2_empty_dir_trans(trans, dst_inum);
			if (ret)
				goto err;
		}
	}

	if (cross_dir) {
		/*
		 * bch2_reinherit_attrs() updates the inode as a side effect, so
		 * it is always called before the S_ISDIR() test.
		 */
		if (bch2_reinherit_attrs(src_inode_u, dst_dir_u) &&
		    S_ISDIR(src_inode_u->bi_mode)) {
			ret = -EXDEV;
			goto err;
		}

		if (exchange &&
		    bch2_reinherit_attrs(dst_inode_u, src_dir_u) &&
		    S_ISDIR(dst_inode_u->bi_mode)) {
			ret = -EXDEV;
			goto err;
		}

		ret = bch2_maybe_propagate_has_case_insensitive(trans, src_inum, src_inode_u);
		if (!ret && exchange)
			ret = bch2_maybe_propagate_has_case_insensitive(trans, dst_inum, dst_inode_u);
		if (ret)
			goto err;

		if (is_subdir_for_nlink(src_inode_u)) {
			src_dir_u->bi_nlink--;
			dst_dir_u->bi_nlink++;
		}

		if (S_ISDIR(src_inode_u->bi_mode) &&
		    !src_inode_u->bi_subvol)
			src_inode_u->bi_depth = dst_dir_u->bi_depth + 1;

		if (exchange &&
		    S_ISDIR(dst_inode_u->bi_mode) &&
		    !dst_inode_u->bi_subvol)
			dst_inode_u->bi_depth = src_dir_u->bi_depth + 1;
	}

	/* The destination's old target leaves dst_dir (and, on exchange, enters src_dir): */
	if (dst_inum.inum && is_subdir_for_nlink(dst_inode_u)) {
		dst_dir_u->bi_nlink--;
		if (exchange)
			src_dir_u->bi_nlink++;
	}

	if (overwrite)
		bch2_inode_nlink_dec(trans, dst_inode_u);

	src_dir_u->bi_mtime = timestamp;
	src_dir_u->bi_ctime = timestamp;

	if (src_dir.inum != dst_dir.inum) {
		dst_dir_u->bi_mtime = timestamp;
		dst_dir_u->bi_ctime = timestamp;
	}

	src_inode_u->bi_ctime = timestamp;

	if (dst_inum.inum)
		dst_inode_u->bi_ctime = timestamp;

	/* Write everything back, stopping at the first failure: */
	ret = bch2_inode_write(trans, &src_dir_iter, src_dir_u);
	if (!ret && src_dir.inum != dst_dir.inum)
		ret = bch2_inode_write(trans, &dst_dir_iter, dst_dir_u);
	if (!ret)
		ret = bch2_inode_write(trans, &src_inode_iter, src_inode_u);
	if (!ret && dst_inum.inum)
		ret = bch2_inode_write(trans, &dst_inode_iter, dst_inode_u);
err:
	bch2_trans_iter_exit(trans, &dst_inode_iter);
	bch2_trans_iter_exit(trans, &src_inode_iter);
	bch2_trans_iter_exit(trans, &dst_dir_iter);
	bch2_trans_iter_exit(trans, &src_dir_iter);
	return ret;
}
589
590 /* inum_to_path */
591
prt_bytes_reversed(struct printbuf * out,const void * b,unsigned n)592 static inline void prt_bytes_reversed(struct printbuf *out, const void *b, unsigned n)
593 {
594 bch2_printbuf_make_room(out, n);
595
596 unsigned can_print = min(n, printbuf_remaining(out));
597
598 b += n;
599
600 for (unsigned i = 0; i < can_print; i++)
601 out->buf[out->pos++] = *((char *) --b);
602
603 printbuf_nul_terminate(out);
604 }
605
prt_str_reversed(struct printbuf * out,const char * s)606 static inline void prt_str_reversed(struct printbuf *out, const char *s)
607 {
608 prt_bytes_reversed(out, s, strlen(s));
609 }
610
/* Reverse the @n bytes starting at @b in place. */
static inline void reverse_bytes(void *b, size_t n)
{
	char *bytes = b;

	/* Walk inwards from both ends, exchanging one pair per step. */
	for (size_t lo = 0, hi = n; lo < hi; lo++) {
		hi--;
		swap(bytes[lo], bytes[hi]);
	}
}
621
/*
 * Append the path of inode @inum to @path.
 *
 * The walk goes from the inode up through its bi_dir backpointers, so each
 * component is emitted reversed and the whole appended region is reversed once
 * at the end.  If @snapshot is 0 it is looked up from @subvol.
 *
 * A revisited (subvol-or-snapshot, inum) pair ends the walk with "(loop)".
 * Lookup failures other than transaction restarts end it with
 * "(disconnected)"; in both cases the function still returns 0.
 *
 * Returns 0, -ENOMEM if @path recorded an allocation failure, the
 * darray_push() error, or a transaction restart error.
 */
static int __bch2_inum_to_path(struct btree_trans *trans,
			       u32 subvol, u64 inum, u32 snapshot,
			       struct printbuf *path)
{
	unsigned start = path->pos;
	int ret = 0;
	DARRAY(subvol_inum) visited = {};

	if (!snapshot) {
		ret = bch2_subvolume_get_snapshot(trans, subvol, &snapshot);
		if (ret)
			goto disconnected;
	}

	for (;;) {
		struct bch_inode_unpacked cur_inode;
		struct btree_iter dirent_iter;
		struct bkey_s_c_dirent dirent;
		struct qstr component;
		subvol_inum cur = (subvol_inum) { subvol ?: snapshot, inum };

		/* Seen this inode before: the backpointers form a cycle. */
		if (darray_find_p(visited, seen,
				  seen->subvol == cur.subvol && seen->inum == cur.inum)) {
			prt_str_reversed(path, "(loop)");
			break;
		}

		ret = darray_push(&visited, cur);
		if (ret)
			goto err;

		ret = bch2_inode_find_by_inum_snapshot(trans, inum, snapshot, &cur_inode, 0);
		if (ret)
			goto disconnected;

		/* Reached the filesystem root: done. */
		if (cur_inode.bi_subvol == BCACHEFS_ROOT_SUBVOL &&
		    cur_inode.bi_inum == BCACHEFS_ROOT_INO)
			break;

		if (!cur_inode.bi_dir && !cur_inode.bi_dir_offset) {
			ret = bch_err_throw(trans->c, ENOENT_inode_no_backpointer);
			goto disconnected;
		}

		/* Step to the parent, switching subvolume/snapshot if needed: */
		inum = cur_inode.bi_dir;
		if (cur_inode.bi_parent_subvol) {
			subvol = cur_inode.bi_parent_subvol;
			ret = bch2_subvolume_get_snapshot(trans, cur_inode.bi_parent_subvol, &snapshot);
			if (ret)
				goto disconnected;
		}

		dirent = bch2_bkey_get_iter_typed(trans, &dirent_iter,
				BTREE_ID_dirents,
				SPOS(cur_inode.bi_dir, cur_inode.bi_dir_offset, snapshot),
				0, dirent);
		ret = bkey_err(dirent.s_c);
		if (ret)
			goto disconnected;

		component = bch2_dirent_get_name(dirent);

		prt_bytes_reversed(path, component.name, component.len);

		prt_char(path, '/');

		bch2_trans_iter_exit(trans, &dirent_iter);
	}

	/* Nothing was emitted: print a bare "/". */
	if (start == path->pos)
		prt_char(path, '/');
out:
	ret = path->allocation_failure ? -ENOMEM : 0;
	if (!ret)
		reverse_bytes(path->buf + start, path->pos - start);
err:
	darray_exit(&visited);
	return ret;
disconnected:
	/* Transaction restarts are passed up; anything else is reported in the path. */
	if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
		prt_str_reversed(path, "(disconnected)");
		goto out;
	}
	goto err;
}
707
/*
 * Append the path of @inum to @path.  The snapshot is passed as 0, so
 * __bch2_inum_to_path() looks it up from @inum.subvol.
 */
int bch2_inum_to_path(struct btree_trans *trans,
		      subvol_inum inum,
		      struct printbuf *path)
{
	const u32 lookup_snapshot = 0;

	return __bch2_inum_to_path(trans, inum.subvol, inum.inum, lookup_snapshot, path);
}
714
/*
 * Append the path of inode @inum, as seen in @snapshot, to @path.  No
 * subvolume is passed to __bch2_inum_to_path().
 *
 * @snapshot_overwrites is accepted but not used by this function.
 */
int bch2_inum_snapshot_to_path(struct btree_trans *trans, u64 inum, u32 snapshot,
			       snapshot_id_list *snapshot_overwrites,
			       struct printbuf *path)
{
	const u32 no_subvol = 0;

	return __bch2_inum_to_path(trans, no_subvol, inum, snapshot, path);
}
721
722 /* fsck */
723
/*
 * Check that the backpointer (bi_dir, bi_dir_offset) of @target agrees with
 * the dirent @d that points at it, and repair what can be repaired.
 *
 * - The inode already points back to @d: nothing to do.
 * - The inode has no backpointer: report it for directories and for inodes
 *   still flagged unlinked, then clear the unlinked flag, set the backpointer
 *   to @d and write the inode.
 * - The backpointer names a dirent that does not exist: repoint it at @d.
 * - The backpointer names another existing dirent: a directory or subvolume
 *   with several links is an error (@d is removed when @in_fsck, otherwise the
 *   filesystem is flagged inconsistent); a regular file with bi_nlink == 0 gets
 *   its link count bumped so a later nlink check picks it up.
 *
 * Every path past the first check leaves through the common exit, so the
 * message buffer is always freed.  The missing-backpointer branch used to
 * return directly and could leak text formatted by the fsck_err_on() calls.
 *
 * Returns 0 or an error code.
 */
static int bch2_check_dirent_inode_dirent(struct btree_trans *trans,
					  struct bkey_s_c_dirent d,
					  struct bch_inode_unpacked *target,
					  bool in_fsck)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;
	struct btree_iter bp_iter = {};
	int ret = 0;

	if (inode_points_to_dirent(target, d))
		return 0;

	if (!bch2_inode_has_backpointer(target)) {
		fsck_err_on(S_ISDIR(target->bi_mode),
			    trans, inode_dir_missing_backpointer,
			    "directory with missing backpointer\n%s",
			    (printbuf_reset(&buf),
			     bch2_bkey_val_to_text(&buf, c, d.s_c),
			     prt_printf(&buf, "\n"),
			     bch2_inode_unpacked_to_text(&buf, target),
			     buf.buf));

		fsck_err_on(target->bi_flags & BCH_INODE_unlinked,
			    trans, inode_unlinked_but_has_dirent,
			    "inode unlinked but has dirent\n%s",
			    (printbuf_reset(&buf),
			     bch2_bkey_val_to_text(&buf, c, d.s_c),
			     prt_printf(&buf, "\n"),
			     bch2_inode_unpacked_to_text(&buf, target),
			     buf.buf));

		target->bi_flags &= ~BCH_INODE_unlinked;
		target->bi_dir = d.k->p.inode;
		target->bi_dir_offset = d.k->p.offset;
		ret = __bch2_fsck_write_inode(trans, target);
		/* Leave through the common exit so that buf is released: */
		goto out;
	}

	struct bkey_s_c_dirent bp_dirent =
		bch2_bkey_get_iter_typed(trans, &bp_iter, BTREE_ID_dirents,
			      SPOS(target->bi_dir, target->bi_dir_offset, target->bi_snapshot),
			      0, dirent);
	ret = bkey_err(bp_dirent);
	if (ret && !bch2_err_matches(ret, ENOENT))
		goto err;

	/* ENOENT just means the backpointer is stale; it is not a failure. */
	bool backpointer_exists = !ret;
	ret = 0;

	if (!backpointer_exists) {
		if (fsck_err(trans, inode_wrong_backpointer,
			     "inode %llu:%u has wrong backpointer:\n"
			     "got       %llu:%llu\n"
			     "should be %llu:%llu",
			     target->bi_inum, target->bi_snapshot,
			     target->bi_dir,
			     target->bi_dir_offset,
			     d.k->p.inode,
			     d.k->p.offset)) {
			target->bi_dir = d.k->p.inode;
			target->bi_dir_offset = d.k->p.offset;
			ret = __bch2_fsck_write_inode(trans, target);
		}
	} else {
		/* Both dirents go into the message: */
		printbuf_reset(&buf);
		bch2_bkey_val_to_text(&buf, c, d.s_c);
		prt_newline(&buf);
		bch2_bkey_val_to_text(&buf, c, bp_dirent.s_c);

		if (S_ISDIR(target->bi_mode) || target->bi_subvol) {
			/*
			 * XXX: verify connectivity of the other dirent
			 * up to the root before removing this one
			 *
			 * Additionally, bch2_lookup would need to cope with the
			 * dirent it found being removed - or should we remove
			 * the other one, even though the inode points to it?
			 */
			if (in_fsck) {
				if (fsck_err(trans, inode_dir_multiple_links,
					     "%s %llu:%u with multiple links\n%s",
					     S_ISDIR(target->bi_mode) ? "directory" : "subvolume",
					     target->bi_inum, target->bi_snapshot, buf.buf))
					ret = bch2_fsck_remove_dirent(trans, d.k->p);
			} else {
				bch2_fs_inconsistent(c,
						"%s %llu:%u with multiple links\n%s",
						S_ISDIR(target->bi_mode) ? "directory" : "subvolume",
						target->bi_inum, target->bi_snapshot, buf.buf);
			}

			goto out;
		} else {
			/*
			 * hardlinked file with nlink 0:
			 * We're just adjusting nlink here so check_nlinks() will pick
			 * it up, it ignores inodes with nlink 0
			 */
			if (fsck_err_on(!target->bi_nlink,
					trans, inode_multiple_links_but_nlink_0,
					"inode %llu:%u type %s has multiple links but i_nlink 0\n%s",
					target->bi_inum, target->bi_snapshot, bch2_d_types[d.v->d_type], buf.buf)) {
				target->bi_nlink++;
				target->bi_flags &= ~BCH_INODE_unlinked;
				ret = __bch2_fsck_write_inode(trans, target);
				if (ret)
					goto err;
			}
		}
	}
out:
err:
fsck_err:
	bch2_trans_iter_exit(trans, &bp_iter);
	printbuf_exit(&buf);
	bch_err_fn(c, ret);
	return ret;
}
842
/*
 * Check dirent @d against the inode @target it points to.
 *
 * First the inode's backpointer is checked against @d.  Then, if the dirent's
 * d_type disagrees with the inode and the fsck error is to be fixed, a
 * corrected copy of the dirent (d_type plus either the subvolume pair or the
 * inode number) is queued as an update at @dirent_iter.
 *
 * Returns 0 or an error code.
 */
int __bch2_check_dirent_target(struct btree_trans *trans,
			       struct btree_iter *dirent_iter,
			       struct bkey_s_c_dirent d,
			       struct bch_inode_unpacked *target,
			       bool in_fsck)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;
	struct bkey_i_dirent *fixed;
	int ret;

	ret = bch2_check_dirent_inode_dirent(trans, d, target, in_fsck);
	if (ret)
		goto fsck_err;

	/* d_type is fine, or the error is not to be fixed: nothing to rewrite. */
	if (!fsck_err_on(d.v->d_type != inode_d_type(target),
			 trans, dirent_d_type_wrong,
			 "incorrect d_type: got %s, should be %s:\n%s",
			 bch2_d_type_str(d.v->d_type),
			 bch2_d_type_str(inode_d_type(target)),
			 (printbuf_reset(&buf),
			  bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)))
		goto fsck_err;

	fixed = bch2_trans_kmalloc(trans, bkey_bytes(d.k));
	ret = PTR_ERR_OR_ZERO(fixed);
	if (ret)
		goto fsck_err;

	bkey_reassemble(&fixed->k_i, d.s_c);
	fixed->v.d_type = inode_d_type(target);

	if (fixed->v.d_type != DT_SUBVOL) {
		fixed->v.d_inum = cpu_to_le64(target->bi_inum);
	} else {
		fixed->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol);
		fixed->v.d_child_subvol = cpu_to_le32(target->bi_subvol);
	}

	ret = bch2_trans_update(trans, dirent_iter, &fixed->k_i,
				BTREE_UPDATE_internal_snapshot_node);
fsck_err:
	printbuf_exit(&buf);
	bch_err_fn(c, ret);
	return ret;
}
889
890 /*
891 * BCH_INODE_has_case_insensitive:
892 * We have to track whether directories have any descendent directory that is
893 * casefolded - for overlayfs:
894 */
895
/*
 * Starting at @inum, set BCH_INODE_has_case_insensitive on the inode and then
 * on each parent in turn.  The walk stops at the first inode that already has
 * the flag, after the root has been updated, or on the first error.
 *
 * Returns 0 or the error from the inode lookup/write.
 */
static int bch2_propagate_has_case_insensitive(struct btree_trans *trans, subvol_inum inum)
{
	struct btree_iter iter = {};
	int ret = 0;

	for (;;) {
		struct bch_inode_unpacked dir;

		ret = bch2_inode_peek(trans, &iter, &dir, inum,
				      BTREE_ITER_intent|BTREE_ITER_with_updates);
		if (ret)
			goto out;

		/* Already flagged: nothing more to do from here upwards. */
		if (dir.bi_flags & BCH_INODE_has_case_insensitive)
			goto out;

		dir.bi_flags |= BCH_INODE_has_case_insensitive;
		ret = bch2_inode_write(trans, &iter, &dir);
		if (ret)
			goto out;

		bch2_trans_iter_exit(trans, &iter);

		if (subvol_inum_eq(inum, BCACHEFS_ROOT_SUBVOL_INUM))
			goto out;

		inum = parent_inum(inum, &dir);
	}
out:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
926
/*
 * If bch2_inode_casefold() reports @inode as casefolded, set
 * BCH_INODE_has_case_insensitive on it (in memory only - the caller writes
 * the inode) and propagate the flag up through its parents.
 *
 * Returns 0 when there is nothing to do, else the result of the propagation.
 */
int bch2_maybe_propagate_has_case_insensitive(struct btree_trans *trans, subvol_inum inum,
					      struct bch_inode_unpacked *inode)
{
	int ret = 0;

	if (bch2_inode_casefold(trans->c, inode)) {
		subvol_inum parent;

		inode->bi_flags |= BCH_INODE_has_case_insensitive;

		parent = parent_inum(inum, inode);
		ret = bch2_propagate_has_case_insensitive(trans, parent);
	}

	return ret;
}
937
/*
 * fsck check of bi_casefold / BCH_INODE_has_case_insensitive for @inode.
 *
 * Non-directories only have a stale bi_casefold cleared.  For directories (on
 * filesystems at or past the inode_has_case_insensitive metadata version) a
 * casefolded directory must carry BCH_INODE_has_case_insensitive, and a
 * directory carrying the flag must have a parent that carries it too; a
 * parent missing the flag is repaired and written here.
 *
 * *@do_update is set when @inode itself was changed and has to be written by
 * the caller.
 *
 * Returns 0 or an error code.
 */
int bch2_check_inode_has_case_insensitive(struct btree_trans *trans,
					  struct bch_inode_unpacked *inode,
					  snapshot_id_list *snapshot_overwrites,
					  bool *do_update)
{
	struct printbuf buf = PRINTBUF;
	/*
	 * NOTE(review): this is never assigned after initialisation in this
	 * function, so as written the parent walk below always stops after the
	 * first parent and the commit/restart tail is never taken - confirm
	 * whether it was meant to be set when a parent gets repaired.
	 */
	bool repairing_parents = false;
	int ret = 0;

	if (!S_ISDIR(inode->bi_mode)) {
		/*
		 * Old versions set bi_casefold for non dirs, but that's
		 * unnecessary and wasteful
		 */
		if (!inode->bi_casefold)
			return 0;

		inode->bi_casefold = 0;
		*do_update = true;
		return 0;
	}

	if (trans->c->sb.version < bcachefs_metadata_version_inode_has_case_insensitive)
		return 0;

	if (bch2_inode_casefold(trans->c, inode) &&
	    !(inode->bi_flags & BCH_INODE_has_case_insensitive)) {
		prt_printf(&buf, "casefolded dir with has_case_insensitive not set\ninum %llu:%u ",
			   inode->bi_inum, inode->bi_snapshot);

		ret = bch2_inum_snapshot_to_path(trans, inode->bi_inum, inode->bi_snapshot,
						 snapshot_overwrites, &buf);
		if (ret)
			goto fsck_err;

		if (fsck_err(trans, inode_has_case_insensitive_not_set, "%s", buf.buf)) {
			inode->bi_flags |= BCH_INODE_has_case_insensitive;
			*do_update = true;
		}
	}

	/* Without the flag there is nothing the parents have to agree with. */
	if (!(inode->bi_flags & BCH_INODE_has_case_insensitive))
		goto fsck_err;

	struct bch_inode_unpacked parent = *inode;
	u32 parent_snapshot = parent.bi_snapshot;

	while (parent.bi_inum != BCACHEFS_ROOT_INO ||
	       parent.bi_subvol != BCACHEFS_ROOT_SUBVOL) {
		/* Crossing into the parent subvolume: switch snapshot. */
		if (parent.bi_parent_subvol) {
			ret = bch2_subvolume_get_snapshot(trans, parent.bi_parent_subvol,
							  &parent_snapshot);
			if (ret)
				goto fsck_err;

			snapshot_overwrites = NULL;
		}

		ret = bch2_inode_find_by_inum_snapshot(trans, parent.bi_dir, parent_snapshot,
						       &parent, 0);
		if (ret)
			goto fsck_err;

		if (!(parent.bi_flags & BCH_INODE_has_case_insensitive)) {
			prt_printf(&buf, "parent of casefolded dir with has_case_insensitive not set\n");

			ret = bch2_inum_snapshot_to_path(trans, parent.bi_inum, parent.bi_snapshot,
							 snapshot_overwrites, &buf);
			if (ret)
				goto fsck_err;

			if (fsck_err(trans, inode_parent_has_case_insensitive_not_set, "%s", buf.buf)) {
				parent.bi_flags |= BCH_INODE_has_case_insensitive;
				ret = __bch2_fsck_write_inode(trans, &parent);
				if (ret)
					goto fsck_err;
			}
		}

		/*
		 * We only need to check the first parent, unless we find an
		 * inconsistency
		 */
		if (!repairing_parents)
			break;
	}
fsck_err:
	printbuf_exit(&buf);
	if (ret)
		return ret;

	if (!repairing_parents)
		return 0;

	ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
	return ret ?: -BCH_ERR_transaction_restart_nested;
}
1035