1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (c) 2021-2024 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6 #include "xfs_platform.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_log_format.h"
13 #include "xfs_trans.h"
14 #include "xfs_inode.h"
15 #include "xfs_ialloc.h"
16 #include "xfs_quota.h"
17 #include "xfs_trans_space.h"
18 #include "xfs_dir2.h"
19 #include "xfs_icache.h"
20 #include "xfs_bmap.h"
21 #include "xfs_bmap_btree.h"
22 #include "xfs_parent.h"
23 #include "xfs_attr_sf.h"
24 #include "scrub/scrub.h"
25 #include "scrub/common.h"
26 #include "scrub/repair.h"
27 #include "scrub/trace.h"
28 #include "scrub/orphanage.h"
29 #include "scrub/readdir.h"
30
31 #include <linux/namei.h>
32
/*
 * The Orphanage
 * =============
 *
 * If a directory in the tree is damaged, children of that directory become
 * inaccessible via that file path.  If a child has no other parents, the file
 * is said to be orphaned.  xfs_repair fixes this situation by creating an
 * orphanage directory (specifically, /lost+found) and creating a directory
 * entry pointing to the orphaned file.
 *
 * Online repair follows this tactic by creating a root-owned /lost+found
 * directory if one does not exist.  If an orphan is found, it will move that
 * file into the orphanage.
 */
47
48 /* Make the orphanage owned by root. */
49 STATIC int
xrep_chown_orphanage(struct xfs_scrub * sc,struct xfs_inode * dp)50 xrep_chown_orphanage(
51 struct xfs_scrub *sc,
52 struct xfs_inode *dp)
53 {
54 struct xfs_trans *tp;
55 struct xfs_mount *mp = sc->mp;
56 struct xfs_dquot *udqp = NULL, *gdqp = NULL, *pdqp = NULL;
57 struct xfs_dquot *oldu = NULL, *oldg = NULL, *oldp = NULL;
58 struct inode *inode = VFS_I(dp);
59 int error;
60
61 error = xfs_qm_vop_dqalloc(dp, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
62 XFS_QMOPT_QUOTALL, &udqp, &gdqp, &pdqp);
63 if (error)
64 return error;
65
66 error = xfs_trans_alloc_ichange(dp, udqp, gdqp, pdqp, true, &tp);
67 if (error)
68 goto out_dqrele;
69
70 /*
71 * Always clear setuid/setgid/sticky on the orphanage since we don't
72 * normally want that functionality on this directory and xfs_repair
73 * doesn't create it this way either. Leave the other access bits
74 * unchanged.
75 */
76 inode->i_mode &= ~(S_ISUID | S_ISGID | S_ISVTX);
77
78 /*
79 * Change the ownerships and register quota modifications
80 * in the transaction.
81 */
82 if (!uid_eq(inode->i_uid, GLOBAL_ROOT_UID)) {
83 if (XFS_IS_UQUOTA_ON(mp))
84 oldu = xfs_qm_vop_chown(tp, dp, &dp->i_udquot, udqp);
85 inode->i_uid = GLOBAL_ROOT_UID;
86 }
87 if (!gid_eq(inode->i_gid, GLOBAL_ROOT_GID)) {
88 if (XFS_IS_GQUOTA_ON(mp))
89 oldg = xfs_qm_vop_chown(tp, dp, &dp->i_gdquot, gdqp);
90 inode->i_gid = GLOBAL_ROOT_GID;
91 }
92 if (dp->i_projid != 0) {
93 if (XFS_IS_PQUOTA_ON(mp))
94 oldp = xfs_qm_vop_chown(tp, dp, &dp->i_pdquot, pdqp);
95 dp->i_projid = 0;
96 }
97
98 dp->i_diflags &= ~(XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT);
99 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
100
101 XFS_STATS_INC(mp, xs_ig_attrchg);
102
103 if (xfs_has_wsync(mp))
104 xfs_trans_set_sync(tp);
105 error = xfs_trans_commit(tp);
106
107 xfs_qm_dqrele(oldu);
108 xfs_qm_dqrele(oldg);
109 xfs_qm_dqrele(oldp);
110
111 out_dqrele:
112 xfs_qm_dqrele(udqp);
113 xfs_qm_dqrele(gdqp);
114 xfs_qm_dqrele(pdqp);
115 return error;
116 }
117
118 #define ORPHANAGE "lost+found"
119
120 /* Create the orphanage directory, and set sc->orphanage to it. */
121 int
xrep_orphanage_create(struct xfs_scrub * sc)122 xrep_orphanage_create(
123 struct xfs_scrub *sc)
124 {
125 struct xfs_mount *mp = sc->mp;
126 struct dentry *root_dentry, *orphanage_dentry;
127 struct inode *root_inode = VFS_I(sc->mp->m_rootip);
128 struct inode *orphanage_inode;
129 int error;
130
131 if (xfs_is_shutdown(mp))
132 return -EIO;
133 if (xfs_is_readonly(mp)) {
134 sc->orphanage = NULL;
135 return 0;
136 }
137
138 ASSERT(sc->tp == NULL);
139 ASSERT(sc->orphanage == NULL);
140
141 /* Find the dentry for the root directory... */
142 root_dentry = d_find_alias(root_inode);
143 if (!root_dentry) {
144 error = -EFSCORRUPTED;
145 goto out;
146 }
147
148 /* ...which is a directory, right? */
149 if (!d_is_dir(root_dentry)) {
150 error = -EFSCORRUPTED;
151 goto out_dput_root;
152 }
153
154 /* Try to find the orphanage directory. */
155 orphanage_dentry = start_creating_noperm(root_dentry, &QSTR(ORPHANAGE));
156 if (IS_ERR(orphanage_dentry)) {
157 error = PTR_ERR(orphanage_dentry);
158 goto out_dput_root;
159 }
160
161 /*
162 * Nothing found? Call mkdir to create the orphanage. Create the
163 * directory without other-user access because we're live and someone
164 * could have been relying partly on minimal access to a parent
165 * directory to control access to a file we put in here.
166 */
167 if (d_really_is_negative(orphanage_dentry)) {
168 orphanage_dentry = vfs_mkdir(&nop_mnt_idmap, root_inode,
169 orphanage_dentry, 0750, NULL);
170 error = PTR_ERR(orphanage_dentry);
171 if (IS_ERR(orphanage_dentry))
172 goto out_dput_orphanage;
173 }
174
175 /* Not a directory? Bail out. */
176 if (!d_is_dir(orphanage_dentry)) {
177 error = -ENOTDIR;
178 goto out_dput_orphanage;
179 }
180
181 /*
182 * Grab a reference to the orphanage. This /should/ succeed since
183 * we hold the root directory locked and therefore nobody can delete
184 * the orphanage.
185 */
186 orphanage_inode = igrab(d_inode(orphanage_dentry));
187 if (!orphanage_inode) {
188 error = -ENOENT;
189 goto out_dput_orphanage;
190 }
191
192 /* Make sure the orphanage is owned by root. */
193 error = xrep_chown_orphanage(sc, XFS_I(orphanage_inode));
194 if (error)
195 goto out_dput_orphanage;
196
197 /* Stash the reference for later and bail out. */
198 sc->orphanage = XFS_I(orphanage_inode);
199 sc->orphanage_ilock_flags = 0;
200
201 out_dput_orphanage:
202 end_creating(orphanage_dentry);
203 out_dput_root:
204 dput(root_dentry);
205 out:
206 return error;
207 }
208
209 void
xrep_orphanage_ilock(struct xfs_scrub * sc,unsigned int ilock_flags)210 xrep_orphanage_ilock(
211 struct xfs_scrub *sc,
212 unsigned int ilock_flags)
213 {
214 sc->orphanage_ilock_flags |= ilock_flags;
215 xfs_ilock(sc->orphanage, ilock_flags);
216 }
217
218 bool
xrep_orphanage_ilock_nowait(struct xfs_scrub * sc,unsigned int ilock_flags)219 xrep_orphanage_ilock_nowait(
220 struct xfs_scrub *sc,
221 unsigned int ilock_flags)
222 {
223 if (xfs_ilock_nowait(sc->orphanage, ilock_flags)) {
224 sc->orphanage_ilock_flags |= ilock_flags;
225 return true;
226 }
227
228 return false;
229 }
230
231 void
xrep_orphanage_iunlock(struct xfs_scrub * sc,unsigned int ilock_flags)232 xrep_orphanage_iunlock(
233 struct xfs_scrub *sc,
234 unsigned int ilock_flags)
235 {
236 xfs_iunlock(sc->orphanage, ilock_flags);
237 sc->orphanage_ilock_flags &= ~ilock_flags;
238 }
239
240 /* Grab the IOLOCK of the orphanage and sc->ip. */
241 int
xrep_orphanage_iolock_two(struct xfs_scrub * sc)242 xrep_orphanage_iolock_two(
243 struct xfs_scrub *sc)
244 {
245 int error = 0;
246
247 while (true) {
248 if (xchk_should_terminate(sc, &error))
249 return error;
250
251 /*
252 * Normal XFS takes the IOLOCK before grabbing a transaction.
253 * Scrub holds a transaction, which means that we can't block
254 * on either IOLOCK.
255 */
256 if (xrep_orphanage_ilock_nowait(sc, XFS_IOLOCK_EXCL)) {
257 if (xchk_ilock_nowait(sc, XFS_IOLOCK_EXCL))
258 break;
259 xrep_orphanage_iunlock(sc, XFS_IOLOCK_EXCL);
260 }
261 delay(1);
262 }
263
264 return 0;
265 }
266
267 /* Release the orphanage. */
268 void
xrep_orphanage_rele(struct xfs_scrub * sc)269 xrep_orphanage_rele(
270 struct xfs_scrub *sc)
271 {
272 if (!sc->orphanage)
273 return;
274
275 if (sc->orphanage_ilock_flags)
276 xfs_iunlock(sc->orphanage, sc->orphanage_ilock_flags);
277
278 xchk_irele(sc, sc->orphanage);
279 sc->orphanage = NULL;
280 }
281
282 /* Adoption moves a file into /lost+found */
283
284 /* Can the orphanage adopt @sc->ip? */
285 bool
xrep_orphanage_can_adopt(struct xfs_scrub * sc)286 xrep_orphanage_can_adopt(
287 struct xfs_scrub *sc)
288 {
289 ASSERT(sc->ip != NULL);
290
291 if (!sc->orphanage)
292 return false;
293 if (sc->ip == sc->orphanage)
294 return false;
295 if (xchk_inode_is_sb_rooted(sc->ip))
296 return false;
297 if (xfs_is_internal_inode(sc->ip))
298 return false;
299 return true;
300 }
301
302 /*
303 * Create a new transaction to send a child to the orphanage.
304 *
305 * Allocate a new transaction with sufficient disk space to handle the
306 * adoption, take ILOCK_EXCL of the orphanage and sc->ip, joins them to the
307 * transaction, and reserve quota to reparent the latter. Caller must hold the
308 * IOLOCK of the orphanage and sc->ip.
309 */
310 int
xrep_adoption_trans_alloc(struct xfs_scrub * sc,struct xrep_adoption * adopt)311 xrep_adoption_trans_alloc(
312 struct xfs_scrub *sc,
313 struct xrep_adoption *adopt)
314 {
315 struct xfs_mount *mp = sc->mp;
316 unsigned int child_blkres = 0;
317 int error;
318
319 ASSERT(sc->tp == NULL);
320 ASSERT(sc->ip != NULL);
321 ASSERT(sc->orphanage != NULL);
322 ASSERT(sc->ilock_flags & XFS_IOLOCK_EXCL);
323 ASSERT(sc->orphanage_ilock_flags & XFS_IOLOCK_EXCL);
324 ASSERT(!(sc->ilock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)));
325 ASSERT(!(sc->orphanage_ilock_flags &
326 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)));
327
328 /* Compute the worst case space reservation that we need. */
329 adopt->sc = sc;
330 adopt->orphanage_blkres = xfs_link_space_res(mp, MAXNAMELEN);
331 if (S_ISDIR(VFS_I(sc->ip)->i_mode))
332 child_blkres = xfs_rename_space_res(mp, 0, false,
333 xfs_name_dotdot.len, false);
334 if (xfs_has_parent(mp))
335 child_blkres += XFS_ADDAFORK_SPACE_RES(mp);
336 adopt->child_blkres = child_blkres;
337
338 /*
339 * Allocate a transaction to link the child into the parent, along with
340 * enough disk space to handle expansion of both the orphanage and the
341 * dotdot entry of a child directory.
342 */
343 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link,
344 adopt->orphanage_blkres + adopt->child_blkres, 0, 0,
345 &sc->tp);
346 if (error)
347 return error;
348
349 xfs_lock_two_inodes(sc->orphanage, XFS_ILOCK_EXCL,
350 sc->ip, XFS_ILOCK_EXCL);
351 sc->ilock_flags |= XFS_ILOCK_EXCL;
352 sc->orphanage_ilock_flags |= XFS_ILOCK_EXCL;
353
354 xfs_trans_ijoin(sc->tp, sc->orphanage, 0);
355 xfs_trans_ijoin(sc->tp, sc->ip, 0);
356
357 /*
358 * Reserve enough quota in the orphan directory to add the new name.
359 * Normally the orphanage should have user/group/project ids of zero
360 * and hence is not subject to quota enforcement, but we're allowed to
361 * exceed quota to reattach disconnected parts of the directory tree.
362 */
363 error = xfs_trans_reserve_quota_nblks(sc->tp, sc->orphanage,
364 adopt->orphanage_blkres, 0, true);
365 if (error)
366 goto out_cancel;
367
368 /*
369 * Reserve enough quota in the child directory to change dotdot.
370 * Here we're also allowed to exceed file quota to repair inconsistent
371 * metadata.
372 */
373 if (adopt->child_blkres) {
374 error = xfs_trans_reserve_quota_nblks(sc->tp, sc->ip,
375 adopt->child_blkres, 0, true);
376 if (error)
377 goto out_cancel;
378 }
379
380 return 0;
381 out_cancel:
382 xchk_trans_cancel(sc);
383 xrep_orphanage_iunlock(sc, XFS_ILOCK_EXCL);
384 xchk_iunlock(sc, XFS_ILOCK_EXCL);
385 return error;
386 }
387
388 /*
389 * Compute the xfs_name for the directory entry that we're adding to the
390 * orphanage. Caller must hold ILOCKs of sc->ip and the orphanage and must not
391 * reuse namebuf until the adoption completes or is dissolved.
392 */
393 int
xrep_adoption_compute_name(struct xrep_adoption * adopt,struct xfs_name * xname)394 xrep_adoption_compute_name(
395 struct xrep_adoption *adopt,
396 struct xfs_name *xname)
397 {
398 struct xfs_scrub *sc = adopt->sc;
399 char *namebuf = (void *)xname->name;
400 xfs_ino_t ino;
401 unsigned int incr = 0;
402 int error = 0;
403
404 adopt->xname = xname;
405 xname->len = snprintf(namebuf, MAXNAMELEN, "%llu", sc->ip->i_ino);
406 xname->type = xfs_mode_to_ftype(VFS_I(sc->ip)->i_mode);
407
408 /* Make sure the filename is unique in the lost+found. */
409 error = xchk_dir_lookup(sc, sc->orphanage, xname, &ino);
410 while (error == 0 && incr < 10000) {
411 xname->len = snprintf(namebuf, MAXNAMELEN, "%llu.%u",
412 sc->ip->i_ino, ++incr);
413 error = xchk_dir_lookup(sc, sc->orphanage, xname, &ino);
414 }
415 if (error == 0) {
416 /* We already have 10,000 entries in the orphanage? */
417 return -EFSCORRUPTED;
418 }
419
420 if (error != -ENOENT)
421 return error;
422 return 0;
423 }
424
425 /*
426 * Make sure the dcache does not have a positive dentry for the name we've
427 * chosen. The caller should have checked with the ondisk directory, so any
428 * discrepancy is a sign that something is seriously wrong.
429 */
430 static int
xrep_adoption_check_dcache(struct xrep_adoption * adopt)431 xrep_adoption_check_dcache(
432 struct xrep_adoption *adopt)
433 {
434 struct qstr qname = QSTR_INIT(adopt->xname->name,
435 adopt->xname->len);
436 struct xfs_scrub *sc = adopt->sc;
437 struct dentry *d_orphanage, *d_child;
438 int error = 0;
439
440 d_orphanage = d_find_alias(VFS_I(sc->orphanage));
441 if (!d_orphanage)
442 return 0;
443
444 d_child = try_lookup_noperm(&qname, d_orphanage);
445 if (IS_ERR(d_child)) {
446 dput(d_orphanage);
447 return PTR_ERR(d_child);
448 }
449
450 if (d_child) {
451 trace_xrep_adoption_check_child(sc->mp, d_child);
452
453 if (d_is_positive(d_child)) {
454 ASSERT(d_is_negative(d_child));
455 error = -EFSCORRUPTED;
456 }
457
458 dput(d_child);
459 }
460
461 dput(d_orphanage);
462 return error;
463 }
464
465 /*
466 * Invalidate all dentries for the name that was added to the orphanage
467 * directory, and all dentries pointing to the child inode that was moved.
468 *
469 * There should not be any positive entries for the name, since we've
470 * maintained our lock on the orphanage directory.
471 */
472 static void
xrep_adoption_zap_dcache(struct xrep_adoption * adopt)473 xrep_adoption_zap_dcache(
474 struct xrep_adoption *adopt)
475 {
476 struct qstr qname = QSTR_INIT(adopt->xname->name,
477 adopt->xname->len);
478 struct xfs_scrub *sc = adopt->sc;
479 struct dentry *d_orphanage, *d_child;
480
481 /* Invalidate all dentries for the adoption name */
482 d_orphanage = d_find_alias(VFS_I(sc->orphanage));
483 if (!d_orphanage)
484 return;
485
486 d_child = try_lookup_noperm(&qname, d_orphanage);
487 while (!IS_ERR_OR_NULL(d_child)) {
488 trace_xrep_adoption_invalidate_child(sc->mp, d_child);
489
490 ASSERT(d_is_negative(d_child));
491 d_invalidate(d_child);
492 dput(d_child);
493 d_child = d_lookup(d_orphanage, &qname);
494 }
495
496 dput(d_orphanage);
497
498 /* Invalidate all the dentries pointing down to this file. */
499 while ((d_child = d_find_alias(VFS_I(sc->ip))) != NULL) {
500 trace_xrep_adoption_invalidate_child(sc->mp, d_child);
501
502 d_invalidate(d_child);
503 dput(d_child);
504 }
505 }
506
507 /*
508 * If we have to add an attr fork ahead of a parent pointer update, how much
509 * space should we ask for?
510 */
511 static inline int
xrep_adoption_attr_sizeof(const struct xrep_adoption * adopt)512 xrep_adoption_attr_sizeof(
513 const struct xrep_adoption *adopt)
514 {
515 return sizeof(struct xfs_attr_sf_hdr) +
516 xfs_attr_sf_entsize_byname(sizeof(struct xfs_parent_rec),
517 adopt->xname->len);
518 }
519
520 /*
521 * Move the current file to the orphanage under the computed name.
522 *
523 * Returns with a dirty transaction so that the caller can handle any other
524 * work, such as fixing up unlinked lists or resetting link counts.
525 */
526 int
xrep_adoption_move(struct xrep_adoption * adopt)527 xrep_adoption_move(
528 struct xrep_adoption *adopt)
529 {
530 struct xfs_scrub *sc = adopt->sc;
531 bool isdir = S_ISDIR(VFS_I(sc->ip)->i_mode);
532 int error;
533
534 trace_xrep_adoption_reparent(sc->orphanage, adopt->xname,
535 sc->ip->i_ino);
536
537 error = xrep_adoption_check_dcache(adopt);
538 if (error)
539 return error;
540
541 /*
542 * If this filesystem has parent pointers, ensure that the file being
543 * moved to the orphanage has an attribute fork. This is required
544 * because the parent pointer code does not itself add attr forks.
545 */
546 if (!xfs_inode_has_attr_fork(sc->ip) && xfs_has_parent(sc->mp)) {
547 int sf_size = xrep_adoption_attr_sizeof(adopt);
548
549 error = xfs_bmap_add_attrfork(sc->tp, sc->ip, sf_size, true);
550 if (error)
551 return error;
552 }
553
554 /* Create the new name in the orphanage. */
555 error = xfs_dir_createname(sc->tp, sc->orphanage, adopt->xname,
556 sc->ip->i_ino, adopt->orphanage_blkres);
557 if (error)
558 return error;
559
560 /*
561 * Bump the link count of the orphanage if we just added a
562 * subdirectory, and update its timestamps.
563 */
564 xfs_trans_ichgtime(sc->tp, sc->orphanage,
565 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
566 if (isdir)
567 xfs_bumplink(sc->tp, sc->orphanage);
568 xfs_trans_log_inode(sc->tp, sc->orphanage, XFS_ILOG_CORE);
569
570 /* Bump the link count of the child. */
571 if (adopt->bump_child_nlink) {
572 xfs_bumplink(sc->tp, sc->ip);
573 xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
574 }
575
576 /* Replace the dotdot entry if the child is a subdirectory. */
577 if (isdir) {
578 error = xfs_dir_replace(sc->tp, sc->ip, &xfs_name_dotdot,
579 sc->orphanage->i_ino, adopt->child_blkres);
580 if (error)
581 return error;
582 }
583
584 /* Add a parent pointer from the file back to the lost+found. */
585 if (xfs_has_parent(sc->mp)) {
586 error = xfs_parent_addname(sc->tp, &adopt->ppargs,
587 sc->orphanage, adopt->xname, sc->ip);
588 if (error)
589 return error;
590 }
591
592 /*
593 * Notify dirent hooks that we moved the file to /lost+found, and
594 * finish all the deferred work so that we know the adoption is fully
595 * recorded in the log.
596 */
597 xfs_dir_update_hook(sc->orphanage, sc->ip, 1, adopt->xname);
598
599 /* Remove negative dentries from the lost+found's dcache */
600 xrep_adoption_zap_dcache(adopt);
601 return 0;
602 }
603
604 /*
605 * Roll to a clean scrub transaction so that we can release the orphanage,
606 * even if xrep_adoption_move was not called.
607 *
608 * Commits all the work and deferred ops attached to an adoption request and
609 * rolls to a clean scrub transaction. On success, returns 0 with the scrub
610 * context holding a clean transaction with no inodes joined. On failure,
611 * returns negative errno with no scrub transaction. All inode locks are
612 * still held after this function returns.
613 */
614 int
xrep_adoption_trans_roll(struct xrep_adoption * adopt)615 xrep_adoption_trans_roll(
616 struct xrep_adoption *adopt)
617 {
618 struct xfs_scrub *sc = adopt->sc;
619 int error;
620
621 trace_xrep_adoption_trans_roll(sc->orphanage, sc->ip,
622 !!(sc->tp->t_flags & XFS_TRANS_DIRTY));
623
624 /* Finish all the deferred ops to commit all repairs. */
625 error = xrep_defer_finish(sc);
626 if (error)
627 return error;
628
629 /* Roll the transaction once more to detach the inodes. */
630 return xfs_trans_roll(&sc->tp);
631 }
632