xref: /linux/fs/xfs/scrub/orphanage.c (revision 7fc2cd2e4b398c57c9cf961cfea05eadbf34c05c)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (c) 2021-2024 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_log_format.h"
13 #include "xfs_trans.h"
14 #include "xfs_inode.h"
15 #include "xfs_ialloc.h"
16 #include "xfs_quota.h"
17 #include "xfs_trans_space.h"
18 #include "xfs_dir2.h"
19 #include "xfs_icache.h"
20 #include "xfs_bmap.h"
21 #include "xfs_bmap_btree.h"
22 #include "xfs_parent.h"
23 #include "xfs_attr_sf.h"
24 #include "scrub/scrub.h"
25 #include "scrub/common.h"
26 #include "scrub/repair.h"
27 #include "scrub/trace.h"
28 #include "scrub/orphanage.h"
29 #include "scrub/readdir.h"
30 
31 #include <linux/namei.h>
32 
33 /*
34  * The Orphanage
35  * =============
36  *
37  * If the directory tree is damaged, children of that directory become
38  * inaccessible via that file path.  If a child has no other parents, the file
 * is said to be orphaned.  xfs_repair fixes this situation by creating an
 * orphanage directory (specifically, /lost+found) and creating a directory
41  * entry pointing to the orphaned file.
42  *
43  * Online repair follows this tactic by creating a root-owned /lost+found
 * directory if one does not exist.  If an orphan is found, that file will be
 * moved into the orphanage.
46  */
47 
48 /* Make the orphanage owned by root. */
49 STATIC int
50 xrep_chown_orphanage(
51 	struct xfs_scrub	*sc,
52 	struct xfs_inode	*dp)
53 {
54 	struct xfs_trans	*tp;
55 	struct xfs_mount	*mp = sc->mp;
56 	struct xfs_dquot	*udqp = NULL, *gdqp = NULL, *pdqp = NULL;
57 	struct xfs_dquot	*oldu = NULL, *oldg = NULL, *oldp = NULL;
58 	struct inode		*inode = VFS_I(dp);
59 	int			error;
60 
61 	error = xfs_qm_vop_dqalloc(dp, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
62 			XFS_QMOPT_QUOTALL, &udqp, &gdqp, &pdqp);
63 	if (error)
64 		return error;
65 
66 	error = xfs_trans_alloc_ichange(dp, udqp, gdqp, pdqp, true, &tp);
67 	if (error)
68 		goto out_dqrele;
69 
70 	/*
71 	 * Always clear setuid/setgid/sticky on the orphanage since we don't
72 	 * normally want that functionality on this directory and xfs_repair
73 	 * doesn't create it this way either.  Leave the other access bits
74 	 * unchanged.
75 	 */
76 	inode->i_mode &= ~(S_ISUID | S_ISGID | S_ISVTX);
77 
78 	/*
79 	 * Change the ownerships and register quota modifications
80 	 * in the transaction.
81 	 */
82 	if (!uid_eq(inode->i_uid, GLOBAL_ROOT_UID)) {
83 		if (XFS_IS_UQUOTA_ON(mp))
84 			oldu = xfs_qm_vop_chown(tp, dp, &dp->i_udquot, udqp);
85 		inode->i_uid = GLOBAL_ROOT_UID;
86 	}
87 	if (!gid_eq(inode->i_gid, GLOBAL_ROOT_GID)) {
88 		if (XFS_IS_GQUOTA_ON(mp))
89 			oldg = xfs_qm_vop_chown(tp, dp, &dp->i_gdquot, gdqp);
90 		inode->i_gid = GLOBAL_ROOT_GID;
91 	}
92 	if (dp->i_projid != 0) {
93 		if (XFS_IS_PQUOTA_ON(mp))
94 			oldp = xfs_qm_vop_chown(tp, dp, &dp->i_pdquot, pdqp);
95 		dp->i_projid = 0;
96 	}
97 
98 	dp->i_diflags &= ~(XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT);
99 	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
100 
101 	XFS_STATS_INC(mp, xs_ig_attrchg);
102 
103 	if (xfs_has_wsync(mp))
104 		xfs_trans_set_sync(tp);
105 	error = xfs_trans_commit(tp);
106 
107 	xfs_qm_dqrele(oldu);
108 	xfs_qm_dqrele(oldg);
109 	xfs_qm_dqrele(oldp);
110 
111 out_dqrele:
112 	xfs_qm_dqrele(udqp);
113 	xfs_qm_dqrele(gdqp);
114 	xfs_qm_dqrele(pdqp);
115 	return error;
116 }
117 
118 #define ORPHANAGE	"lost+found"
119 
120 /* Create the orphanage directory, and set sc->orphanage to it. */
121 int
122 xrep_orphanage_create(
123 	struct xfs_scrub	*sc)
124 {
125 	struct xfs_mount	*mp = sc->mp;
126 	struct dentry		*root_dentry, *orphanage_dentry;
127 	struct inode		*root_inode = VFS_I(sc->mp->m_rootip);
128 	struct inode		*orphanage_inode;
129 	int			error;
130 
131 	if (xfs_is_shutdown(mp))
132 		return -EIO;
133 	if (xfs_is_readonly(mp)) {
134 		sc->orphanage = NULL;
135 		return 0;
136 	}
137 
138 	ASSERT(sc->tp == NULL);
139 	ASSERT(sc->orphanage == NULL);
140 
141 	/* Find the dentry for the root directory... */
142 	root_dentry = d_find_alias(root_inode);
143 	if (!root_dentry) {
144 		error = -EFSCORRUPTED;
145 		goto out;
146 	}
147 
148 	/* ...which is a directory, right? */
149 	if (!d_is_dir(root_dentry)) {
150 		error = -EFSCORRUPTED;
151 		goto out_dput_root;
152 	}
153 
154 	/* Try to find the orphanage directory. */
155 	orphanage_dentry = start_creating_noperm(root_dentry, &QSTR(ORPHANAGE));
156 	if (IS_ERR(orphanage_dentry)) {
157 		error = PTR_ERR(orphanage_dentry);
158 		goto out_dput_root;
159 	}
160 
161 	/*
162 	 * Nothing found?  Call mkdir to create the orphanage.  Create the
163 	 * directory without other-user access because we're live and someone
164 	 * could have been relying partly on minimal access to a parent
165 	 * directory to control access to a file we put in here.
166 	 */
167 	if (d_really_is_negative(orphanage_dentry)) {
168 		orphanage_dentry = vfs_mkdir(&nop_mnt_idmap, root_inode,
169 					     orphanage_dentry, 0750, NULL);
170 		error = PTR_ERR(orphanage_dentry);
171 		if (IS_ERR(orphanage_dentry))
172 			goto out_dput_orphanage;
173 	}
174 
175 	/* Not a directory? Bail out. */
176 	if (!d_is_dir(orphanage_dentry)) {
177 		error = -ENOTDIR;
178 		goto out_dput_orphanage;
179 	}
180 
181 	/*
182 	 * Grab a reference to the orphanage.  This /should/ succeed since
183 	 * we hold the root directory locked and therefore nobody can delete
184 	 * the orphanage.
185 	 */
186 	orphanage_inode = igrab(d_inode(orphanage_dentry));
187 	if (!orphanage_inode) {
188 		error = -ENOENT;
189 		goto out_dput_orphanage;
190 	}
191 
192 	/* Make sure the orphanage is owned by root. */
193 	error = xrep_chown_orphanage(sc, XFS_I(orphanage_inode));
194 	if (error)
195 		goto out_dput_orphanage;
196 
197 	/* Stash the reference for later and bail out. */
198 	sc->orphanage = XFS_I(orphanage_inode);
199 	sc->orphanage_ilock_flags = 0;
200 
201 out_dput_orphanage:
202 	end_creating(orphanage_dentry);
203 out_dput_root:
204 	dput(root_dentry);
205 out:
206 	return error;
207 }
208 
209 void
210 xrep_orphanage_ilock(
211 	struct xfs_scrub	*sc,
212 	unsigned int		ilock_flags)
213 {
214 	sc->orphanage_ilock_flags |= ilock_flags;
215 	xfs_ilock(sc->orphanage, ilock_flags);
216 }
217 
218 bool
219 xrep_orphanage_ilock_nowait(
220 	struct xfs_scrub	*sc,
221 	unsigned int		ilock_flags)
222 {
223 	if (xfs_ilock_nowait(sc->orphanage, ilock_flags)) {
224 		sc->orphanage_ilock_flags |= ilock_flags;
225 		return true;
226 	}
227 
228 	return false;
229 }
230 
231 void
232 xrep_orphanage_iunlock(
233 	struct xfs_scrub	*sc,
234 	unsigned int		ilock_flags)
235 {
236 	xfs_iunlock(sc->orphanage, ilock_flags);
237 	sc->orphanage_ilock_flags &= ~ilock_flags;
238 }
239 
240 /* Grab the IOLOCK of the orphanage and sc->ip. */
241 int
242 xrep_orphanage_iolock_two(
243 	struct xfs_scrub	*sc)
244 {
245 	int			error = 0;
246 
247 	while (true) {
248 		if (xchk_should_terminate(sc, &error))
249 			return error;
250 
251 		/*
252 		 * Normal XFS takes the IOLOCK before grabbing a transaction.
253 		 * Scrub holds a transaction, which means that we can't block
254 		 * on either IOLOCK.
255 		 */
256 		if (xrep_orphanage_ilock_nowait(sc, XFS_IOLOCK_EXCL)) {
257 			if (xchk_ilock_nowait(sc, XFS_IOLOCK_EXCL))
258 				break;
259 			xrep_orphanage_iunlock(sc, XFS_IOLOCK_EXCL);
260 		}
261 		delay(1);
262 	}
263 
264 	return 0;
265 }
266 
267 /* Release the orphanage. */
268 void
269 xrep_orphanage_rele(
270 	struct xfs_scrub	*sc)
271 {
272 	if (!sc->orphanage)
273 		return;
274 
275 	if (sc->orphanage_ilock_flags)
276 		xfs_iunlock(sc->orphanage, sc->orphanage_ilock_flags);
277 
278 	xchk_irele(sc, sc->orphanage);
279 	sc->orphanage = NULL;
280 }
281 
282 /* Adoption moves a file into /lost+found */
283 
284 /* Can the orphanage adopt @sc->ip? */
285 bool
286 xrep_orphanage_can_adopt(
287 	struct xfs_scrub	*sc)
288 {
289 	ASSERT(sc->ip != NULL);
290 
291 	if (!sc->orphanage)
292 		return false;
293 	if (sc->ip == sc->orphanage)
294 		return false;
295 	if (xchk_inode_is_sb_rooted(sc->ip))
296 		return false;
297 	if (xfs_is_internal_inode(sc->ip))
298 		return false;
299 	return true;
300 }
301 
302 /*
303  * Create a new transaction to send a child to the orphanage.
304  *
305  * Allocate a new transaction with sufficient disk space to handle the
306  * adoption, take ILOCK_EXCL of the orphanage and sc->ip, joins them to the
307  * transaction, and reserve quota to reparent the latter.  Caller must hold the
308  * IOLOCK of the orphanage and sc->ip.
309  */
310 int
311 xrep_adoption_trans_alloc(
312 	struct xfs_scrub	*sc,
313 	struct xrep_adoption	*adopt)
314 {
315 	struct xfs_mount	*mp = sc->mp;
316 	unsigned int		child_blkres = 0;
317 	int			error;
318 
319 	ASSERT(sc->tp == NULL);
320 	ASSERT(sc->ip != NULL);
321 	ASSERT(sc->orphanage != NULL);
322 	ASSERT(sc->ilock_flags & XFS_IOLOCK_EXCL);
323 	ASSERT(sc->orphanage_ilock_flags & XFS_IOLOCK_EXCL);
324 	ASSERT(!(sc->ilock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)));
325 	ASSERT(!(sc->orphanage_ilock_flags &
326 				(XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)));
327 
328 	/* Compute the worst case space reservation that we need. */
329 	adopt->sc = sc;
330 	adopt->orphanage_blkres = xfs_link_space_res(mp, MAXNAMELEN);
331 	if (S_ISDIR(VFS_I(sc->ip)->i_mode))
332 		child_blkres = xfs_rename_space_res(mp, 0, false,
333 						    xfs_name_dotdot.len, false);
334 	if (xfs_has_parent(mp))
335 		child_blkres += XFS_ADDAFORK_SPACE_RES(mp);
336 	adopt->child_blkres = child_blkres;
337 
338 	/*
339 	 * Allocate a transaction to link the child into the parent, along with
340 	 * enough disk space to handle expansion of both the orphanage and the
341 	 * dotdot entry of a child directory.
342 	 */
343 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link,
344 			adopt->orphanage_blkres + adopt->child_blkres, 0, 0,
345 			&sc->tp);
346 	if (error)
347 		return error;
348 
349 	xfs_lock_two_inodes(sc->orphanage, XFS_ILOCK_EXCL,
350 			    sc->ip, XFS_ILOCK_EXCL);
351 	sc->ilock_flags |= XFS_ILOCK_EXCL;
352 	sc->orphanage_ilock_flags |= XFS_ILOCK_EXCL;
353 
354 	xfs_trans_ijoin(sc->tp, sc->orphanage, 0);
355 	xfs_trans_ijoin(sc->tp, sc->ip, 0);
356 
357 	/*
358 	 * Reserve enough quota in the orphan directory to add the new name.
359 	 * Normally the orphanage should have user/group/project ids of zero
360 	 * and hence is not subject to quota enforcement, but we're allowed to
361 	 * exceed quota to reattach disconnected parts of the directory tree.
362 	 */
363 	error = xfs_trans_reserve_quota_nblks(sc->tp, sc->orphanage,
364 			adopt->orphanage_blkres, 0, true);
365 	if (error)
366 		goto out_cancel;
367 
368 	/*
369 	 * Reserve enough quota in the child directory to change dotdot.
370 	 * Here we're also allowed to exceed file quota to repair inconsistent
371 	 * metadata.
372 	 */
373 	if (adopt->child_blkres) {
374 		error = xfs_trans_reserve_quota_nblks(sc->tp, sc->ip,
375 				adopt->child_blkres, 0, true);
376 		if (error)
377 			goto out_cancel;
378 	}
379 
380 	return 0;
381 out_cancel:
382 	xchk_trans_cancel(sc);
383 	xrep_orphanage_iunlock(sc, XFS_ILOCK_EXCL);
384 	xchk_iunlock(sc, XFS_ILOCK_EXCL);
385 	return error;
386 }
387 
388 /*
389  * Compute the xfs_name for the directory entry that we're adding to the
390  * orphanage.  Caller must hold ILOCKs of sc->ip and the orphanage and must not
391  * reuse namebuf until the adoption completes or is dissolved.
392  */
393 int
394 xrep_adoption_compute_name(
395 	struct xrep_adoption	*adopt,
396 	struct xfs_name		*xname)
397 {
398 	struct xfs_scrub	*sc = adopt->sc;
399 	char			*namebuf = (void *)xname->name;
400 	xfs_ino_t		ino;
401 	unsigned int		incr = 0;
402 	int			error = 0;
403 
404 	adopt->xname = xname;
405 	xname->len = snprintf(namebuf, MAXNAMELEN, "%llu", sc->ip->i_ino);
406 	xname->type = xfs_mode_to_ftype(VFS_I(sc->ip)->i_mode);
407 
408 	/* Make sure the filename is unique in the lost+found. */
409 	error = xchk_dir_lookup(sc, sc->orphanage, xname, &ino);
410 	while (error == 0 && incr < 10000) {
411 		xname->len = snprintf(namebuf, MAXNAMELEN, "%llu.%u",
412 				sc->ip->i_ino, ++incr);
413 		error = xchk_dir_lookup(sc, sc->orphanage, xname, &ino);
414 	}
415 	if (error == 0) {
416 		/* We already have 10,000 entries in the orphanage? */
417 		return -EFSCORRUPTED;
418 	}
419 
420 	if (error != -ENOENT)
421 		return error;
422 	return 0;
423 }
424 
425 /*
426  * Make sure the dcache does not have a positive dentry for the name we've
427  * chosen.  The caller should have checked with the ondisk directory, so any
428  * discrepancy is a sign that something is seriously wrong.
429  */
430 static int
431 xrep_adoption_check_dcache(
432 	struct xrep_adoption	*adopt)
433 {
434 	struct qstr		qname = QSTR_INIT(adopt->xname->name,
435 						  adopt->xname->len);
436 	struct xfs_scrub	*sc = adopt->sc;
437 	struct dentry		*d_orphanage, *d_child;
438 	int			error = 0;
439 
440 	d_orphanage = d_find_alias(VFS_I(sc->orphanage));
441 	if (!d_orphanage)
442 		return 0;
443 
444 	d_child = try_lookup_noperm(&qname, d_orphanage);
445 	if (d_child) {
446 		trace_xrep_adoption_check_child(sc->mp, d_child);
447 
448 		if (d_is_positive(d_child)) {
449 			ASSERT(d_is_negative(d_child));
450 			error = -EFSCORRUPTED;
451 		}
452 
453 		dput(d_child);
454 	}
455 
456 	dput(d_orphanage);
457 	return error;
458 }
459 
460 /*
461  * Invalidate all dentries for the name that was added to the orphanage
462  * directory, and all dentries pointing to the child inode that was moved.
463  *
464  * There should not be any positive entries for the name, since we've
465  * maintained our lock on the orphanage directory.
466  */
467 static void
468 xrep_adoption_zap_dcache(
469 	struct xrep_adoption	*adopt)
470 {
471 	struct qstr		qname = QSTR_INIT(adopt->xname->name,
472 						  adopt->xname->len);
473 	struct xfs_scrub	*sc = adopt->sc;
474 	struct dentry		*d_orphanage, *d_child;
475 
476 	/* Invalidate all dentries for the adoption name */
477 	d_orphanage = d_find_alias(VFS_I(sc->orphanage));
478 	if (!d_orphanage)
479 		return;
480 
481 	d_child = try_lookup_noperm(&qname, d_orphanage);
482 	while (d_child != NULL) {
483 		trace_xrep_adoption_invalidate_child(sc->mp, d_child);
484 
485 		ASSERT(d_is_negative(d_child));
486 		d_invalidate(d_child);
487 		dput(d_child);
488 		d_child = d_lookup(d_orphanage, &qname);
489 	}
490 
491 	dput(d_orphanage);
492 
493 	/* Invalidate all the dentries pointing down to this file. */
494 	while ((d_child = d_find_alias(VFS_I(sc->ip))) != NULL) {
495 		trace_xrep_adoption_invalidate_child(sc->mp, d_child);
496 
497 		d_invalidate(d_child);
498 		dput(d_child);
499 	}
500 }
501 
502 /*
503  * If we have to add an attr fork ahead of a parent pointer update, how much
504  * space should we ask for?
505  */
506 static inline int
507 xrep_adoption_attr_sizeof(
508 	const struct xrep_adoption	*adopt)
509 {
510 	return sizeof(struct xfs_attr_sf_hdr) +
511 		xfs_attr_sf_entsize_byname(sizeof(struct xfs_parent_rec),
512 					   adopt->xname->len);
513 }
514 
515 /*
516  * Move the current file to the orphanage under the computed name.
517  *
518  * Returns with a dirty transaction so that the caller can handle any other
519  * work, such as fixing up unlinked lists or resetting link counts.
520  */
521 int
522 xrep_adoption_move(
523 	struct xrep_adoption	*adopt)
524 {
525 	struct xfs_scrub	*sc = adopt->sc;
526 	bool			isdir = S_ISDIR(VFS_I(sc->ip)->i_mode);
527 	int			error;
528 
529 	trace_xrep_adoption_reparent(sc->orphanage, adopt->xname,
530 			sc->ip->i_ino);
531 
532 	error = xrep_adoption_check_dcache(adopt);
533 	if (error)
534 		return error;
535 
536 	/*
537 	 * If this filesystem has parent pointers, ensure that the file being
538 	 * moved to the orphanage has an attribute fork.  This is required
539 	 * because the parent pointer code does not itself add attr forks.
540 	 */
541 	if (!xfs_inode_has_attr_fork(sc->ip) && xfs_has_parent(sc->mp)) {
542 		int sf_size = xrep_adoption_attr_sizeof(adopt);
543 
544 		error = xfs_bmap_add_attrfork(sc->tp, sc->ip, sf_size, true);
545 		if (error)
546 			return error;
547 	}
548 
549 	/* Create the new name in the orphanage. */
550 	error = xfs_dir_createname(sc->tp, sc->orphanage, adopt->xname,
551 			sc->ip->i_ino, adopt->orphanage_blkres);
552 	if (error)
553 		return error;
554 
555 	/*
556 	 * Bump the link count of the orphanage if we just added a
557 	 * subdirectory, and update its timestamps.
558 	 */
559 	xfs_trans_ichgtime(sc->tp, sc->orphanage,
560 			XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
561 	if (isdir)
562 		xfs_bumplink(sc->tp, sc->orphanage);
563 	xfs_trans_log_inode(sc->tp, sc->orphanage, XFS_ILOG_CORE);
564 
565 	/* Bump the link count of the child. */
566 	if (adopt->bump_child_nlink) {
567 		xfs_bumplink(sc->tp, sc->ip);
568 		xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
569 	}
570 
571 	/* Replace the dotdot entry if the child is a subdirectory. */
572 	if (isdir) {
573 		error = xfs_dir_replace(sc->tp, sc->ip, &xfs_name_dotdot,
574 				sc->orphanage->i_ino, adopt->child_blkres);
575 		if (error)
576 			return error;
577 	}
578 
579 	/* Add a parent pointer from the file back to the lost+found. */
580 	if (xfs_has_parent(sc->mp)) {
581 		error = xfs_parent_addname(sc->tp, &adopt->ppargs,
582 				sc->orphanage, adopt->xname, sc->ip);
583 		if (error)
584 			return error;
585 	}
586 
587 	/*
588 	 * Notify dirent hooks that we moved the file to /lost+found, and
589 	 * finish all the deferred work so that we know the adoption is fully
590 	 * recorded in the log.
591 	 */
592 	xfs_dir_update_hook(sc->orphanage, sc->ip, 1, adopt->xname);
593 
594 	/* Remove negative dentries from the lost+found's dcache */
595 	xrep_adoption_zap_dcache(adopt);
596 	return 0;
597 }
598 
599 /*
600  * Roll to a clean scrub transaction so that we can release the orphanage,
601  * even if xrep_adoption_move was not called.
602  *
603  * Commits all the work and deferred ops attached to an adoption request and
604  * rolls to a clean scrub transaction.  On success, returns 0 with the scrub
605  * context holding a clean transaction with no inodes joined.  On failure,
606  * returns negative errno with no scrub transaction.  All inode locks are
607  * still held after this function returns.
608  */
609 int
610 xrep_adoption_trans_roll(
611 	struct xrep_adoption	*adopt)
612 {
613 	struct xfs_scrub	*sc = adopt->sc;
614 	int			error;
615 
616 	trace_xrep_adoption_trans_roll(sc->orphanage, sc->ip,
617 			!!(sc->tp->t_flags & XFS_TRANS_DIRTY));
618 
619 	/* Finish all the deferred ops to commit all repairs. */
620 	error = xrep_defer_finish(sc);
621 	if (error)
622 		return error;
623 
624 	/* Roll the transaction once more to detach the inodes. */
625 	return xfs_trans_roll(&sc->tp);
626 }
627