xref: /linux/fs/xfs/scrub/orphanage.c (revision 47d83c1946067526e7c48255efa263ad2cce1907)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (c) 2021-2024 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_log_format.h"
13 #include "xfs_trans.h"
14 #include "xfs_inode.h"
15 #include "xfs_ialloc.h"
16 #include "xfs_quota.h"
17 #include "xfs_trans_space.h"
18 #include "xfs_dir2.h"
19 #include "xfs_icache.h"
20 #include "xfs_bmap.h"
21 #include "xfs_bmap_btree.h"
22 #include "scrub/scrub.h"
23 #include "scrub/common.h"
24 #include "scrub/repair.h"
25 #include "scrub/trace.h"
26 #include "scrub/orphanage.h"
27 #include "scrub/readdir.h"
28 
29 #include <linux/namei.h>
30 
31 /*
32  * The Orphanage
33  * =============
34  *
35  * If the directory tree is damaged, children of that directory become
36  * inaccessible via that file path.  If a child has no other parents, the file
37  * is said to be orphaned.  xfs_repair fixes this situation by creating an
38  * orphanage directory (specifically, /lost+found) and creating a directory
39  * entry pointing to the orphaned file.
40  *
41  * Online repair follows this tactic by creating a root-owned /lost+found
42  * directory if one does not exist.  If an orphan is found, it will move that
43  * file into the orphanage.
44  */
45 
46 /* Make the orphanage owned by root. */
47 STATIC int
48 xrep_chown_orphanage(
49 	struct xfs_scrub	*sc,
50 	struct xfs_inode	*dp)
51 {
52 	struct xfs_trans	*tp;
53 	struct xfs_mount	*mp = sc->mp;
54 	struct xfs_dquot	*udqp = NULL, *gdqp = NULL, *pdqp = NULL;
55 	struct xfs_dquot	*oldu = NULL, *oldg = NULL, *oldp = NULL;
56 	struct inode		*inode = VFS_I(dp);
57 	int			error;
58 
59 	error = xfs_qm_vop_dqalloc(dp, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
60 			XFS_QMOPT_QUOTALL, &udqp, &gdqp, &pdqp);
61 	if (error)
62 		return error;
63 
64 	error = xfs_trans_alloc_ichange(dp, udqp, gdqp, pdqp, true, &tp);
65 	if (error)
66 		goto out_dqrele;
67 
68 	/*
69 	 * Always clear setuid/setgid/sticky on the orphanage since we don't
70 	 * normally want that functionality on this directory and xfs_repair
71 	 * doesn't create it this way either.  Leave the other access bits
72 	 * unchanged.
73 	 */
74 	inode->i_mode &= ~(S_ISUID | S_ISGID | S_ISVTX);
75 
76 	/*
77 	 * Change the ownerships and register quota modifications
78 	 * in the transaction.
79 	 */
80 	if (!uid_eq(inode->i_uid, GLOBAL_ROOT_UID)) {
81 		if (XFS_IS_UQUOTA_ON(mp))
82 			oldu = xfs_qm_vop_chown(tp, dp, &dp->i_udquot, udqp);
83 		inode->i_uid = GLOBAL_ROOT_UID;
84 	}
85 	if (!gid_eq(inode->i_gid, GLOBAL_ROOT_GID)) {
86 		if (XFS_IS_GQUOTA_ON(mp))
87 			oldg = xfs_qm_vop_chown(tp, dp, &dp->i_gdquot, gdqp);
88 		inode->i_gid = GLOBAL_ROOT_GID;
89 	}
90 	if (dp->i_projid != 0) {
91 		if (XFS_IS_PQUOTA_ON(mp))
92 			oldp = xfs_qm_vop_chown(tp, dp, &dp->i_pdquot, pdqp);
93 		dp->i_projid = 0;
94 	}
95 
96 	dp->i_diflags &= ~(XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT);
97 	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
98 
99 	XFS_STATS_INC(mp, xs_ig_attrchg);
100 
101 	if (xfs_has_wsync(mp))
102 		xfs_trans_set_sync(tp);
103 	error = xfs_trans_commit(tp);
104 
105 	xfs_qm_dqrele(oldu);
106 	xfs_qm_dqrele(oldg);
107 	xfs_qm_dqrele(oldp);
108 
109 out_dqrele:
110 	xfs_qm_dqrele(udqp);
111 	xfs_qm_dqrele(gdqp);
112 	xfs_qm_dqrele(pdqp);
113 	return error;
114 }
115 
116 #define ORPHANAGE	"lost+found"
117 
118 /* Create the orphanage directory, and set sc->orphanage to it. */
119 int
120 xrep_orphanage_create(
121 	struct xfs_scrub	*sc)
122 {
123 	struct xfs_mount	*mp = sc->mp;
124 	struct dentry		*root_dentry, *orphanage_dentry;
125 	struct inode		*root_inode = VFS_I(sc->mp->m_rootip);
126 	struct inode		*orphanage_inode;
127 	int			error;
128 
129 	if (xfs_is_shutdown(mp))
130 		return -EIO;
131 	if (xfs_is_readonly(mp)) {
132 		sc->orphanage = NULL;
133 		return 0;
134 	}
135 
136 	ASSERT(sc->tp == NULL);
137 	ASSERT(sc->orphanage == NULL);
138 
139 	/* Find the dentry for the root directory... */
140 	root_dentry = d_find_alias(root_inode);
141 	if (!root_dentry) {
142 		error = -EFSCORRUPTED;
143 		goto out;
144 	}
145 
146 	/* ...which is a directory, right? */
147 	if (!d_is_dir(root_dentry)) {
148 		error = -EFSCORRUPTED;
149 		goto out_dput_root;
150 	}
151 
152 	/* Try to find the orphanage directory. */
153 	inode_lock_nested(root_inode, I_MUTEX_PARENT);
154 	orphanage_dentry = lookup_one_len(ORPHANAGE, root_dentry,
155 			strlen(ORPHANAGE));
156 	if (IS_ERR(orphanage_dentry)) {
157 		error = PTR_ERR(orphanage_dentry);
158 		goto out_unlock_root;
159 	}
160 
161 	/*
162 	 * Nothing found?  Call mkdir to create the orphanage.  Create the
163 	 * directory without other-user access because we're live and someone
164 	 * could have been relying partly on minimal access to a parent
165 	 * directory to control access to a file we put in here.
166 	 */
167 	if (d_really_is_negative(orphanage_dentry)) {
168 		error = vfs_mkdir(&nop_mnt_idmap, root_inode, orphanage_dentry,
169 				0750);
170 		if (error)
171 			goto out_dput_orphanage;
172 	}
173 
174 	/* Not a directory? Bail out. */
175 	if (!d_is_dir(orphanage_dentry)) {
176 		error = -ENOTDIR;
177 		goto out_dput_orphanage;
178 	}
179 
180 	/*
181 	 * Grab a reference to the orphanage.  This /should/ succeed since
182 	 * we hold the root directory locked and therefore nobody can delete
183 	 * the orphanage.
184 	 */
185 	orphanage_inode = igrab(d_inode(orphanage_dentry));
186 	if (!orphanage_inode) {
187 		error = -ENOENT;
188 		goto out_dput_orphanage;
189 	}
190 
191 	/* Make sure the orphanage is owned by root. */
192 	error = xrep_chown_orphanage(sc, XFS_I(orphanage_inode));
193 	if (error)
194 		goto out_dput_orphanage;
195 
196 	/* Stash the reference for later and bail out. */
197 	sc->orphanage = XFS_I(orphanage_inode);
198 	sc->orphanage_ilock_flags = 0;
199 
200 out_dput_orphanage:
201 	dput(orphanage_dentry);
202 out_unlock_root:
203 	inode_unlock(VFS_I(sc->mp->m_rootip));
204 out_dput_root:
205 	dput(root_dentry);
206 out:
207 	return error;
208 }
209 
210 void
211 xrep_orphanage_ilock(
212 	struct xfs_scrub	*sc,
213 	unsigned int		ilock_flags)
214 {
215 	sc->orphanage_ilock_flags |= ilock_flags;
216 	xfs_ilock(sc->orphanage, ilock_flags);
217 }
218 
219 bool
220 xrep_orphanage_ilock_nowait(
221 	struct xfs_scrub	*sc,
222 	unsigned int		ilock_flags)
223 {
224 	if (xfs_ilock_nowait(sc->orphanage, ilock_flags)) {
225 		sc->orphanage_ilock_flags |= ilock_flags;
226 		return true;
227 	}
228 
229 	return false;
230 }
231 
232 void
233 xrep_orphanage_iunlock(
234 	struct xfs_scrub	*sc,
235 	unsigned int		ilock_flags)
236 {
237 	xfs_iunlock(sc->orphanage, ilock_flags);
238 	sc->orphanage_ilock_flags &= ~ilock_flags;
239 }
240 
241 /* Grab the IOLOCK of the orphanage and sc->ip. */
242 int
243 xrep_orphanage_iolock_two(
244 	struct xfs_scrub	*sc)
245 {
246 	int			error = 0;
247 
248 	while (true) {
249 		if (xchk_should_terminate(sc, &error))
250 			return error;
251 
252 		/*
253 		 * Normal XFS takes the IOLOCK before grabbing a transaction.
254 		 * Scrub holds a transaction, which means that we can't block
255 		 * on either IOLOCK.
256 		 */
257 		if (xrep_orphanage_ilock_nowait(sc, XFS_IOLOCK_EXCL)) {
258 			if (xchk_ilock_nowait(sc, XFS_IOLOCK_EXCL))
259 				break;
260 			xrep_orphanage_iunlock(sc, XFS_IOLOCK_EXCL);
261 		}
262 		delay(1);
263 	}
264 
265 	return 0;
266 }
267 
268 /* Release the orphanage. */
269 void
270 xrep_orphanage_rele(
271 	struct xfs_scrub	*sc)
272 {
273 	if (!sc->orphanage)
274 		return;
275 
276 	if (sc->orphanage_ilock_flags)
277 		xfs_iunlock(sc->orphanage, sc->orphanage_ilock_flags);
278 
279 	xchk_irele(sc, sc->orphanage);
280 	sc->orphanage = NULL;
281 }
282 
283 /* Adoption moves a file into /lost+found */
284 
285 /* Can the orphanage adopt @sc->ip? */
286 bool
287 xrep_orphanage_can_adopt(
288 	struct xfs_scrub	*sc)
289 {
290 	ASSERT(sc->ip != NULL);
291 
292 	if (!sc->orphanage)
293 		return false;
294 	if (sc->ip == sc->orphanage)
295 		return false;
296 	if (xfs_internal_inum(sc->mp, sc->ip->i_ino))
297 		return false;
298 	return true;
299 }
300 
301 /*
302  * Create a new transaction to send a child to the orphanage.
303  *
304  * Allocate a new transaction with sufficient disk space to handle the
305  * adoption, take ILOCK_EXCL of the orphanage and sc->ip, joins them to the
306  * transaction, and reserve quota to reparent the latter.  Caller must hold the
307  * IOLOCK of the orphanage and sc->ip.
308  */
309 int
310 xrep_adoption_trans_alloc(
311 	struct xfs_scrub	*sc,
312 	struct xrep_adoption	*adopt)
313 {
314 	struct xfs_mount	*mp = sc->mp;
315 	unsigned int		child_blkres = 0;
316 	int			error;
317 
318 	ASSERT(sc->tp == NULL);
319 	ASSERT(sc->ip != NULL);
320 	ASSERT(sc->orphanage != NULL);
321 	ASSERT(sc->ilock_flags & XFS_IOLOCK_EXCL);
322 	ASSERT(sc->orphanage_ilock_flags & XFS_IOLOCK_EXCL);
323 	ASSERT(!(sc->ilock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)));
324 	ASSERT(!(sc->orphanage_ilock_flags &
325 				(XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)));
326 
327 	/* Compute the worst case space reservation that we need. */
328 	adopt->sc = sc;
329 	adopt->orphanage_blkres = xfs_link_space_res(mp, MAXNAMELEN);
330 	if (S_ISDIR(VFS_I(sc->ip)->i_mode))
331 		child_blkres = xfs_rename_space_res(mp, 0, false,
332 						    xfs_name_dotdot.len, false);
333 	adopt->child_blkres = child_blkres;
334 
335 	/*
336 	 * Allocate a transaction to link the child into the parent, along with
337 	 * enough disk space to handle expansion of both the orphanage and the
338 	 * dotdot entry of a child directory.
339 	 */
340 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link,
341 			adopt->orphanage_blkres + adopt->child_blkres, 0, 0,
342 			&sc->tp);
343 	if (error)
344 		return error;
345 
346 	xfs_lock_two_inodes(sc->orphanage, XFS_ILOCK_EXCL,
347 			    sc->ip, XFS_ILOCK_EXCL);
348 	sc->ilock_flags |= XFS_ILOCK_EXCL;
349 	sc->orphanage_ilock_flags |= XFS_ILOCK_EXCL;
350 
351 	xfs_trans_ijoin(sc->tp, sc->orphanage, 0);
352 	xfs_trans_ijoin(sc->tp, sc->ip, 0);
353 
354 	/*
355 	 * Reserve enough quota in the orphan directory to add the new name.
356 	 * Normally the orphanage should have user/group/project ids of zero
357 	 * and hence is not subject to quota enforcement, but we're allowed to
358 	 * exceed quota to reattach disconnected parts of the directory tree.
359 	 */
360 	error = xfs_trans_reserve_quota_nblks(sc->tp, sc->orphanage,
361 			adopt->orphanage_blkres, 0, true);
362 	if (error)
363 		goto out_cancel;
364 
365 	/*
366 	 * Reserve enough quota in the child directory to change dotdot.
367 	 * Here we're also allowed to exceed file quota to repair inconsistent
368 	 * metadata.
369 	 */
370 	if (adopt->child_blkres) {
371 		error = xfs_trans_reserve_quota_nblks(sc->tp, sc->ip,
372 				adopt->child_blkres, 0, true);
373 		if (error)
374 			goto out_cancel;
375 	}
376 
377 	return 0;
378 out_cancel:
379 	xchk_trans_cancel(sc);
380 	xrep_orphanage_iunlock(sc, XFS_ILOCK_EXCL);
381 	xrep_orphanage_iunlock(sc, XFS_IOLOCK_EXCL);
382 	return error;
383 }
384 
385 /*
386  * Compute the xfs_name for the directory entry that we're adding to the
387  * orphanage.  Caller must hold ILOCKs of sc->ip and the orphanage and must not
388  * reuse namebuf until the adoption completes or is dissolved.
389  */
390 int
391 xrep_adoption_compute_name(
392 	struct xrep_adoption	*adopt,
393 	struct xfs_name		*xname)
394 {
395 	struct xfs_scrub	*sc = adopt->sc;
396 	char			*namebuf = (void *)xname->name;
397 	xfs_ino_t		ino;
398 	unsigned int		incr = 0;
399 	int			error = 0;
400 
401 	adopt->xname = xname;
402 	xname->len = snprintf(namebuf, MAXNAMELEN, "%llu", sc->ip->i_ino);
403 	xname->type = xfs_mode_to_ftype(VFS_I(sc->ip)->i_mode);
404 
405 	/* Make sure the filename is unique in the lost+found. */
406 	error = xchk_dir_lookup(sc, sc->orphanage, xname, &ino);
407 	while (error == 0 && incr < 10000) {
408 		xname->len = snprintf(namebuf, MAXNAMELEN, "%llu.%u",
409 				sc->ip->i_ino, ++incr);
410 		error = xchk_dir_lookup(sc, sc->orphanage, xname, &ino);
411 	}
412 	if (error == 0) {
413 		/* We already have 10,000 entries in the orphanage? */
414 		return -EFSCORRUPTED;
415 	}
416 
417 	if (error != -ENOENT)
418 		return error;
419 	return 0;
420 }
421 
422 /*
423  * Make sure the dcache does not have a positive dentry for the name we've
424  * chosen.  The caller should have checked with the ondisk directory, so any
425  * discrepancy is a sign that something is seriously wrong.
426  */
427 static int
428 xrep_adoption_check_dcache(
429 	struct xrep_adoption	*adopt)
430 {
431 	struct qstr		qname = QSTR_INIT(adopt->xname->name,
432 						  adopt->xname->len);
433 	struct dentry		*d_orphanage, *d_child;
434 	int			error = 0;
435 
436 	d_orphanage = d_find_alias(VFS_I(adopt->sc->orphanage));
437 	if (!d_orphanage)
438 		return 0;
439 
440 	d_child = d_hash_and_lookup(d_orphanage, &qname);
441 	if (d_child) {
442 		trace_xrep_adoption_check_child(adopt->sc->mp, d_child);
443 
444 		if (d_is_positive(d_child)) {
445 			ASSERT(d_is_negative(d_child));
446 			error = -EFSCORRUPTED;
447 		}
448 
449 		dput(d_child);
450 	}
451 
452 	dput(d_orphanage);
453 	if (error)
454 		return error;
455 
456 	/*
457 	 * Do we need to update d_parent of the dentry for the file being
458 	 * repaired?  There shouldn't be a hashed dentry with a parent since
459 	 * the file had nonzero nlink but wasn't connected to any parent dir.
460 	 */
461 	d_child = d_find_alias(VFS_I(adopt->sc->ip));
462 	if (!d_child)
463 		return 0;
464 
465 	trace_xrep_adoption_check_alias(adopt->sc->mp, d_child);
466 
467 	if (d_child->d_parent && !d_unhashed(d_child)) {
468 		ASSERT(d_child->d_parent == NULL || d_unhashed(d_child));
469 		error = -EFSCORRUPTED;
470 	}
471 
472 	dput(d_child);
473 	return error;
474 }
475 
476 /*
477  * Remove all negative dentries from the dcache.  There should not be any
478  * positive entries, since we've maintained our lock on the orphanage
479  * directory.
480  */
481 static void
482 xrep_adoption_zap_dcache(
483 	struct xrep_adoption	*adopt)
484 {
485 	struct qstr		qname = QSTR_INIT(adopt->xname->name,
486 						  adopt->xname->len);
487 	struct dentry		*d_orphanage, *d_child;
488 
489 	d_orphanage = d_find_alias(VFS_I(adopt->sc->orphanage));
490 	if (!d_orphanage)
491 		return;
492 
493 	d_child = d_hash_and_lookup(d_orphanage, &qname);
494 	while (d_child != NULL) {
495 		trace_xrep_adoption_invalidate_child(adopt->sc->mp, d_child);
496 
497 		ASSERT(d_is_negative(d_child));
498 		d_invalidate(d_child);
499 		dput(d_child);
500 		d_child = d_lookup(d_orphanage, &qname);
501 	}
502 
503 	dput(d_orphanage);
504 }
505 
506 /*
507  * Move the current file to the orphanage under the computed name.
508  *
509  * Returns with a dirty transaction so that the caller can handle any other
510  * work, such as fixing up unlinked lists or resetting link counts.
511  */
512 int
513 xrep_adoption_move(
514 	struct xrep_adoption	*adopt)
515 {
516 	struct xfs_scrub	*sc = adopt->sc;
517 	bool			isdir = S_ISDIR(VFS_I(sc->ip)->i_mode);
518 	int			error;
519 
520 	trace_xrep_adoption_reparent(sc->orphanage, adopt->xname,
521 			sc->ip->i_ino);
522 
523 	error = xrep_adoption_check_dcache(adopt);
524 	if (error)
525 		return error;
526 
527 	/* Create the new name in the orphanage. */
528 	error = xfs_dir_createname(sc->tp, sc->orphanage, adopt->xname,
529 			sc->ip->i_ino, adopt->orphanage_blkres);
530 	if (error)
531 		return error;
532 
533 	/*
534 	 * Bump the link count of the orphanage if we just added a
535 	 * subdirectory, and update its timestamps.
536 	 */
537 	xfs_trans_ichgtime(sc->tp, sc->orphanage,
538 			XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
539 	if (isdir)
540 		xfs_bumplink(sc->tp, sc->orphanage);
541 	xfs_trans_log_inode(sc->tp, sc->orphanage, XFS_ILOG_CORE);
542 
543 	/* Replace the dotdot entry if the child is a subdirectory. */
544 	if (isdir) {
545 		error = xfs_dir_replace(sc->tp, sc->ip, &xfs_name_dotdot,
546 				sc->orphanage->i_ino, adopt->child_blkres);
547 		if (error)
548 			return error;
549 	}
550 
551 	/*
552 	 * Notify dirent hooks that we moved the file to /lost+found, and
553 	 * finish all the deferred work so that we know the adoption is fully
554 	 * recorded in the log.
555 	 */
556 	xfs_dir_update_hook(sc->orphanage, sc->ip, 1, adopt->xname);
557 
558 	/* Remove negative dentries from the lost+found's dcache */
559 	xrep_adoption_zap_dcache(adopt);
560 	return 0;
561 }
562 
563 /*
564  * Roll to a clean scrub transaction so that we can release the orphanage,
565  * even if xrep_adoption_move was not called.
566  *
567  * Commits all the work and deferred ops attached to an adoption request and
568  * rolls to a clean scrub transaction.  On success, returns 0 with the scrub
569  * context holding a clean transaction with no inodes joined.  On failure,
570  * returns negative errno with no scrub transaction.  All inode locks are
571  * still held after this function returns.
572  */
573 int
574 xrep_adoption_trans_roll(
575 	struct xrep_adoption	*adopt)
576 {
577 	struct xfs_scrub	*sc = adopt->sc;
578 	int			error;
579 
580 	trace_xrep_adoption_trans_roll(sc->orphanage, sc->ip,
581 			!!(sc->tp->t_flags & XFS_TRANS_DIRTY));
582 
583 	/* Finish all the deferred ops to commit all repairs. */
584 	error = xrep_defer_finish(sc);
585 	if (error)
586 		return error;
587 
588 	/* Roll the transaction once more to detach the inodes. */
589 	return xfs_trans_roll(&sc->tp);
590 }
591