xref: /linux/fs/xfs/scrub/parent_repair.c (revision f3f5edc5e41e038cf66d124a4cbacf6ff0983513)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (c) 2020-2024 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_defer.h"
13 #include "xfs_bit.h"
14 #include "xfs_log_format.h"
15 #include "xfs_trans.h"
16 #include "xfs_sb.h"
17 #include "xfs_inode.h"
18 #include "xfs_icache.h"
19 #include "xfs_da_format.h"
20 #include "xfs_da_btree.h"
21 #include "xfs_dir2.h"
22 #include "xfs_bmap_btree.h"
23 #include "xfs_dir2_priv.h"
24 #include "xfs_trans_space.h"
25 #include "xfs_health.h"
26 #include "xfs_exchmaps.h"
27 #include "xfs_parent.h"
28 #include "xfs_attr.h"
29 #include "xfs_bmap.h"
30 #include "xfs_ag.h"
31 #include "scrub/xfs_scrub.h"
32 #include "scrub/scrub.h"
33 #include "scrub/common.h"
34 #include "scrub/trace.h"
35 #include "scrub/repair.h"
36 #include "scrub/iscan.h"
37 #include "scrub/findparent.h"
38 #include "scrub/readdir.h"
39 #include "scrub/tempfile.h"
40 #include "scrub/tempexch.h"
41 #include "scrub/orphanage.h"
42 #include "scrub/xfile.h"
43 #include "scrub/xfarray.h"
44 #include "scrub/xfblob.h"
45 #include "scrub/attr_repair.h"
46 #include "scrub/listxattr.h"
47 
48 /*
49  * Repairing The Directory Parent Pointer
50  * ======================================
51  *
52  * Currently, only directories support parent pointers (in the form of '..'
53  * entries), so we simply scan the filesystem and update the '..' entry.
54  *
55  * Note that because the only parent pointer is the dotdot entry, we won't
56  * touch an unhealthy directory, since the directory repair code is perfectly
57  * capable of rebuilding a directory with the proper parent inode.
58  *
59  * See the section on locking issues in dir_repair.c for more information about
60  * conflicts with the VFS.  The findparent code will keep our incore parent
61  * inode up to date.
62  *
63  * If parent pointers are enabled, we instead reconstruct the parent pointer
64  * information by visiting every directory entry of every directory in the
65  * system and translating the relevant dirents into parent pointers.  In this
66  * case, it is advantageous to stash all parent pointers created from dirents
67  * from a single parent file before replaying them into the temporary file.  To
68  * save memory, the live filesystem scan reuses the findparent object.  Parent
69  * pointer repair chooses either directory scanning or findparent, but not
70  * both.
71  *
72  * When salvaging completes, the remaining stashed entries are replayed to the
73  * temporary file.  All non-parent pointer extended attributes are copied to
74  * the temporary file's extended attributes.  An atomic file mapping exchange
75  * is used to commit the new xattr blocks to the file being repaired.  This
76  * will disrupt attrmulti cursors.
77  */
78 
/* Stashed action: add a parent pointer to the tempfile. */
#define XREP_PPTR_ADD		(1)

/* Stashed action: delete a parent pointer from the tempfile. */
#define XREP_PPTR_REMOVE	(2)
84 
85 /* A stashed parent pointer update. */
86 struct xrep_pptr {
87 	/* Cookie for retrieval of the pptr name. */
88 	xfblob_cookie		name_cookie;
89 
90 	/* Parent pointer record. */
91 	struct xfs_parent_rec	pptr_rec;
92 
93 	/* Length of the pptr name. */
94 	uint8_t			namelen;
95 
96 	/* XREP_PPTR_{ADD,REMOVE} */
97 	uint8_t			action;
98 };
99 
/*
 * Upper bound (8 pages) on the recovered parent pointer data held in
 * pptr_recs and pptr_names before it gets written to the temp file.
 */
#define XREP_PARENT_MAX_STASH_BYTES	(PAGE_SIZE * 8)
105 
106 struct xrep_parent {
107 	struct xfs_scrub	*sc;
108 
109 	/* Fixed-size array of xrep_pptr structures. */
110 	struct xfarray		*pptr_recs;
111 
112 	/* Blobs containing parent pointer names. */
113 	struct xfblob		*pptr_names;
114 
115 	/* xattr keys */
116 	struct xfarray		*xattr_records;
117 
118 	/* xattr values */
119 	struct xfblob		*xattr_blobs;
120 
121 	/* Scratch buffers for saving extended attributes */
122 	unsigned char		*xattr_name;
123 	void			*xattr_value;
124 	unsigned int		xattr_value_sz;
125 
126 	/*
127 	 * Information used to exchange the attr fork mappings, if the fs
128 	 * supports parent pointers.
129 	 */
130 	struct xrep_tempexch	tx;
131 
132 	/*
133 	 * Information used to scan the filesystem to find the inumber of the
134 	 * dotdot entry for this directory.  On filesystems without parent
135 	 * pointers, we use the findparent_* functions on this object and
136 	 * access only the parent_ino field directly.
137 	 *
138 	 * When parent pointers are enabled, the directory entry scanner uses
139 	 * the iscan, hooks, and lock fields of this object directly.
140 	 * @pscan.lock coordinates access to pptr_recs, pptr_names, pptr, and
141 	 * pptr_scratch.  This reduces the memory requirements of this
142 	 * structure.
143 	 *
144 	 * The lock also controls access to xattr_records and xattr_blobs(?)
145 	 */
146 	struct xrep_parent_scan_info pscan;
147 
148 	/* Orphanage reparenting request. */
149 	struct xrep_adoption	adoption;
150 
151 	/* Directory entry name, plus the trailing null. */
152 	struct xfs_name		xname;
153 	unsigned char		namebuf[MAXNAMELEN];
154 
155 	/* Scratch buffer for scanning pptr xattrs */
156 	struct xfs_da_args	pptr_args;
157 
158 	/* Have we seen any live updates of parent pointers recently? */
159 	bool			saw_pptr_updates;
160 
161 	/* Number of parents we found after all other repairs */
162 	unsigned long long	parents;
163 };
164 
165 struct xrep_parent_xattr {
166 	/* Cookie for retrieval of the xattr name. */
167 	xfblob_cookie		name_cookie;
168 
169 	/* Cookie for retrieval of the xattr value. */
170 	xfblob_cookie		value_cookie;
171 
172 	/* XFS_ATTR_* flags */
173 	int			flags;
174 
175 	/* Length of the value and name. */
176 	uint32_t		valuelen;
177 	uint16_t		namelen;
178 };
179 
/*
 * Upper bound (8 pages) on the attr data held in xattr_records/xattr_blobs
 * before it gets written to the temp file.
 */
#define XREP_PARENT_XATTR_MAX_STASH_BYTES	(PAGE_SIZE * 8)
185 
186 /* Tear down all the incore stuff we created. */
187 static void
xrep_parent_teardown(struct xrep_parent * rp)188 xrep_parent_teardown(
189 	struct xrep_parent	*rp)
190 {
191 	xrep_findparent_scan_teardown(&rp->pscan);
192 	kvfree(rp->xattr_name);
193 	rp->xattr_name = NULL;
194 	kvfree(rp->xattr_value);
195 	rp->xattr_value = NULL;
196 	if (rp->xattr_blobs)
197 		xfblob_destroy(rp->xattr_blobs);
198 	rp->xattr_blobs = NULL;
199 	if (rp->xattr_records)
200 		xfarray_destroy(rp->xattr_records);
201 	rp->xattr_records = NULL;
202 	if (rp->pptr_names)
203 		xfblob_destroy(rp->pptr_names);
204 	rp->pptr_names = NULL;
205 	if (rp->pptr_recs)
206 		xfarray_destroy(rp->pptr_recs);
207 	rp->pptr_recs = NULL;
208 }
209 
210 /* Set up for a parent repair. */
211 int
xrep_setup_parent(struct xfs_scrub * sc)212 xrep_setup_parent(
213 	struct xfs_scrub	*sc)
214 {
215 	struct xrep_parent	*rp;
216 	int			error;
217 
218 	xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS);
219 
220 	rp = kvzalloc(sizeof(struct xrep_parent), XCHK_GFP_FLAGS);
221 	if (!rp)
222 		return -ENOMEM;
223 	rp->sc = sc;
224 	rp->xname.name = rp->namebuf;
225 	sc->buf = rp;
226 
227 	error = xrep_tempfile_create(sc, S_IFREG);
228 	if (error)
229 		return error;
230 
231 	return xrep_orphanage_try_create(sc);
232 }
233 
234 /*
235  * Scan all files in the filesystem for a child dirent that we can turn into
236  * the dotdot entry for this directory.
237  */
238 STATIC int
xrep_parent_find_dotdot(struct xrep_parent * rp)239 xrep_parent_find_dotdot(
240 	struct xrep_parent	*rp)
241 {
242 	struct xfs_scrub	*sc = rp->sc;
243 	xfs_ino_t		ino;
244 	unsigned int		sick, checked;
245 	int			error;
246 
247 	/*
248 	 * Avoid sick directories.  There shouldn't be anyone else clearing the
249 	 * directory's sick status.
250 	 */
251 	xfs_inode_measure_sickness(sc->ip, &sick, &checked);
252 	if (sick & XFS_SICK_INO_DIR)
253 		return -EFSCORRUPTED;
254 
255 	ino = xrep_findparent_self_reference(sc);
256 	if (ino != NULLFSINO) {
257 		xrep_findparent_scan_finish_early(&rp->pscan, ino);
258 		return 0;
259 	}
260 
261 	/*
262 	 * Drop the ILOCK on this directory so that we can scan for the dotdot
263 	 * entry.  Figure out who is going to be the parent of this directory,
264 	 * then retake the ILOCK so that we can salvage directory entries.
265 	 */
266 	xchk_iunlock(sc, XFS_ILOCK_EXCL);
267 
268 	/* Does the VFS dcache have an answer for us? */
269 	ino = xrep_findparent_from_dcache(sc);
270 	if (ino != NULLFSINO) {
271 		error = xrep_findparent_confirm(sc, &ino);
272 		if (!error && ino != NULLFSINO) {
273 			xrep_findparent_scan_finish_early(&rp->pscan, ino);
274 			goto out_relock;
275 		}
276 	}
277 
278 	/* Scan the entire filesystem for a parent. */
279 	error = xrep_findparent_scan(&rp->pscan);
280 out_relock:
281 	xchk_ilock(sc, XFS_ILOCK_EXCL);
282 
283 	return error;
284 }
285 
286 /*
287  * Add this stashed incore parent pointer to the temporary file.
288  * The caller must hold the tempdir's IOLOCK, must not hold any ILOCKs, and
289  * must not be in transaction context.
290  */
291 STATIC int
xrep_parent_replay_update(struct xrep_parent * rp,const struct xfs_name * xname,struct xrep_pptr * pptr)292 xrep_parent_replay_update(
293 	struct xrep_parent	*rp,
294 	const struct xfs_name	*xname,
295 	struct xrep_pptr	*pptr)
296 {
297 	struct xfs_scrub	*sc = rp->sc;
298 
299 	switch (pptr->action) {
300 	case XREP_PPTR_ADD:
301 		/* Create parent pointer. */
302 		trace_xrep_parent_replay_parentadd(sc->tempip, xname,
303 				&pptr->pptr_rec);
304 
305 		return xfs_parent_set(sc->tempip, sc->ip->i_ino, xname,
306 				&pptr->pptr_rec, &rp->pptr_args);
307 	case XREP_PPTR_REMOVE:
308 		/* Remove parent pointer. */
309 		trace_xrep_parent_replay_parentremove(sc->tempip, xname,
310 				&pptr->pptr_rec);
311 
312 		return xfs_parent_unset(sc->tempip, sc->ip->i_ino, xname,
313 				&pptr->pptr_rec, &rp->pptr_args);
314 	}
315 
316 	ASSERT(0);
317 	return -EIO;
318 }
319 
320 /*
321  * Flush stashed parent pointer updates that have been recorded by the scanner.
322  * This is done to reduce the memory requirements of the parent pointer
323  * rebuild, since files can have a lot of hardlinks and the fs can be busy.
324  *
325  * Caller must not hold transactions or ILOCKs.  Caller must hold the tempfile
326  * IOLOCK.
327  */
328 STATIC int
xrep_parent_replay_updates(struct xrep_parent * rp)329 xrep_parent_replay_updates(
330 	struct xrep_parent	*rp)
331 {
332 	xfarray_idx_t		array_cur;
333 	int			error;
334 
335 	mutex_lock(&rp->pscan.lock);
336 	foreach_xfarray_idx(rp->pptr_recs, array_cur) {
337 		struct xrep_pptr	pptr;
338 
339 		error = xfarray_load(rp->pptr_recs, array_cur, &pptr);
340 		if (error)
341 			goto out_unlock;
342 
343 		error = xfblob_loadname(rp->pptr_names, pptr.name_cookie,
344 				&rp->xname, pptr.namelen);
345 		if (error)
346 			goto out_unlock;
347 		rp->xname.len = pptr.namelen;
348 		mutex_unlock(&rp->pscan.lock);
349 
350 		error = xrep_parent_replay_update(rp, &rp->xname, &pptr);
351 		if (error)
352 			return error;
353 
354 		mutex_lock(&rp->pscan.lock);
355 	}
356 
357 	/* Empty out both arrays now that we've added the entries. */
358 	xfarray_truncate(rp->pptr_recs);
359 	xfblob_truncate(rp->pptr_names);
360 	mutex_unlock(&rp->pscan.lock);
361 	return 0;
362 out_unlock:
363 	mutex_unlock(&rp->pscan.lock);
364 	return error;
365 }
366 
367 /*
368  * Remember that we want to create a parent pointer in the tempfile.  These
369  * stashed actions will be replayed later.
370  */
371 STATIC int
xrep_parent_stash_parentadd(struct xrep_parent * rp,const struct xfs_name * name,const struct xfs_inode * dp)372 xrep_parent_stash_parentadd(
373 	struct xrep_parent	*rp,
374 	const struct xfs_name	*name,
375 	const struct xfs_inode	*dp)
376 {
377 	struct xrep_pptr	pptr = {
378 		.action		= XREP_PPTR_ADD,
379 		.namelen	= name->len,
380 	};
381 	int			error;
382 
383 	trace_xrep_parent_stash_parentadd(rp->sc->tempip, dp, name);
384 
385 	xfs_inode_to_parent_rec(&pptr.pptr_rec, dp);
386 	error = xfblob_storename(rp->pptr_names, &pptr.name_cookie, name);
387 	if (error)
388 		return error;
389 
390 	return xfarray_append(rp->pptr_recs, &pptr);
391 }
392 
393 /*
394  * Remember that we want to remove a parent pointer from the tempfile.  These
395  * stashed actions will be replayed later.
396  */
397 STATIC int
xrep_parent_stash_parentremove(struct xrep_parent * rp,const struct xfs_name * name,const struct xfs_inode * dp)398 xrep_parent_stash_parentremove(
399 	struct xrep_parent	*rp,
400 	const struct xfs_name	*name,
401 	const struct xfs_inode	*dp)
402 {
403 	struct xrep_pptr	pptr = {
404 		.action		= XREP_PPTR_REMOVE,
405 		.namelen	= name->len,
406 	};
407 	int			error;
408 
409 	trace_xrep_parent_stash_parentremove(rp->sc->tempip, dp, name);
410 
411 	xfs_inode_to_parent_rec(&pptr.pptr_rec, dp);
412 	error = xfblob_storename(rp->pptr_names, &pptr.name_cookie, name);
413 	if (error)
414 		return error;
415 
416 	return xfarray_append(rp->pptr_recs, &pptr);
417 }
418 
419 /*
420  * Examine an entry of a directory.  If this dirent leads us back to the file
421  * whose parent pointers we're rebuilding, add a pptr to the temporary
422  * directory.
423  */
424 STATIC int
xrep_parent_scan_dirent(struct xfs_scrub * sc,struct xfs_inode * dp,xfs_dir2_dataptr_t dapos,const struct xfs_name * name,xfs_ino_t ino,void * priv)425 xrep_parent_scan_dirent(
426 	struct xfs_scrub	*sc,
427 	struct xfs_inode	*dp,
428 	xfs_dir2_dataptr_t	dapos,
429 	const struct xfs_name	*name,
430 	xfs_ino_t		ino,
431 	void			*priv)
432 {
433 	struct xrep_parent	*rp = priv;
434 	int			error;
435 
436 	/* Dirent doesn't point to this directory. */
437 	if (ino != rp->sc->ip->i_ino)
438 		return 0;
439 
440 	/* No weird looking names. */
441 	if (name->len == 0 || !xfs_dir2_namecheck(name->name, name->len))
442 		return -EFSCORRUPTED;
443 
444 	/* No mismatching ftypes. */
445 	if (name->type != xfs_mode_to_ftype(VFS_I(sc->ip)->i_mode))
446 		return -EFSCORRUPTED;
447 
448 	/* Don't pick up dot or dotdot entries; we only want child dirents. */
449 	if (xfs_dir2_samename(name, &xfs_name_dotdot) ||
450 	    xfs_dir2_samename(name, &xfs_name_dot))
451 		return 0;
452 
453 	/*
454 	 * Transform this dirent into a parent pointer and queue it for later
455 	 * addition to the temporary file.
456 	 */
457 	mutex_lock(&rp->pscan.lock);
458 	error = xrep_parent_stash_parentadd(rp, name, dp);
459 	mutex_unlock(&rp->pscan.lock);
460 	return error;
461 }
462 
463 /*
464  * Decide if we want to look for dirents in this directory.  Skip the file
465  * being repaired and any files being used to stage repairs.
466  */
467 static inline bool
xrep_parent_want_scan(struct xrep_parent * rp,const struct xfs_inode * ip)468 xrep_parent_want_scan(
469 	struct xrep_parent	*rp,
470 	const struct xfs_inode	*ip)
471 {
472 	return ip != rp->sc->ip && !xrep_is_tempfile(ip);
473 }
474 
475 /*
476  * Take ILOCK on a file that we want to scan.
477  *
478  * Select ILOCK_EXCL if the file is a directory with an unloaded data bmbt.
479  * Otherwise, take ILOCK_SHARED.
480  */
481 static inline unsigned int
xrep_parent_scan_ilock(struct xrep_parent * rp,struct xfs_inode * ip)482 xrep_parent_scan_ilock(
483 	struct xrep_parent	*rp,
484 	struct xfs_inode	*ip)
485 {
486 	uint			lock_mode = XFS_ILOCK_SHARED;
487 
488 	/* Still need to take the shared ILOCK to advance the iscan cursor. */
489 	if (!xrep_parent_want_scan(rp, ip))
490 		goto lock;
491 
492 	if (S_ISDIR(VFS_I(ip)->i_mode) && xfs_need_iread_extents(&ip->i_df)) {
493 		lock_mode = XFS_ILOCK_EXCL;
494 		goto lock;
495 	}
496 
497 lock:
498 	xfs_ilock(ip, lock_mode);
499 	return lock_mode;
500 }
501 
502 /*
503  * Scan this file for relevant child dirents that point to the file whose
504  * parent pointers we're rebuilding.
505  */
506 STATIC int
xrep_parent_scan_file(struct xrep_parent * rp,struct xfs_inode * ip)507 xrep_parent_scan_file(
508 	struct xrep_parent	*rp,
509 	struct xfs_inode	*ip)
510 {
511 	unsigned int		lock_mode;
512 	int			error = 0;
513 
514 	lock_mode = xrep_parent_scan_ilock(rp, ip);
515 
516 	if (!xrep_parent_want_scan(rp, ip))
517 		goto scan_done;
518 
519 	if (S_ISDIR(VFS_I(ip)->i_mode)) {
520 		/*
521 		 * If the directory looks as though it has been zapped by the
522 		 * inode record repair code, we cannot scan for child dirents.
523 		 */
524 		if (xchk_dir_looks_zapped(ip)) {
525 			error = -EBUSY;
526 			goto scan_done;
527 		}
528 
529 		error = xchk_dir_walk(rp->sc, ip, xrep_parent_scan_dirent, rp);
530 		if (error)
531 			goto scan_done;
532 	}
533 
534 scan_done:
535 	xchk_iscan_mark_visited(&rp->pscan.iscan, ip);
536 	xfs_iunlock(ip, lock_mode);
537 	return error;
538 }
539 
540 /* Decide if we've stashed too much pptr data in memory. */
541 static inline bool
xrep_parent_want_flush_stashed(struct xrep_parent * rp)542 xrep_parent_want_flush_stashed(
543 	struct xrep_parent	*rp)
544 {
545 	unsigned long long	bytes;
546 
547 	bytes = xfarray_bytes(rp->pptr_recs) + xfblob_bytes(rp->pptr_names);
548 	return bytes > XREP_PARENT_MAX_STASH_BYTES;
549 }
550 
551 /*
552  * Scan all directories in the filesystem to look for dirents that we can turn
553  * into parent pointers.
554  */
555 STATIC int
xrep_parent_scan_dirtree(struct xrep_parent * rp)556 xrep_parent_scan_dirtree(
557 	struct xrep_parent	*rp)
558 {
559 	struct xfs_scrub	*sc = rp->sc;
560 	struct xfs_inode	*ip;
561 	int			error;
562 
563 	/*
564 	 * Filesystem scans are time consuming.  Drop the file ILOCK and all
565 	 * other resources for the duration of the scan and hope for the best.
566 	 * The live update hooks will keep our scan information up to date.
567 	 */
568 	xchk_trans_cancel(sc);
569 	if (sc->ilock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL))
570 		xchk_iunlock(sc, sc->ilock_flags & (XFS_ILOCK_SHARED |
571 						    XFS_ILOCK_EXCL));
572 	xchk_trans_alloc_empty(sc);
573 
574 	while ((error = xchk_iscan_iter(&rp->pscan.iscan, &ip)) == 1) {
575 		bool		flush;
576 
577 		error = xrep_parent_scan_file(rp, ip);
578 		xchk_irele(sc, ip);
579 		if (error)
580 			break;
581 
582 		/* Flush stashed pptr updates to constrain memory usage. */
583 		mutex_lock(&rp->pscan.lock);
584 		flush = xrep_parent_want_flush_stashed(rp);
585 		mutex_unlock(&rp->pscan.lock);
586 		if (flush) {
587 			xchk_trans_cancel(sc);
588 
589 			error = xrep_tempfile_iolock_polled(sc);
590 			if (error)
591 				break;
592 
593 			error = xrep_parent_replay_updates(rp);
594 			xrep_tempfile_iounlock(sc);
595 			if (error)
596 				break;
597 
598 			xchk_trans_alloc_empty(sc);
599 		}
600 
601 		if (xchk_should_terminate(sc, &error))
602 			break;
603 	}
604 	xchk_iscan_iter_finish(&rp->pscan.iscan);
605 	if (error) {
606 		/*
607 		 * If we couldn't grab an inode that was busy with a state
608 		 * change, change the error code so that we exit to userspace
609 		 * as quickly as possible.
610 		 */
611 		if (error == -EBUSY)
612 			return -ECANCELED;
613 		return error;
614 	}
615 
616 	/*
617 	 * Retake sc->ip's ILOCK now that we're done flushing stashed parent
618 	 * pointers.  We end this function with an empty transaction and the
619 	 * ILOCK.
620 	 */
621 	xchk_ilock(rp->sc, XFS_ILOCK_EXCL);
622 	return 0;
623 }
624 
625 /*
626  * Capture dirent updates being made by other threads which are relevant to the
627  * file being repaired.
628  */
629 STATIC int
xrep_parent_live_update(struct notifier_block * nb,unsigned long action,void * data)630 xrep_parent_live_update(
631 	struct notifier_block		*nb,
632 	unsigned long			action,
633 	void				*data)
634 {
635 	struct xfs_dir_update_params	*p = data;
636 	struct xrep_parent		*rp;
637 	struct xfs_scrub		*sc;
638 	int				error;
639 
640 	rp = container_of(nb, struct xrep_parent, pscan.dhook.dirent_hook.nb);
641 	sc = rp->sc;
642 
643 	/*
644 	 * This thread updated a dirent that points to the file that we're
645 	 * repairing, so stash the update for replay against the temporary
646 	 * file.
647 	 */
648 	if (p->ip->i_ino == sc->ip->i_ino &&
649 	    xchk_iscan_want_live_update(&rp->pscan.iscan, p->dp->i_ino)) {
650 		mutex_lock(&rp->pscan.lock);
651 		if (p->delta > 0)
652 			error = xrep_parent_stash_parentadd(rp, p->name, p->dp);
653 		else
654 			error = xrep_parent_stash_parentremove(rp, p->name,
655 					p->dp);
656 		if (!error)
657 			rp->saw_pptr_updates = true;
658 		mutex_unlock(&rp->pscan.lock);
659 		if (error)
660 			goto out_abort;
661 	}
662 
663 	return NOTIFY_DONE;
664 out_abort:
665 	xchk_iscan_abort(&rp->pscan.iscan);
666 	return NOTIFY_DONE;
667 }
668 
669 /* Reset a directory's dotdot entry, if needed. */
670 STATIC int
xrep_parent_reset_dotdot(struct xrep_parent * rp)671 xrep_parent_reset_dotdot(
672 	struct xrep_parent	*rp)
673 {
674 	struct xfs_scrub	*sc = rp->sc;
675 	xfs_ino_t		ino;
676 	unsigned int		spaceres;
677 	int			error = 0;
678 
679 	ASSERT(sc->ilock_flags & XFS_ILOCK_EXCL);
680 
681 	error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot, &ino);
682 	if (error || ino == rp->pscan.parent_ino)
683 		return error;
684 
685 	xfs_trans_ijoin(sc->tp, sc->ip, 0);
686 
687 	trace_xrep_parent_reset_dotdot(sc->ip, rp->pscan.parent_ino);
688 
689 	/*
690 	 * Reserve more space just in case we have to expand the dir.  We're
691 	 * allowed to exceed quota to repair inconsistent metadata.
692 	 */
693 	spaceres = xfs_rename_space_res(sc->mp, 0, false, xfs_name_dotdot.len,
694 			false);
695 	error = xfs_trans_reserve_more_inode(sc->tp, sc->ip, spaceres, 0,
696 			true);
697 	if (error)
698 		return error;
699 
700 	error = xfs_dir_replace(sc->tp, sc->ip, &xfs_name_dotdot,
701 			rp->pscan.parent_ino, spaceres);
702 	if (error)
703 		return error;
704 
705 	/*
706 	 * Roll transaction to detach the inode from the transaction but retain
707 	 * ILOCK_EXCL.
708 	 */
709 	return xfs_trans_roll(&sc->tp);
710 }
711 
712 /* Pass back the parent inumber if this a parent pointer */
713 STATIC int
xrep_parent_lookup_pptr(struct xfs_scrub * sc,struct xfs_inode * ip,unsigned int attr_flags,const unsigned char * name,unsigned int namelen,const void * value,unsigned int valuelen,void * priv)714 xrep_parent_lookup_pptr(
715 	struct xfs_scrub	*sc,
716 	struct xfs_inode	*ip,
717 	unsigned int		attr_flags,
718 	const unsigned char	*name,
719 	unsigned int		namelen,
720 	const void		*value,
721 	unsigned int		valuelen,
722 	void			*priv)
723 {
724 	xfs_ino_t		*inop = priv;
725 	xfs_ino_t		parent_ino;
726 	int			error;
727 
728 	if (!(attr_flags & XFS_ATTR_PARENT))
729 		return 0;
730 
731 	error = xfs_parent_from_attr(sc->mp, attr_flags, name, namelen, value,
732 			valuelen, &parent_ino, NULL);
733 	if (error)
734 		return error;
735 
736 	*inop = parent_ino;
737 	return -ECANCELED;
738 }
739 
740 /*
741  * Find the first parent of the scrub target by walking parent pointers for
742  * the purpose of deciding if we're going to move it to the orphanage.
743  * We don't care if the attr fork is zapped.
744  */
745 STATIC int
xrep_parent_lookup_pptrs(struct xfs_scrub * sc,xfs_ino_t * inop)746 xrep_parent_lookup_pptrs(
747 	struct xfs_scrub	*sc,
748 	xfs_ino_t		*inop)
749 {
750 	int			error;
751 
752 	*inop = NULLFSINO;
753 
754 	error = xchk_xattr_walk(sc, sc->ip, xrep_parent_lookup_pptr, NULL,
755 			inop);
756 	if (error && error != -ECANCELED)
757 		return error;
758 	return 0;
759 }
760 
761 /*
762  * Move the current file to the orphanage.
763  *
764  * Caller must hold IOLOCK_EXCL on @sc->ip, and no other inode locks.  Upon
765  * successful return, the scrub transaction will have enough extra reservation
766  * to make the move; it will hold IOLOCK_EXCL and ILOCK_EXCL of @sc->ip and the
767  * orphanage; and both inodes will be ijoined.
768  */
769 STATIC int
xrep_parent_move_to_orphanage(struct xrep_parent * rp)770 xrep_parent_move_to_orphanage(
771 	struct xrep_parent	*rp)
772 {
773 	struct xfs_scrub	*sc = rp->sc;
774 	xfs_ino_t		orig_parent, new_parent;
775 	int			error;
776 
777 	if (S_ISDIR(VFS_I(sc->ip)->i_mode)) {
778 		/*
779 		 * We are about to drop the ILOCK on sc->ip to lock the
780 		 * orphanage and prepare for the adoption.  Therefore, look up
781 		 * the old dotdot entry for sc->ip so that we can compare it
782 		 * after we re-lock sc->ip.
783 		 */
784 		error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot,
785 				&orig_parent);
786 		if (error)
787 			return error;
788 	} else {
789 		/*
790 		 * We haven't dropped the ILOCK since we committed the new
791 		 * xattr structure (and hence the new parent pointer records),
792 		 * which means that the file cannot have been moved in the
793 		 * directory tree, and there are no parents.
794 		 */
795 		orig_parent = NULLFSINO;
796 	}
797 
798 	/*
799 	 * Drop the ILOCK on the scrub target and commit the transaction.
800 	 * Adoption computes its own resource requirements and gathers the
801 	 * necessary components.
802 	 */
803 	error = xrep_trans_commit(sc);
804 	if (error)
805 		return error;
806 	xchk_iunlock(sc, XFS_ILOCK_EXCL);
807 
808 	/* If we can take the orphanage's iolock then we're ready to move. */
809 	if (!xrep_orphanage_ilock_nowait(sc, XFS_IOLOCK_EXCL)) {
810 		xchk_iunlock(sc, sc->ilock_flags);
811 		error = xrep_orphanage_iolock_two(sc);
812 		if (error)
813 			return error;
814 	}
815 
816 	/* Grab transaction and ILOCK the two files. */
817 	error = xrep_adoption_trans_alloc(sc, &rp->adoption);
818 	if (error)
819 		return error;
820 
821 	error = xrep_adoption_compute_name(&rp->adoption, &rp->xname);
822 	if (error)
823 		return error;
824 
825 	/*
826 	 * Now that we've reacquired the ILOCK on sc->ip, look up the dotdot
827 	 * entry again.  If the parent changed or the child was unlinked while
828 	 * the child directory was unlocked, we don't need to move the child to
829 	 * the orphanage after all.  For a non-directory, we have to scan for
830 	 * the first parent pointer to see if one has been added.
831 	 */
832 	if (S_ISDIR(VFS_I(sc->ip)->i_mode))
833 		error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot,
834 				&new_parent);
835 	else
836 		error = xrep_parent_lookup_pptrs(sc, &new_parent);
837 	if (error)
838 		return error;
839 
840 	/*
841 	 * Attach to the orphanage if we still have a linked directory and it
842 	 * hasn't been moved.
843 	 */
844 	if (orig_parent == new_parent && VFS_I(sc->ip)->i_nlink > 0) {
845 		error = xrep_adoption_move(&rp->adoption);
846 		if (error)
847 			return error;
848 	}
849 
850 	/*
851 	 * Launder the scrub transaction so we can drop the orphanage ILOCK
852 	 * and IOLOCK.  Return holding the scrub target's ILOCK and IOLOCK.
853 	 */
854 	error = xrep_adoption_trans_roll(&rp->adoption);
855 	if (error)
856 		return error;
857 
858 	xrep_orphanage_iunlock(sc, XFS_ILOCK_EXCL);
859 	xrep_orphanage_iunlock(sc, XFS_IOLOCK_EXCL);
860 	return 0;
861 }
862 
863 /* Ensure that the xattr value buffer is large enough. */
864 STATIC int
xrep_parent_alloc_xattr_value(struct xrep_parent * rp,size_t bufsize)865 xrep_parent_alloc_xattr_value(
866 	struct xrep_parent	*rp,
867 	size_t			bufsize)
868 {
869 	void			*new_val;
870 
871 	if (rp->xattr_value_sz >= bufsize)
872 		return 0;
873 
874 	if (rp->xattr_value) {
875 		kvfree(rp->xattr_value);
876 		rp->xattr_value = NULL;
877 		rp->xattr_value_sz = 0;
878 	}
879 
880 	new_val = kvmalloc(bufsize, XCHK_GFP_FLAGS);
881 	if (!new_val)
882 		return -ENOMEM;
883 
884 	rp->xattr_value = new_val;
885 	rp->xattr_value_sz = bufsize;
886 	return 0;
887 }
888 
889 /* Retrieve the (remote) value of a non-pptr xattr. */
890 STATIC int
xrep_parent_fetch_xattr_remote(struct xrep_parent * rp,struct xfs_inode * ip,unsigned int attr_flags,const unsigned char * name,unsigned int namelen,unsigned int valuelen)891 xrep_parent_fetch_xattr_remote(
892 	struct xrep_parent	*rp,
893 	struct xfs_inode	*ip,
894 	unsigned int		attr_flags,
895 	const unsigned char	*name,
896 	unsigned int		namelen,
897 	unsigned int		valuelen)
898 {
899 	struct xfs_scrub	*sc = rp->sc;
900 	struct xfs_da_args	args = {
901 		.attr_filter	= attr_flags & XFS_ATTR_NSP_ONDISK_MASK,
902 		.geo		= sc->mp->m_attr_geo,
903 		.whichfork	= XFS_ATTR_FORK,
904 		.dp		= ip,
905 		.name		= name,
906 		.namelen	= namelen,
907 		.trans		= sc->tp,
908 		.valuelen	= valuelen,
909 		.owner		= ip->i_ino,
910 	};
911 	int			error;
912 
913 	/*
914 	 * If we need a larger value buffer, try to allocate one.  If that
915 	 * fails, return with -EDEADLOCK to try harder.
916 	 */
917 	error = xrep_parent_alloc_xattr_value(rp, valuelen);
918 	if (error == -ENOMEM)
919 		return -EDEADLOCK;
920 	if (error)
921 		return error;
922 
923 	args.value = rp->xattr_value;
924 	xfs_attr_sethash(&args);
925 	return xfs_attr_get_ilocked(&args);
926 }
927 
928 /* Stash non-pptr attributes for later replay into the temporary file. */
929 STATIC int
xrep_parent_stash_xattr(struct xfs_scrub * sc,struct xfs_inode * ip,unsigned int attr_flags,const unsigned char * name,unsigned int namelen,const void * value,unsigned int valuelen,void * priv)930 xrep_parent_stash_xattr(
931 	struct xfs_scrub	*sc,
932 	struct xfs_inode	*ip,
933 	unsigned int		attr_flags,
934 	const unsigned char	*name,
935 	unsigned int		namelen,
936 	const void		*value,
937 	unsigned int		valuelen,
938 	void			*priv)
939 {
940 	struct xrep_parent_xattr key = {
941 		.valuelen	= valuelen,
942 		.namelen	= namelen,
943 		.flags		= attr_flags & XFS_ATTR_NSP_ONDISK_MASK,
944 	};
945 	struct xrep_parent	*rp = priv;
946 	int			error;
947 
948 	if (attr_flags & (XFS_ATTR_INCOMPLETE | XFS_ATTR_PARENT))
949 		return 0;
950 
951 	if (!value) {
952 		error = xrep_parent_fetch_xattr_remote(rp, ip, attr_flags,
953 				name, namelen, valuelen);
954 		if (error)
955 			return error;
956 
957 		value = rp->xattr_value;
958 	}
959 
960 	trace_xrep_parent_stash_xattr(rp->sc->tempip, key.flags, (void *)name,
961 			key.namelen, key.valuelen);
962 
963 	error = xfblob_store(rp->xattr_blobs, &key.name_cookie, name,
964 			key.namelen);
965 	if (error)
966 		return error;
967 
968 	error = xfblob_store(rp->xattr_blobs, &key.value_cookie, value,
969 			key.valuelen);
970 	if (error)
971 		return error;
972 
973 	return xfarray_append(rp->xattr_records, &key);
974 }
975 
976 /* Insert one xattr key/value. */
977 STATIC int
xrep_parent_insert_xattr(struct xrep_parent * rp,const struct xrep_parent_xattr * key)978 xrep_parent_insert_xattr(
979 	struct xrep_parent		*rp,
980 	const struct xrep_parent_xattr	*key)
981 {
982 	struct xfs_da_args		args = {
983 		.dp			= rp->sc->tempip,
984 		.attr_filter		= key->flags,
985 		.namelen		= key->namelen,
986 		.valuelen		= key->valuelen,
987 		.owner			= rp->sc->ip->i_ino,
988 		.geo			= rp->sc->mp->m_attr_geo,
989 		.whichfork		= XFS_ATTR_FORK,
990 		.op_flags		= XFS_DA_OP_OKNOENT,
991 	};
992 	int				error;
993 
994 	ASSERT(!(key->flags & XFS_ATTR_PARENT));
995 
996 	/*
997 	 * Grab pointers to the scrub buffer so that we can use them to insert
998 	 * attrs into the temp file.
999 	 */
1000 	args.name = rp->xattr_name;
1001 	args.value = rp->xattr_value;
1002 
1003 	/*
1004 	 * The attribute name is stored near the end of the in-core buffer,
1005 	 * though we reserve one more byte to ensure null termination.
1006 	 */
1007 	rp->xattr_name[XATTR_NAME_MAX] = 0;
1008 
1009 	error = xfblob_load(rp->xattr_blobs, key->name_cookie, rp->xattr_name,
1010 			key->namelen);
1011 	if (error)
1012 		return error;
1013 
1014 	error = xfblob_free(rp->xattr_blobs, key->name_cookie);
1015 	if (error)
1016 		return error;
1017 
1018 	error = xfblob_load(rp->xattr_blobs, key->value_cookie, args.value,
1019 			key->valuelen);
1020 	if (error)
1021 		return error;
1022 
1023 	error = xfblob_free(rp->xattr_blobs, key->value_cookie);
1024 	if (error)
1025 		return error;
1026 
1027 	rp->xattr_name[key->namelen] = 0;
1028 
1029 	trace_xrep_parent_insert_xattr(rp->sc->tempip, key->flags,
1030 			rp->xattr_name, key->namelen, key->valuelen);
1031 
1032 	xfs_attr_sethash(&args);
1033 	return xfs_attr_set(&args, XFS_ATTRUPDATE_UPSERT, false);
1034 }
1035 
1036 /*
1037  * Periodically flush salvaged attributes to the temporary file.  This is done
1038  * to reduce the memory requirements of the xattr rebuild because files can
1039  * contain millions of attributes.
1040  */
1041 STATIC int
xrep_parent_flush_xattrs(struct xrep_parent * rp)1042 xrep_parent_flush_xattrs(
1043 	struct xrep_parent	*rp)
1044 {
1045 	xfarray_idx_t		array_cur;
1046 	int			error;
1047 
1048 	/*
1049 	 * Entering this function, the scrub context has a reference to the
1050 	 * inode being repaired, the temporary file, and the empty scrub
1051 	 * transaction that we created for the xattr scan.  We hold ILOCK_EXCL
1052 	 * on the inode being repaired.
1053 	 *
1054 	 * To constrain kernel memory use, we occasionally flush salvaged
1055 	 * xattrs from the xfarray and xfblob structures into the temporary
1056 	 * file in preparation for exchanging the xattr structures at the end.
1057 	 * Updating the temporary file requires a transaction, so we commit the
1058 	 * scrub transaction and drop the ILOCK so that xfs_attr_set can
1059 	 * allocate whatever transaction it wants.
1060 	 *
1061 	 * We still hold IOLOCK_EXCL on the inode being repaired, which
1062 	 * prevents anyone from adding xattrs (or parent pointers) while we're
1063 	 * flushing.
1064 	 */
1065 	xchk_trans_cancel(rp->sc);
1066 	xchk_iunlock(rp->sc, XFS_ILOCK_EXCL);
1067 
1068 	/*
1069 	 * Take the IOLOCK of the temporary file while we modify xattrs.  This
1070 	 * isn't strictly required because the temporary file is never revealed
1071 	 * to userspace, but we follow the same locking rules.  We still hold
1072 	 * sc->ip's IOLOCK.
1073 	 */
1074 	error = xrep_tempfile_iolock_polled(rp->sc);
1075 	if (error)
1076 		return error;
1077 
1078 	/* Add all the salvaged attrs to the temporary file. */
1079 	foreach_xfarray_idx(rp->xattr_records, array_cur) {
1080 		struct xrep_parent_xattr	key;
1081 
1082 		error = xfarray_load(rp->xattr_records, array_cur, &key);
1083 		if (error)
1084 			return error;
1085 
1086 		error = xrep_parent_insert_xattr(rp, &key);
1087 		if (error)
1088 			return error;
1089 	}
1090 
1091 	/* Empty out both arrays now that we've added the entries. */
1092 	xfarray_truncate(rp->xattr_records);
1093 	xfblob_truncate(rp->xattr_blobs);
1094 
1095 	xrep_tempfile_iounlock(rp->sc);
1096 
1097 	/* Recreate the empty transaction and relock the inode. */
1098 	xchk_trans_alloc_empty(rp->sc);
1099 	xchk_ilock(rp->sc, XFS_ILOCK_EXCL);
1100 	return 0;
1101 }
1102 
1103 /* Decide if we've stashed too much xattr data in memory. */
1104 static inline bool
xrep_parent_want_flush_xattrs(struct xrep_parent * rp)1105 xrep_parent_want_flush_xattrs(
1106 	struct xrep_parent	*rp)
1107 {
1108 	unsigned long long	bytes;
1109 
1110 	bytes = xfarray_bytes(rp->xattr_records) +
1111 		xfblob_bytes(rp->xattr_blobs);
1112 	return bytes > XREP_PARENT_XATTR_MAX_STASH_BYTES;
1113 }
1114 
1115 /* Flush staged attributes to the temporary file if we're over the limit. */
1116 STATIC int
xrep_parent_try_flush_xattrs(struct xfs_scrub * sc,void * priv)1117 xrep_parent_try_flush_xattrs(
1118 	struct xfs_scrub	*sc,
1119 	void			*priv)
1120 {
1121 	struct xrep_parent	*rp = priv;
1122 	int			error;
1123 
1124 	if (!xrep_parent_want_flush_xattrs(rp))
1125 		return 0;
1126 
1127 	error = xrep_parent_flush_xattrs(rp);
1128 	if (error)
1129 		return error;
1130 
1131 	/*
1132 	 * If there were any parent pointer updates to the xattr structure
1133 	 * while we dropped the ILOCK, the xattr structure is now stale.
1134 	 * Signal to the attr copy process that we need to start over, but
1135 	 * this time without opportunistic attr flushing.
1136 	 *
1137 	 * This is unlikely to happen, so we're ok with restarting the copy.
1138 	 */
1139 	mutex_lock(&rp->pscan.lock);
1140 	if (rp->saw_pptr_updates)
1141 		error = -ESTALE;
1142 	mutex_unlock(&rp->pscan.lock);
1143 	return error;
1144 }
1145 
1146 /* Copy all the non-pptr extended attributes into the temporary file. */
1147 STATIC int
xrep_parent_copy_xattrs(struct xrep_parent * rp)1148 xrep_parent_copy_xattrs(
1149 	struct xrep_parent	*rp)
1150 {
1151 	struct xfs_scrub	*sc = rp->sc;
1152 	int			error;
1153 
1154 	/*
1155 	 * Clear the pptr updates flag.  We hold sc->ip ILOCKed, so there
1156 	 * can't be any parent pointer updates in progress.
1157 	 */
1158 	mutex_lock(&rp->pscan.lock);
1159 	rp->saw_pptr_updates = false;
1160 	mutex_unlock(&rp->pscan.lock);
1161 
1162 	/* Copy xattrs, stopping periodically to flush the incore buffers. */
1163 	error = xchk_xattr_walk(sc, sc->ip, xrep_parent_stash_xattr,
1164 			xrep_parent_try_flush_xattrs, rp);
1165 	if (error && error != -ESTALE)
1166 		return error;
1167 
1168 	if (error == -ESTALE) {
1169 		/*
1170 		 * The xattr copy collided with a parent pointer update.
1171 		 * Restart the copy, but this time hold the ILOCK all the way
1172 		 * to the end to lock out any directory parent pointer updates.
1173 		 */
1174 		error = xchk_xattr_walk(sc, sc->ip, xrep_parent_stash_xattr,
1175 				NULL, rp);
1176 		if (error)
1177 			return error;
1178 	}
1179 
1180 	/* Flush any remaining stashed xattrs to the temporary file. */
1181 	if (xfarray_bytes(rp->xattr_records) == 0)
1182 		return 0;
1183 
1184 	return xrep_parent_flush_xattrs(rp);
1185 }
1186 
1187 /*
1188  * Ensure that @sc->ip and @sc->tempip both have attribute forks before we head
1189  * into the attr fork exchange transaction.  All files on a filesystem with
1190  * parent pointers must have an attr fork because the parent pointer code does
1191  * not itself add attribute forks.
1192  *
1193  * Note: Unlinkable unlinked files don't need one, but the overhead of having
1194  * an unnecessary attr fork is not justified by the additional code complexity
1195  * that would be needed to track that state correctly.
1196  */
1197 STATIC int
xrep_parent_ensure_attr_fork(struct xrep_parent * rp)1198 xrep_parent_ensure_attr_fork(
1199 	struct xrep_parent	*rp)
1200 {
1201 	struct xfs_scrub	*sc = rp->sc;
1202 	int			error;
1203 
1204 	error = xfs_attr_add_fork(sc->tempip,
1205 			sizeof(struct xfs_attr_sf_hdr), 1);
1206 	if (error)
1207 		return error;
1208 	return xfs_attr_add_fork(sc->ip, sizeof(struct xfs_attr_sf_hdr), 1);
1209 }
1210 
1211 /*
1212  * Finish replaying stashed parent pointer updates, allocate a transaction for
1213  * exchanging extent mappings, and take the ILOCKs of both files before we
1214  * commit the new attribute structure.
1215  */
1216 STATIC int
xrep_parent_finalize_tempfile(struct xrep_parent * rp)1217 xrep_parent_finalize_tempfile(
1218 	struct xrep_parent	*rp)
1219 {
1220 	struct xfs_scrub	*sc = rp->sc;
1221 	int			error;
1222 
1223 	/*
1224 	 * Repair relies on the ILOCK to quiesce all possible xattr updates.
1225 	 * Replay all queued parent pointer updates into the tempfile before
1226 	 * exchanging the contents, even if that means dropping the ILOCKs and
1227 	 * the transaction.
1228 	 */
1229 	do {
1230 		error = xrep_parent_replay_updates(rp);
1231 		if (error)
1232 			return error;
1233 
1234 		error = xrep_parent_ensure_attr_fork(rp);
1235 		if (error)
1236 			return error;
1237 
1238 		error = xrep_tempexch_trans_alloc(sc, XFS_ATTR_FORK, &rp->tx);
1239 		if (error)
1240 			return error;
1241 
1242 		if (xfarray_length(rp->pptr_recs) == 0)
1243 			break;
1244 
1245 		xchk_trans_cancel(sc);
1246 		xrep_tempfile_iunlock_both(sc);
1247 	} while (!xchk_should_terminate(sc, &error));
1248 	return error;
1249 }
1250 
1251 /*
1252  * Replay all the stashed parent pointers into the temporary file, copy all
1253  * the non-pptr xattrs from the file being repaired into the temporary file,
1254  * and exchange the attr fork contents atomically.
1255  */
1256 STATIC int
xrep_parent_rebuild_pptrs(struct xrep_parent * rp)1257 xrep_parent_rebuild_pptrs(
1258 	struct xrep_parent	*rp)
1259 {
1260 	struct xfs_scrub	*sc = rp->sc;
1261 	xfs_ino_t		parent_ino = NULLFSINO;
1262 	int			error;
1263 
1264 	/*
1265 	 * Copy non-ppttr xattrs from the file being repaired into the
1266 	 * temporary file's xattr structure.  We hold sc->ip's IOLOCK, which
1267 	 * prevents setxattr/removexattr calls from occurring, but renames
1268 	 * update the parent pointers without holding IOLOCK.  If we detect
1269 	 * stale attr structures, we restart the scan but only flush at the
1270 	 * end.
1271 	 */
1272 	error = xrep_parent_copy_xattrs(rp);
1273 	if (error)
1274 		return error;
1275 
1276 	/*
1277 	 * Cancel the empty transaction that we used to walk and copy attrs,
1278 	 * and drop the ILOCK so that we can take the IOLOCK on the temporary
1279 	 * file.  We still hold sc->ip's IOLOCK.
1280 	 */
1281 	xchk_trans_cancel(sc);
1282 	xchk_iunlock(sc, XFS_ILOCK_EXCL);
1283 
1284 	error = xrep_tempfile_iolock_polled(sc);
1285 	if (error)
1286 		return error;
1287 
1288 	/*
1289 	 * Allocate transaction, lock inodes, and make sure that we've replayed
1290 	 * all the stashed pptr updates to the tempdir.  After this point,
1291 	 * we're ready to exchange the attr fork mappings.
1292 	 */
1293 	error = xrep_parent_finalize_tempfile(rp);
1294 	if (error)
1295 		return error;
1296 
1297 	/* Last chance to abort before we start committing pptr fixes. */
1298 	if (xchk_should_terminate(sc, &error))
1299 		return error;
1300 
1301 	if (xchk_iscan_aborted(&rp->pscan.iscan))
1302 		return -ECANCELED;
1303 
1304 	/*
1305 	 * Exchange the attr fork contents and junk the old attr fork contents,
1306 	 * which are now in the tempfile.
1307 	 */
1308 	error = xrep_xattr_swap(sc, &rp->tx);
1309 	if (error)
1310 		return error;
1311 	error = xrep_xattr_reset_tempfile_fork(sc);
1312 	if (error)
1313 		return error;
1314 
1315 	/*
1316 	 * Roll to get a transaction without any inodes joined to it.  Then we
1317 	 * can drop the tempfile's ILOCK and IOLOCK before doing more work on
1318 	 * the scrub target file.
1319 	 */
1320 	error = xfs_trans_roll(&sc->tp);
1321 	if (error)
1322 		return error;
1323 	xrep_tempfile_iunlock(sc);
1324 	xrep_tempfile_iounlock(sc);
1325 
1326 	/*
1327 	 * We've committed the new parent pointers.  Find at least one parent
1328 	 * so that we can decide if we're moving this file to the orphanage.
1329 	 * For this purpose, root directories are their own parents.
1330 	 */
1331 	if (xchk_inode_is_dirtree_root(sc->ip)) {
1332 		xrep_findparent_scan_found(&rp->pscan, sc->ip->i_ino);
1333 	} else {
1334 		error = xrep_parent_lookup_pptrs(sc, &parent_ino);
1335 		if (error)
1336 			return error;
1337 		if (parent_ino != NULLFSINO)
1338 			xrep_findparent_scan_found(&rp->pscan, parent_ino);
1339 	}
1340 	return 0;
1341 }
1342 
1343 /*
1344  * Commit the new parent pointer structure (currently only the dotdot entry) to
1345  * the file that we're repairing.
1346  */
1347 STATIC int
xrep_parent_rebuild_tree(struct xrep_parent * rp)1348 xrep_parent_rebuild_tree(
1349 	struct xrep_parent	*rp)
1350 {
1351 	struct xfs_scrub	*sc = rp->sc;
1352 	bool			try_adoption;
1353 	int			error;
1354 
1355 	if (xfs_has_parent(sc->mp)) {
1356 		error = xrep_parent_rebuild_pptrs(rp);
1357 		if (error)
1358 			return error;
1359 	}
1360 
1361 	/*
1362 	 * Any file with no parent could be adopted.  This check happens after
1363 	 * rebuilding the parent pointer structure because we might have cycled
1364 	 * the ILOCK during that process.
1365 	 */
1366 	try_adoption = rp->pscan.parent_ino == NULLFSINO;
1367 
1368 	/*
1369 	 * Starting with metadir, we allow checking of parent pointers
1370 	 * of non-directory files that are children of the superblock.
1371 	 * Lack of parent is ok here.
1372 	 */
1373 	if (try_adoption && xfs_has_metadir(sc->mp) &&
1374 	    xchk_inode_is_sb_rooted(sc->ip))
1375 		try_adoption = false;
1376 
1377 	if (try_adoption) {
1378 		if (xrep_orphanage_can_adopt(sc))
1379 			return xrep_parent_move_to_orphanage(rp);
1380 		return -EFSCORRUPTED;
1381 
1382 	}
1383 
1384 	if (S_ISDIR(VFS_I(sc->ip)->i_mode))
1385 		return xrep_parent_reset_dotdot(rp);
1386 
1387 	return 0;
1388 }
1389 
1390 /* Count the number of parent pointers. */
1391 STATIC int
xrep_parent_count_pptr(struct xfs_scrub * sc,struct xfs_inode * ip,unsigned int attr_flags,const unsigned char * name,unsigned int namelen,const void * value,unsigned int valuelen,void * priv)1392 xrep_parent_count_pptr(
1393 	struct xfs_scrub	*sc,
1394 	struct xfs_inode	*ip,
1395 	unsigned int		attr_flags,
1396 	const unsigned char	*name,
1397 	unsigned int		namelen,
1398 	const void		*value,
1399 	unsigned int		valuelen,
1400 	void			*priv)
1401 {
1402 	struct xrep_parent	*rp = priv;
1403 	int			error;
1404 
1405 	if (!(attr_flags & XFS_ATTR_PARENT))
1406 		return 0;
1407 
1408 	error = xfs_parent_from_attr(sc->mp, attr_flags, name, namelen, value,
1409 			valuelen, NULL, NULL);
1410 	if (error)
1411 		return error;
1412 
1413 	rp->parents++;
1414 	return 0;
1415 }
1416 
1417 /*
1418  * After all parent pointer rebuilding and adoption activity completes, reset
1419  * the link count of this nondirectory, having scanned the fs to rebuild all
1420  * parent pointers.
1421  */
1422 STATIC int
xrep_parent_set_nondir_nlink(struct xrep_parent * rp)1423 xrep_parent_set_nondir_nlink(
1424 	struct xrep_parent	*rp)
1425 {
1426 	struct xfs_scrub	*sc = rp->sc;
1427 	struct xfs_inode	*ip = sc->ip;
1428 	struct xfs_perag	*pag;
1429 	bool			joined = false;
1430 	int			error;
1431 
1432 	/* Count parent pointers so we can reset the file link count. */
1433 	rp->parents = 0;
1434 	error = xchk_xattr_walk(sc, ip, xrep_parent_count_pptr, NULL, rp);
1435 	if (error)
1436 		return error;
1437 
1438 	/*
1439 	 * Starting with metadir, we allow checking of parent pointers of
1440 	 * non-directory files that are children of the superblock.  Pretend
1441 	 * that we found a parent pointer attr.
1442 	 */
1443 	if (xfs_has_metadir(sc->mp) && xchk_inode_is_sb_rooted(sc->ip))
1444 		rp->parents++;
1445 
1446 	if (rp->parents > 0 && xfs_inode_on_unlinked_list(ip)) {
1447 		xfs_trans_ijoin(sc->tp, sc->ip, 0);
1448 		joined = true;
1449 
1450 		/*
1451 		 * The file is on the unlinked list but we found parents.
1452 		 * Remove the file from the unlinked list.
1453 		 */
1454 		pag = xfs_perag_get(sc->mp, XFS_INO_TO_AGNO(sc->mp, ip->i_ino));
1455 		if (!pag) {
1456 			ASSERT(0);
1457 			return -EFSCORRUPTED;
1458 		}
1459 
1460 		error = xfs_iunlink_remove(sc->tp, pag, ip);
1461 		xfs_perag_put(pag);
1462 		if (error)
1463 			return error;
1464 	} else if (rp->parents == 0 && !xfs_inode_on_unlinked_list(ip)) {
1465 		xfs_trans_ijoin(sc->tp, sc->ip, 0);
1466 		joined = true;
1467 
1468 		/*
1469 		 * The file is not on the unlinked list but we found no
1470 		 * parents.  Add the file to the unlinked list.
1471 		 */
1472 		error = xfs_iunlink(sc->tp, ip);
1473 		if (error)
1474 			return error;
1475 	}
1476 
1477 	/* Set the correct link count. */
1478 	if (VFS_I(ip)->i_nlink != rp->parents) {
1479 		if (!joined) {
1480 			xfs_trans_ijoin(sc->tp, sc->ip, 0);
1481 			joined = true;
1482 		}
1483 
1484 		set_nlink(VFS_I(ip), min_t(unsigned long long, rp->parents,
1485 					   XFS_NLINK_PINNED));
1486 	}
1487 
1488 	/* Log the inode to keep it moving forward if we dirtied anything. */
1489 	if (joined)
1490 		xfs_trans_log_inode(sc->tp, ip, XFS_ILOG_CORE);
1491 	return 0;
1492 }
1493 
1494 /* Set up the filesystem scan so we can look for parents. */
1495 STATIC int
xrep_parent_setup_scan(struct xrep_parent * rp)1496 xrep_parent_setup_scan(
1497 	struct xrep_parent	*rp)
1498 {
1499 	struct xfs_scrub	*sc = rp->sc;
1500 	char			*descr;
1501 	struct xfs_da_geometry	*geo = sc->mp->m_attr_geo;
1502 	int			max_len;
1503 	int			error;
1504 
1505 	if (!xfs_has_parent(sc->mp))
1506 		return xrep_findparent_scan_start(sc, &rp->pscan);
1507 
1508 	/* Buffers for copying non-pptr attrs to the tempfile */
1509 	rp->xattr_name = kvmalloc(XATTR_NAME_MAX + 1, XCHK_GFP_FLAGS);
1510 	if (!rp->xattr_name)
1511 		return -ENOMEM;
1512 
1513 	/*
1514 	 * Allocate enough memory to handle loading local attr values from the
1515 	 * xfblob data while flushing stashed attrs to the temporary file.
1516 	 * We only realloc the buffer when salvaging remote attr values, so
1517 	 * TRY_HARDER means we allocate the maximal attr value size.
1518 	 */
1519 	if (sc->flags & XCHK_TRY_HARDER)
1520 		max_len = XATTR_SIZE_MAX;
1521 	else
1522 		max_len = xfs_attr_leaf_entsize_local_max(geo->blksize);
1523 	error = xrep_parent_alloc_xattr_value(rp, max_len);
1524 	if (error)
1525 		goto out_xattr_name;
1526 
1527 	/* Set up some staging memory for logging parent pointer updates. */
1528 	descr = xchk_xfile_ino_descr(sc, "parent pointer entries");
1529 	error = xfarray_create(descr, 0, sizeof(struct xrep_pptr),
1530 			&rp->pptr_recs);
1531 	kfree(descr);
1532 	if (error)
1533 		goto out_xattr_value;
1534 
1535 	descr = xchk_xfile_ino_descr(sc, "parent pointer names");
1536 	error = xfblob_create(descr, &rp->pptr_names);
1537 	kfree(descr);
1538 	if (error)
1539 		goto out_recs;
1540 
1541 	/* Set up some storage for copying attrs before the mapping exchange */
1542 	descr = xchk_xfile_ino_descr(sc,
1543 				"parent pointer retained xattr entries");
1544 	error = xfarray_create(descr, 0, sizeof(struct xrep_parent_xattr),
1545 			&rp->xattr_records);
1546 	kfree(descr);
1547 	if (error)
1548 		goto out_names;
1549 
1550 	descr = xchk_xfile_ino_descr(sc,
1551 				"parent pointer retained xattr values");
1552 	error = xfblob_create(descr, &rp->xattr_blobs);
1553 	kfree(descr);
1554 	if (error)
1555 		goto out_attr_keys;
1556 
1557 	error = __xrep_findparent_scan_start(sc, &rp->pscan,
1558 			xrep_parent_live_update);
1559 	if (error)
1560 		goto out_attr_values;
1561 
1562 	return 0;
1563 
1564 out_attr_values:
1565 	xfblob_destroy(rp->xattr_blobs);
1566 	rp->xattr_blobs = NULL;
1567 out_attr_keys:
1568 	xfarray_destroy(rp->xattr_records);
1569 	rp->xattr_records = NULL;
1570 out_names:
1571 	xfblob_destroy(rp->pptr_names);
1572 	rp->pptr_names = NULL;
1573 out_recs:
1574 	xfarray_destroy(rp->pptr_recs);
1575 	rp->pptr_recs = NULL;
1576 out_xattr_value:
1577 	kvfree(rp->xattr_value);
1578 	rp->xattr_value = NULL;
1579 out_xattr_name:
1580 	kvfree(rp->xattr_name);
1581 	rp->xattr_name = NULL;
1582 	return error;
1583 }
1584 
1585 int
xrep_parent(struct xfs_scrub * sc)1586 xrep_parent(
1587 	struct xfs_scrub	*sc)
1588 {
1589 	struct xrep_parent	*rp = sc->buf;
1590 	int			error;
1591 
1592 	/*
1593 	 * When the parent pointers feature is enabled, repairs are committed
1594 	 * by atomically committing a new xattr structure and reaping the old
1595 	 * attr fork.  Reaping requires rmap and exchange-range to be enabled.
1596 	 */
1597 	if (xfs_has_parent(sc->mp)) {
1598 		if (!xfs_has_rmapbt(sc->mp))
1599 			return -EOPNOTSUPP;
1600 		if (!xfs_has_exchange_range(sc->mp))
1601 			return -EOPNOTSUPP;
1602 	}
1603 
1604 	error = xrep_parent_setup_scan(rp);
1605 	if (error)
1606 		return error;
1607 
1608 	if (xfs_has_parent(sc->mp))
1609 		error = xrep_parent_scan_dirtree(rp);
1610 	else
1611 		error = xrep_parent_find_dotdot(rp);
1612 	if (error)
1613 		goto out_teardown;
1614 
1615 	/* Last chance to abort before we start committing dotdot fixes. */
1616 	if (xchk_should_terminate(sc, &error))
1617 		goto out_teardown;
1618 
1619 	error = xrep_parent_rebuild_tree(rp);
1620 	if (error)
1621 		goto out_teardown;
1622 	if (xfs_has_parent(sc->mp) && !S_ISDIR(VFS_I(sc->ip)->i_mode)) {
1623 		error = xrep_parent_set_nondir_nlink(rp);
1624 		if (error)
1625 			goto out_teardown;
1626 	}
1627 
1628 	error = xrep_defer_finish(sc);
1629 
1630 out_teardown:
1631 	xrep_parent_teardown(rp);
1632 	return error;
1633 }
1634