xref: /linux/fs/xfs/scrub/inode_repair.c (revision 3fd6c59042dbba50391e30862beac979491145fe)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_defer.h"
13 #include "xfs_btree.h"
14 #include "xfs_bit.h"
15 #include "xfs_log_format.h"
16 #include "xfs_trans.h"
17 #include "xfs_sb.h"
18 #include "xfs_inode.h"
19 #include "xfs_icache.h"
20 #include "xfs_inode_buf.h"
21 #include "xfs_inode_fork.h"
22 #include "xfs_ialloc.h"
23 #include "xfs_da_format.h"
24 #include "xfs_reflink.h"
25 #include "xfs_alloc.h"
26 #include "xfs_rmap.h"
27 #include "xfs_rmap_btree.h"
28 #include "xfs_bmap.h"
29 #include "xfs_bmap_btree.h"
30 #include "xfs_bmap_util.h"
31 #include "xfs_dir2.h"
32 #include "xfs_dir2_priv.h"
33 #include "xfs_quota_defs.h"
34 #include "xfs_quota.h"
35 #include "xfs_ag.h"
36 #include "xfs_rtbitmap.h"
37 #include "xfs_attr_leaf.h"
38 #include "xfs_log_priv.h"
39 #include "xfs_health.h"
40 #include "xfs_symlink_remote.h"
41 #include "scrub/xfs_scrub.h"
42 #include "scrub/scrub.h"
43 #include "scrub/common.h"
44 #include "scrub/btree.h"
45 #include "scrub/trace.h"
46 #include "scrub/repair.h"
47 #include "scrub/iscan.h"
48 #include "scrub/readdir.h"
49 #include "scrub/tempfile.h"
50 
51 /*
52  * Inode Record Repair
53  * ===================
54  *
55  * Roughly speaking, inode problems can be classified based on whether or not
56  * they trip the dinode verifiers.  If those trip, then we won't be able to
57  * xfs_iget ourselves the inode.
58  *
 * Therefore, the xrep_dinode_* functions fix anything that will cause the
 * inode buffer verifier or the dinode verifier to fail.  The xrep_inode_*
 * functions fix things on live incore inodes.  The inode repair functions
 * make decisions with security and usability implications when reviving a
 * file:
63  *
 * - Files with zero di_mode or a garbage di_mode are converted to a regular
 *   file that only root can read.  This file may not actually contain user
 *   data, if the file was not previously a regular file.  Setuid and setgid
 *   bits are cleared.
68  *
69  * - Zero-size directories can be truncated to look empty.  It is necessary to
70  *   run the bmapbtd and directory repair functions to fully rebuild the
71  *   directory.
72  *
73  * - Zero-size symbolic link targets can be truncated to '?'.  It is necessary
74  *   to run the bmapbtd and symlink repair functions to salvage the symlink.
75  *
76  * - Invalid extent size hints will be removed.
77  *
78  * - Quotacheck will be scheduled if we repaired an inode that was so badly
79  *   damaged that the ondisk inode had to be rebuilt.
80  *
81  * - Invalid user, group, or project IDs (aka -1U) will be reset to zero.
82  *   Setuid and setgid bits are cleared.
83  *
84  * - Data and attr forks are reset to extents format with zero extents if the
85  *   fork data is inconsistent.  It is necessary to run the bmapbtd or bmapbta
86  *   repair functions to recover the space mapping.
87  *
88  * - ACLs will not be recovered if the attr fork is zapped or the extended
89  *   attribute structure itself requires salvaging.
90  *
91  * - If the attr fork is zapped, the user and group ids are reset to root and
92  *   the setuid and setgid bits are removed.
93  */
94 
95 /*
96  * All the information we need to repair the ondisk inode if we can't iget the
97  * incore inode.  We don't allocate this buffer unless we're going to perform
98  * a repair to the ondisk inode cluster buffer.
99  */
100 struct xrep_inode {
101 	/* Inode mapping that we saved from the initial lookup attempt. */
102 	struct xfs_imap		imap;
103 
104 	struct xfs_scrub	*sc;
105 
106 	/* Blocks in use on the data device by data extents or bmbt blocks. */
107 	xfs_rfsblock_t		data_blocks;
108 
109 	/* Blocks in use on the rt device. */
110 	xfs_rfsblock_t		rt_blocks;
111 
112 	/* Blocks in use by the attr fork. */
113 	xfs_rfsblock_t		attr_blocks;
114 
115 	/* Number of data device extents for the data fork. */
116 	xfs_extnum_t		data_extents;
117 
118 	/*
119 	 * Number of realtime device extents for the data fork.  If
120 	 * data_extents and rt_extents indicate that the data fork has extents
121 	 * on both devices, we'll just back away slowly.
122 	 */
123 	xfs_extnum_t		rt_extents;
124 
125 	/* Number of (data device) extents for the attr fork. */
126 	xfs_aextnum_t		attr_extents;
127 
128 	/* Sick state to set after zapping parts of the inode. */
129 	unsigned int		ino_sick_mask;
130 
131 	/* Must we remove all access from this file? */
132 	bool			zap_acls;
133 
134 	/* Inode scanner to see if we can find the ftype from dirents */
135 	struct xchk_iscan	ftype_iscan;
136 	uint8_t			alleged_ftype;
137 };
138 
139 /*
140  * Setup function for inode repair.  @imap contains the ondisk inode mapping
141  * information so that we can correct the ondisk inode cluster buffer if
142  * necessary to make iget work.
143  */
144 int
xrep_setup_inode(struct xfs_scrub * sc,const struct xfs_imap * imap)145 xrep_setup_inode(
146 	struct xfs_scrub	*sc,
147 	const struct xfs_imap	*imap)
148 {
149 	struct xrep_inode	*ri;
150 
151 	sc->buf = kzalloc(sizeof(struct xrep_inode), XCHK_GFP_FLAGS);
152 	if (!sc->buf)
153 		return -ENOMEM;
154 
155 	ri = sc->buf;
156 	memcpy(&ri->imap, imap, sizeof(struct xfs_imap));
157 	ri->sc = sc;
158 	return 0;
159 }
160 
161 /*
162  * Make sure this ondisk inode can pass the inode buffer verifier.  This is
163  * not the same as the dinode verifier.
164  */
165 STATIC void
xrep_dinode_buf_core(struct xfs_scrub * sc,struct xfs_buf * bp,unsigned int ioffset)166 xrep_dinode_buf_core(
167 	struct xfs_scrub	*sc,
168 	struct xfs_buf		*bp,
169 	unsigned int		ioffset)
170 {
171 	struct xfs_dinode	*dip = xfs_buf_offset(bp, ioffset);
172 	struct xfs_trans	*tp = sc->tp;
173 	struct xfs_mount	*mp = sc->mp;
174 	xfs_agino_t		agino;
175 	bool			crc_ok = false;
176 	bool			magic_ok = false;
177 	bool			unlinked_ok = false;
178 
179 	agino = be32_to_cpu(dip->di_next_unlinked);
180 
181 	if (xfs_verify_agino_or_null(bp->b_pag, agino))
182 		unlinked_ok = true;
183 
184 	if (dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
185 	    xfs_dinode_good_version(mp, dip->di_version))
186 		magic_ok = true;
187 
188 	if (xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
189 			XFS_DINODE_CRC_OFF))
190 		crc_ok = true;
191 
192 	if (magic_ok && unlinked_ok && crc_ok)
193 		return;
194 
195 	if (!magic_ok) {
196 		dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
197 		dip->di_version = 3;
198 	}
199 	if (!unlinked_ok)
200 		dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
201 	xfs_dinode_calc_crc(mp, dip);
202 	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
203 	xfs_trans_log_buf(tp, bp, ioffset,
204 				  ioffset + sizeof(struct xfs_dinode) - 1);
205 }
206 
207 /* Make sure this inode cluster buffer can pass the inode buffer verifier. */
208 STATIC void
xrep_dinode_buf(struct xfs_scrub * sc,struct xfs_buf * bp)209 xrep_dinode_buf(
210 	struct xfs_scrub	*sc,
211 	struct xfs_buf		*bp)
212 {
213 	struct xfs_mount	*mp = sc->mp;
214 	int			i;
215 	int			ni;
216 
217 	ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
218 	for (i = 0; i < ni; i++)
219 		xrep_dinode_buf_core(sc, bp, i << mp->m_sb.sb_inodelog);
220 }
221 
222 /* Reinitialize things that never change in an inode. */
223 STATIC void
xrep_dinode_header(struct xfs_scrub * sc,struct xfs_dinode * dip)224 xrep_dinode_header(
225 	struct xfs_scrub	*sc,
226 	struct xfs_dinode	*dip)
227 {
228 	trace_xrep_dinode_header(sc, dip);
229 
230 	dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
231 	if (!xfs_dinode_good_version(sc->mp, dip->di_version))
232 		dip->di_version = 3;
233 	dip->di_ino = cpu_to_be64(sc->sm->sm_ino);
234 	uuid_copy(&dip->di_uuid, &sc->mp->m_sb.sb_meta_uuid);
235 	dip->di_gen = cpu_to_be32(sc->sm->sm_gen);
236 }
237 
238 /*
239  * If this directory entry points to the scrub target inode, then the directory
240  * we're scanning is the parent of the scrub target inode.
241  */
242 STATIC int
xrep_dinode_findmode_dirent(struct xfs_scrub * sc,struct xfs_inode * dp,xfs_dir2_dataptr_t dapos,const struct xfs_name * name,xfs_ino_t ino,void * priv)243 xrep_dinode_findmode_dirent(
244 	struct xfs_scrub		*sc,
245 	struct xfs_inode		*dp,
246 	xfs_dir2_dataptr_t		dapos,
247 	const struct xfs_name		*name,
248 	xfs_ino_t			ino,
249 	void				*priv)
250 {
251 	struct xrep_inode		*ri = priv;
252 	int				error = 0;
253 
254 	if (xchk_should_terminate(ri->sc, &error))
255 		return error;
256 
257 	if (ino != sc->sm->sm_ino)
258 		return 0;
259 
260 	/* Ignore garbage directory entry names. */
261 	if (name->len == 0 || !xfs_dir2_namecheck(name->name, name->len))
262 		return -EFSCORRUPTED;
263 
264 	/* Don't pick up dot or dotdot entries; we only want child dirents. */
265 	if (xfs_dir2_samename(name, &xfs_name_dotdot) ||
266 	    xfs_dir2_samename(name, &xfs_name_dot))
267 		return 0;
268 
269 	/*
270 	 * Uhoh, more than one parent for this inode and they don't agree on
271 	 * the file type?
272 	 */
273 	if (ri->alleged_ftype != XFS_DIR3_FT_UNKNOWN &&
274 	    ri->alleged_ftype != name->type) {
275 		trace_xrep_dinode_findmode_dirent_inval(ri->sc, dp, name->type,
276 				ri->alleged_ftype);
277 		return -EFSCORRUPTED;
278 	}
279 
280 	/* We found a potential parent; remember the ftype. */
281 	trace_xrep_dinode_findmode_dirent(ri->sc, dp, name->type);
282 	ri->alleged_ftype = name->type;
283 	return 0;
284 }
285 
286 /* Try to lock a directory, or wait a jiffy. */
287 static inline int
xrep_dinode_ilock_nowait(struct xfs_inode * dp,unsigned int lock_mode)288 xrep_dinode_ilock_nowait(
289 	struct xfs_inode	*dp,
290 	unsigned int		lock_mode)
291 {
292 	if (xfs_ilock_nowait(dp, lock_mode))
293 		return true;
294 
295 	schedule_timeout_killable(1);
296 	return false;
297 }
298 
299 /*
300  * Try to lock a directory to look for ftype hints.  Since we already hold the
301  * AGI buffer, we cannot block waiting for the ILOCK because rename can take
302  * the ILOCK and then try to lock AGIs.
303  */
304 STATIC int
xrep_dinode_trylock_directory(struct xrep_inode * ri,struct xfs_inode * dp,unsigned int * lock_modep)305 xrep_dinode_trylock_directory(
306 	struct xrep_inode	*ri,
307 	struct xfs_inode	*dp,
308 	unsigned int		*lock_modep)
309 {
310 	unsigned long		deadline = jiffies + msecs_to_jiffies(30000);
311 	unsigned int		lock_mode;
312 	int			error = 0;
313 
314 	do {
315 		if (xchk_should_terminate(ri->sc, &error))
316 			return error;
317 
318 		if (xfs_need_iread_extents(&dp->i_df))
319 			lock_mode = XFS_ILOCK_EXCL;
320 		else
321 			lock_mode = XFS_ILOCK_SHARED;
322 
323 		if (xrep_dinode_ilock_nowait(dp, lock_mode)) {
324 			*lock_modep = lock_mode;
325 			return 0;
326 		}
327 	} while (!time_is_before_jiffies(deadline));
328 	return -EBUSY;
329 }
330 
331 /*
332  * If this is a directory, walk the dirents looking for any that point to the
333  * scrub target inode.
334  */
335 STATIC int
xrep_dinode_findmode_walk_directory(struct xrep_inode * ri,struct xfs_inode * dp)336 xrep_dinode_findmode_walk_directory(
337 	struct xrep_inode	*ri,
338 	struct xfs_inode	*dp)
339 {
340 	struct xfs_scrub	*sc = ri->sc;
341 	unsigned int		lock_mode;
342 	int			error = 0;
343 
344 	/* Ignore temporary repair directories. */
345 	if (xrep_is_tempfile(dp))
346 		return 0;
347 
348 	/*
349 	 * Scan the directory to see if there it contains an entry pointing to
350 	 * the directory that we are repairing.
351 	 */
352 	error = xrep_dinode_trylock_directory(ri, dp, &lock_mode);
353 	if (error)
354 		return error;
355 
356 	/*
357 	 * If this directory is known to be sick, we cannot scan it reliably
358 	 * and must abort.
359 	 */
360 	if (xfs_inode_has_sickness(dp, XFS_SICK_INO_CORE |
361 				       XFS_SICK_INO_BMBTD |
362 				       XFS_SICK_INO_DIR)) {
363 		error = -EFSCORRUPTED;
364 		goto out_unlock;
365 	}
366 
367 	/*
368 	 * We cannot complete our parent pointer scan if a directory looks as
369 	 * though it has been zapped by the inode record repair code.
370 	 */
371 	if (xchk_dir_looks_zapped(dp)) {
372 		error = -EBUSY;
373 		goto out_unlock;
374 	}
375 
376 	error = xchk_dir_walk(sc, dp, xrep_dinode_findmode_dirent, ri);
377 	if (error)
378 		goto out_unlock;
379 
380 out_unlock:
381 	xfs_iunlock(dp, lock_mode);
382 	return error;
383 }
384 
385 /*
386  * Try to find the mode of the inode being repaired by looking for directories
387  * that point down to this file.
388  */
389 STATIC int
xrep_dinode_find_mode(struct xrep_inode * ri,uint16_t * mode)390 xrep_dinode_find_mode(
391 	struct xrep_inode	*ri,
392 	uint16_t		*mode)
393 {
394 	struct xfs_scrub	*sc = ri->sc;
395 	struct xfs_inode	*dp;
396 	int			error;
397 
398 	/* No ftype means we have no other metadata to consult. */
399 	if (!xfs_has_ftype(sc->mp)) {
400 		*mode = S_IFREG;
401 		return 0;
402 	}
403 
404 	/*
405 	 * Scan all directories for parents that might point down to this
406 	 * inode.  Skip the inode being repaired during the scan since it
407 	 * cannot be its own parent.  Note that we still hold the AGI locked
408 	 * so there's a real possibility that _iscan_iter can return EBUSY.
409 	 */
410 	xchk_iscan_start(sc, 5000, 100, &ri->ftype_iscan);
411 	xchk_iscan_set_agi_trylock(&ri->ftype_iscan);
412 	ri->ftype_iscan.skip_ino = sc->sm->sm_ino;
413 	ri->alleged_ftype = XFS_DIR3_FT_UNKNOWN;
414 	while ((error = xchk_iscan_iter(&ri->ftype_iscan, &dp)) == 1) {
415 		if (S_ISDIR(VFS_I(dp)->i_mode))
416 			error = xrep_dinode_findmode_walk_directory(ri, dp);
417 		xchk_iscan_mark_visited(&ri->ftype_iscan, dp);
418 		xchk_irele(sc, dp);
419 		if (error < 0)
420 			break;
421 		if (xchk_should_terminate(sc, &error))
422 			break;
423 	}
424 	xchk_iscan_iter_finish(&ri->ftype_iscan);
425 	xchk_iscan_teardown(&ri->ftype_iscan);
426 
427 	if (error == -EBUSY) {
428 		if (ri->alleged_ftype != XFS_DIR3_FT_UNKNOWN) {
429 			/*
430 			 * If we got an EBUSY after finding at least one
431 			 * dirent, that means the scan found an inode on the
432 			 * inactivation list and could not open it.  Accept the
433 			 * alleged ftype and install a new mode below.
434 			 */
435 			error = 0;
436 		} else if (!(sc->flags & XCHK_TRY_HARDER)) {
437 			/*
438 			 * Otherwise, retry the operation one time to see if
439 			 * the reason for the delay is an inode from the same
440 			 * cluster buffer waiting on the inactivation list.
441 			 */
442 			error = -EDEADLOCK;
443 		}
444 	}
445 	if (error)
446 		return error;
447 
448 	/*
449 	 * Convert the discovered ftype into the file mode.  If all else fails,
450 	 * return S_IFREG.
451 	 */
452 	switch (ri->alleged_ftype) {
453 	case XFS_DIR3_FT_DIR:
454 		*mode = S_IFDIR;
455 		break;
456 	case XFS_DIR3_FT_WHT:
457 	case XFS_DIR3_FT_CHRDEV:
458 		*mode = S_IFCHR;
459 		break;
460 	case XFS_DIR3_FT_BLKDEV:
461 		*mode = S_IFBLK;
462 		break;
463 	case XFS_DIR3_FT_FIFO:
464 		*mode = S_IFIFO;
465 		break;
466 	case XFS_DIR3_FT_SOCK:
467 		*mode = S_IFSOCK;
468 		break;
469 	case XFS_DIR3_FT_SYMLINK:
470 		*mode = S_IFLNK;
471 		break;
472 	default:
473 		*mode = S_IFREG;
474 		break;
475 	}
476 	return 0;
477 }
478 
479 /* Turn di_mode into /something/ recognizable.  Returns true if we succeed. */
480 STATIC int
xrep_dinode_mode(struct xrep_inode * ri,struct xfs_dinode * dip)481 xrep_dinode_mode(
482 	struct xrep_inode	*ri,
483 	struct xfs_dinode	*dip)
484 {
485 	struct xfs_scrub	*sc = ri->sc;
486 	uint16_t		mode = be16_to_cpu(dip->di_mode);
487 	int			error;
488 
489 	trace_xrep_dinode_mode(sc, dip);
490 
491 	if (mode == 0 || xfs_mode_to_ftype(mode) != XFS_DIR3_FT_UNKNOWN)
492 		return 0;
493 
494 	/* Try to fix the mode.  If we cannot, then leave everything alone. */
495 	error = xrep_dinode_find_mode(ri, &mode);
496 	switch (error) {
497 	case -EINTR:
498 	case -EBUSY:
499 	case -EDEADLOCK:
500 		/* temporary failure or fatal signal */
501 		return error;
502 	case 0:
503 		/* found mode */
504 		break;
505 	default:
506 		/* some other error, assume S_IFREG */
507 		mode = S_IFREG;
508 		break;
509 	}
510 
511 	/* bad mode, so we set it to a file that only root can read */
512 	dip->di_mode = cpu_to_be16(mode);
513 	dip->di_uid = 0;
514 	dip->di_gid = 0;
515 	ri->zap_acls = true;
516 	return 0;
517 }
518 
519 /* Fix unused link count fields having nonzero values. */
520 STATIC void
xrep_dinode_nlinks(struct xfs_dinode * dip)521 xrep_dinode_nlinks(
522 	struct xfs_dinode	*dip)
523 {
524 	if (dip->di_version < 2) {
525 		dip->di_nlink = 0;
526 		return;
527 	}
528 
529 	if (xfs_dinode_is_metadir(dip)) {
530 		if (be16_to_cpu(dip->di_metatype) >= XFS_METAFILE_MAX)
531 			dip->di_metatype = cpu_to_be16(XFS_METAFILE_UNKNOWN);
532 	} else {
533 		dip->di_metatype = 0;
534 	}
535 }
536 
537 /* Fix any conflicting flags that the verifiers complain about. */
538 STATIC void
xrep_dinode_flags(struct xfs_scrub * sc,struct xfs_dinode * dip,bool isrt)539 xrep_dinode_flags(
540 	struct xfs_scrub	*sc,
541 	struct xfs_dinode	*dip,
542 	bool			isrt)
543 {
544 	struct xfs_mount	*mp = sc->mp;
545 	uint64_t		flags2 = be64_to_cpu(dip->di_flags2);
546 	uint16_t		flags = be16_to_cpu(dip->di_flags);
547 	uint16_t		mode = be16_to_cpu(dip->di_mode);
548 
549 	trace_xrep_dinode_flags(sc, dip);
550 
551 	if (isrt)
552 		flags |= XFS_DIFLAG_REALTIME;
553 	else
554 		flags &= ~XFS_DIFLAG_REALTIME;
555 
556 	/*
557 	 * For regular files on a reflink filesystem, set the REFLINK flag to
558 	 * protect shared extents.  A later stage will actually check those
559 	 * extents and clear the flag if possible.
560 	 */
561 	if (xfs_has_reflink(mp) && S_ISREG(mode))
562 		flags2 |= XFS_DIFLAG2_REFLINK;
563 	else
564 		flags2 &= ~(XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE);
565 	if (flags & XFS_DIFLAG_REALTIME)
566 		flags2 &= ~XFS_DIFLAG2_REFLINK;
567 	if (!xfs_has_bigtime(mp))
568 		flags2 &= ~XFS_DIFLAG2_BIGTIME;
569 	if (!xfs_has_large_extent_counts(mp))
570 		flags2 &= ~XFS_DIFLAG2_NREXT64;
571 	if (flags2 & XFS_DIFLAG2_NREXT64)
572 		dip->di_nrext64_pad = 0;
573 	else if (dip->di_version >= 3)
574 		dip->di_v3_pad = 0;
575 
576 	if (flags2 & XFS_DIFLAG2_METADATA) {
577 		xfs_failaddr_t	fa;
578 
579 		fa = xfs_dinode_verify_metadir(sc->mp, dip, mode, flags,
580 				flags2);
581 		if (fa)
582 			flags2 &= ~XFS_DIFLAG2_METADATA;
583 	}
584 
585 	dip->di_flags = cpu_to_be16(flags);
586 	dip->di_flags2 = cpu_to_be64(flags2);
587 }
588 
589 /*
590  * Blow out symlink; now it points nowhere.  We don't have to worry about
591  * incore state because this inode is failing the verifiers.
592  */
593 STATIC void
xrep_dinode_zap_symlink(struct xrep_inode * ri,struct xfs_dinode * dip)594 xrep_dinode_zap_symlink(
595 	struct xrep_inode	*ri,
596 	struct xfs_dinode	*dip)
597 {
598 	struct xfs_scrub	*sc = ri->sc;
599 	char			*p;
600 
601 	trace_xrep_dinode_zap_symlink(sc, dip);
602 
603 	dip->di_format = XFS_DINODE_FMT_LOCAL;
604 	dip->di_size = cpu_to_be64(1);
605 	p = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
606 	*p = '?';
607 	ri->ino_sick_mask |= XFS_SICK_INO_SYMLINK_ZAPPED;
608 }
609 
610 /*
611  * Blow out dir, make the parent point to the root.  In the future repair will
612  * reconstruct this directory for us.  Note that there's no in-core directory
613  * inode because the sf verifier tripped, so we don't have to worry about the
614  * dentry cache.
615  */
616 STATIC void
xrep_dinode_zap_dir(struct xrep_inode * ri,struct xfs_dinode * dip)617 xrep_dinode_zap_dir(
618 	struct xrep_inode	*ri,
619 	struct xfs_dinode	*dip)
620 {
621 	struct xfs_scrub	*sc = ri->sc;
622 	struct xfs_mount	*mp = sc->mp;
623 	struct xfs_dir2_sf_hdr	*sfp;
624 	int			i8count;
625 
626 	trace_xrep_dinode_zap_dir(sc, dip);
627 
628 	dip->di_format = XFS_DINODE_FMT_LOCAL;
629 	i8count = mp->m_sb.sb_rootino > XFS_DIR2_MAX_SHORT_INUM;
630 	sfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
631 	sfp->count = 0;
632 	sfp->i8count = i8count;
633 	xfs_dir2_sf_put_parent_ino(sfp, mp->m_sb.sb_rootino);
634 	dip->di_size = cpu_to_be64(xfs_dir2_sf_hdr_size(i8count));
635 	ri->ino_sick_mask |= XFS_SICK_INO_DIR_ZAPPED;
636 }
637 
638 /* Make sure we don't have a garbage file size. */
639 STATIC void
xrep_dinode_size(struct xrep_inode * ri,struct xfs_dinode * dip)640 xrep_dinode_size(
641 	struct xrep_inode	*ri,
642 	struct xfs_dinode	*dip)
643 {
644 	struct xfs_scrub	*sc = ri->sc;
645 	uint64_t		size = be64_to_cpu(dip->di_size);
646 	uint16_t		mode = be16_to_cpu(dip->di_mode);
647 
648 	trace_xrep_dinode_size(sc, dip);
649 
650 	switch (mode & S_IFMT) {
651 	case S_IFIFO:
652 	case S_IFCHR:
653 	case S_IFBLK:
654 	case S_IFSOCK:
655 		/* di_size can't be nonzero for special files */
656 		dip->di_size = 0;
657 		break;
658 	case S_IFREG:
659 		/* Regular files can't be larger than 2^63-1 bytes. */
660 		dip->di_size = cpu_to_be64(size & ~(1ULL << 63));
661 		break;
662 	case S_IFLNK:
663 		/*
664 		 * Truncate ridiculously oversized symlinks.  If the size is
665 		 * zero, reset it to point to the current directory.  Both of
666 		 * these conditions trigger dinode verifier errors, so there
667 		 * is no in-core state to reset.
668 		 */
669 		if (size > XFS_SYMLINK_MAXLEN)
670 			dip->di_size = cpu_to_be64(XFS_SYMLINK_MAXLEN);
671 		else if (size == 0)
672 			xrep_dinode_zap_symlink(ri, dip);
673 		break;
674 	case S_IFDIR:
675 		/*
676 		 * Directories can't have a size larger than 32G.  If the size
677 		 * is zero, reset it to an empty directory.  Both of these
678 		 * conditions trigger dinode verifier errors, so there is no
679 		 * in-core state to reset.
680 		 */
681 		if (size > XFS_DIR2_SPACE_SIZE)
682 			dip->di_size = cpu_to_be64(XFS_DIR2_SPACE_SIZE);
683 		else if (size == 0)
684 			xrep_dinode_zap_dir(ri, dip);
685 		break;
686 	}
687 }
688 
689 /* Fix extent size hints. */
690 STATIC void
xrep_dinode_extsize_hints(struct xfs_scrub * sc,struct xfs_dinode * dip)691 xrep_dinode_extsize_hints(
692 	struct xfs_scrub	*sc,
693 	struct xfs_dinode	*dip)
694 {
695 	struct xfs_mount	*mp = sc->mp;
696 	uint64_t		flags2 = be64_to_cpu(dip->di_flags2);
697 	uint16_t		flags = be16_to_cpu(dip->di_flags);
698 	uint16_t		mode = be16_to_cpu(dip->di_mode);
699 
700 	xfs_failaddr_t		fa;
701 
702 	trace_xrep_dinode_extsize_hints(sc, dip);
703 
704 	fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize),
705 			mode, flags);
706 	if (fa) {
707 		dip->di_extsize = 0;
708 		dip->di_flags &= ~cpu_to_be16(XFS_DIFLAG_EXTSIZE |
709 					      XFS_DIFLAG_EXTSZINHERIT);
710 	}
711 
712 	if (dip->di_version < 3)
713 		return;
714 
715 	fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
716 			mode, flags, flags2);
717 	if (fa) {
718 		dip->di_cowextsize = 0;
719 		dip->di_flags2 &= ~cpu_to_be64(XFS_DIFLAG2_COWEXTSIZE);
720 	}
721 }
722 
723 /* Count extents and blocks for an inode given an rmap. */
724 STATIC int
xrep_dinode_walk_rmap(struct xfs_btree_cur * cur,const struct xfs_rmap_irec * rec,void * priv)725 xrep_dinode_walk_rmap(
726 	struct xfs_btree_cur		*cur,
727 	const struct xfs_rmap_irec	*rec,
728 	void				*priv)
729 {
730 	struct xrep_inode		*ri = priv;
731 	int				error = 0;
732 
733 	if (xchk_should_terminate(ri->sc, &error))
734 		return error;
735 
736 	/* We only care about this inode. */
737 	if (rec->rm_owner != ri->sc->sm->sm_ino)
738 		return 0;
739 
740 	if (rec->rm_flags & XFS_RMAP_ATTR_FORK) {
741 		ri->attr_blocks += rec->rm_blockcount;
742 		if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
743 			ri->attr_extents++;
744 
745 		return 0;
746 	}
747 
748 	ri->data_blocks += rec->rm_blockcount;
749 	if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
750 		ri->data_extents++;
751 
752 	return 0;
753 }
754 
755 /* Count extents and blocks for an inode from all AG rmap data. */
756 STATIC int
xrep_dinode_count_ag_rmaps(struct xrep_inode * ri,struct xfs_perag * pag)757 xrep_dinode_count_ag_rmaps(
758 	struct xrep_inode	*ri,
759 	struct xfs_perag	*pag)
760 {
761 	struct xfs_btree_cur	*cur;
762 	struct xfs_buf		*agf;
763 	int			error;
764 
765 	error = xfs_alloc_read_agf(pag, ri->sc->tp, 0, &agf);
766 	if (error)
767 		return error;
768 
769 	cur = xfs_rmapbt_init_cursor(ri->sc->mp, ri->sc->tp, agf, pag);
770 	error = xfs_rmap_query_all(cur, xrep_dinode_walk_rmap, ri);
771 	xfs_btree_del_cursor(cur, error);
772 	xfs_trans_brelse(ri->sc->tp, agf);
773 	return error;
774 }
775 
776 /* Count extents and blocks for a given inode from all rmap data. */
777 STATIC int
xrep_dinode_count_rmaps(struct xrep_inode * ri)778 xrep_dinode_count_rmaps(
779 	struct xrep_inode	*ri)
780 {
781 	struct xfs_perag	*pag = NULL;
782 	int			error;
783 
784 	if (!xfs_has_rmapbt(ri->sc->mp) || xfs_has_realtime(ri->sc->mp))
785 		return -EOPNOTSUPP;
786 
787 	while ((pag = xfs_perag_next(ri->sc->mp, pag))) {
788 		error = xrep_dinode_count_ag_rmaps(ri, pag);
789 		if (error) {
790 			xfs_perag_rele(pag);
791 			return error;
792 		}
793 	}
794 
795 	/* Can't have extents on both the rt and the data device. */
796 	if (ri->data_extents && ri->rt_extents)
797 		return -EFSCORRUPTED;
798 
799 	trace_xrep_dinode_count_rmaps(ri->sc,
800 			ri->data_blocks, ri->rt_blocks, ri->attr_blocks,
801 			ri->data_extents, ri->rt_extents, ri->attr_extents);
802 	return 0;
803 }
804 
805 /* Return true if this extents-format ifork looks like garbage. */
806 STATIC bool
xrep_dinode_bad_extents_fork(struct xfs_scrub * sc,struct xfs_dinode * dip,unsigned int dfork_size,int whichfork)807 xrep_dinode_bad_extents_fork(
808 	struct xfs_scrub	*sc,
809 	struct xfs_dinode	*dip,
810 	unsigned int		dfork_size,
811 	int			whichfork)
812 {
813 	struct xfs_bmbt_irec	new;
814 	struct xfs_bmbt_rec	*dp;
815 	xfs_extnum_t		nex;
816 	bool			isrt;
817 	unsigned int		i;
818 
819 	nex = xfs_dfork_nextents(dip, whichfork);
820 	if (nex > dfork_size / sizeof(struct xfs_bmbt_rec))
821 		return true;
822 
823 	dp = XFS_DFORK_PTR(dip, whichfork);
824 
825 	isrt = dip->di_flags & cpu_to_be16(XFS_DIFLAG_REALTIME);
826 	for (i = 0; i < nex; i++, dp++) {
827 		xfs_failaddr_t	fa;
828 
829 		xfs_bmbt_disk_get_all(dp, &new);
830 		fa = xfs_bmap_validate_extent_raw(sc->mp, isrt, whichfork,
831 				&new);
832 		if (fa)
833 			return true;
834 	}
835 
836 	return false;
837 }
838 
839 /* Return true if this btree-format ifork looks like garbage. */
840 STATIC bool
xrep_dinode_bad_bmbt_fork(struct xfs_scrub * sc,struct xfs_dinode * dip,unsigned int dfork_size,int whichfork)841 xrep_dinode_bad_bmbt_fork(
842 	struct xfs_scrub	*sc,
843 	struct xfs_dinode	*dip,
844 	unsigned int		dfork_size,
845 	int			whichfork)
846 {
847 	struct xfs_bmdr_block	*dfp;
848 	xfs_extnum_t		nex;
849 	unsigned int		i;
850 	unsigned int		dmxr;
851 	unsigned int		nrecs;
852 	unsigned int		level;
853 
854 	nex = xfs_dfork_nextents(dip, whichfork);
855 	if (nex <= dfork_size / sizeof(struct xfs_bmbt_rec))
856 		return true;
857 
858 	if (dfork_size < sizeof(struct xfs_bmdr_block))
859 		return true;
860 
861 	dfp = XFS_DFORK_PTR(dip, whichfork);
862 	nrecs = be16_to_cpu(dfp->bb_numrecs);
863 	level = be16_to_cpu(dfp->bb_level);
864 
865 	if (nrecs == 0 || xfs_bmdr_space_calc(nrecs) > dfork_size)
866 		return true;
867 	if (level == 0 || level >= XFS_BM_MAXLEVELS(sc->mp, whichfork))
868 		return true;
869 
870 	dmxr = xfs_bmdr_maxrecs(dfork_size, 0);
871 	for (i = 1; i <= nrecs; i++) {
872 		struct xfs_bmbt_key	*fkp;
873 		xfs_bmbt_ptr_t		*fpp;
874 		xfs_fileoff_t		fileoff;
875 		xfs_fsblock_t		fsbno;
876 
877 		fkp = xfs_bmdr_key_addr(dfp, i);
878 		fileoff = be64_to_cpu(fkp->br_startoff);
879 		if (!xfs_verify_fileoff(sc->mp, fileoff))
880 			return true;
881 
882 		fpp = xfs_bmdr_ptr_addr(dfp, i, dmxr);
883 		fsbno = be64_to_cpu(*fpp);
884 		if (!xfs_verify_fsbno(sc->mp, fsbno))
885 			return true;
886 	}
887 
888 	return false;
889 }
890 
891 /*
892  * Check the data fork for things that will fail the ifork verifiers or the
893  * ifork formatters.
894  */
895 STATIC bool
xrep_dinode_check_dfork(struct xfs_scrub * sc,struct xfs_dinode * dip,uint16_t mode)896 xrep_dinode_check_dfork(
897 	struct xfs_scrub	*sc,
898 	struct xfs_dinode	*dip,
899 	uint16_t		mode)
900 {
901 	void			*dfork_ptr;
902 	int64_t			data_size;
903 	unsigned int		fmt;
904 	unsigned int		dfork_size;
905 
906 	/*
907 	 * Verifier functions take signed int64_t, so check for bogus negative
908 	 * values first.
909 	 */
910 	data_size = be64_to_cpu(dip->di_size);
911 	if (data_size < 0)
912 		return true;
913 
914 	fmt = XFS_DFORK_FORMAT(dip, XFS_DATA_FORK);
915 	switch (mode & S_IFMT) {
916 	case S_IFIFO:
917 	case S_IFCHR:
918 	case S_IFBLK:
919 	case S_IFSOCK:
920 		if (fmt != XFS_DINODE_FMT_DEV)
921 			return true;
922 		break;
923 	case S_IFREG:
924 		if (fmt == XFS_DINODE_FMT_LOCAL)
925 			return true;
926 		fallthrough;
927 	case S_IFLNK:
928 	case S_IFDIR:
929 		switch (fmt) {
930 		case XFS_DINODE_FMT_LOCAL:
931 		case XFS_DINODE_FMT_EXTENTS:
932 		case XFS_DINODE_FMT_BTREE:
933 			break;
934 		default:
935 			return true;
936 		}
937 		break;
938 	default:
939 		return true;
940 	}
941 
942 	dfork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_DATA_FORK);
943 	dfork_ptr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
944 
945 	switch (fmt) {
946 	case XFS_DINODE_FMT_DEV:
947 		break;
948 	case XFS_DINODE_FMT_LOCAL:
949 		/* dir/symlink structure cannot be larger than the fork */
950 		if (data_size > dfork_size)
951 			return true;
952 		/* directory structure must pass verification. */
953 		if (S_ISDIR(mode) &&
954 		    xfs_dir2_sf_verify(sc->mp, dfork_ptr, data_size) != NULL)
955 			return true;
956 		/* symlink structure must pass verification. */
957 		if (S_ISLNK(mode) &&
958 		    xfs_symlink_shortform_verify(dfork_ptr, data_size) != NULL)
959 			return true;
960 		break;
961 	case XFS_DINODE_FMT_EXTENTS:
962 		if (xrep_dinode_bad_extents_fork(sc, dip, dfork_size,
963 				XFS_DATA_FORK))
964 			return true;
965 		break;
966 	case XFS_DINODE_FMT_BTREE:
967 		if (xrep_dinode_bad_bmbt_fork(sc, dip, dfork_size,
968 				XFS_DATA_FORK))
969 			return true;
970 		break;
971 	default:
972 		return true;
973 	}
974 
975 	return false;
976 }
977 
978 static void
xrep_dinode_set_data_nextents(struct xfs_dinode * dip,xfs_extnum_t nextents)979 xrep_dinode_set_data_nextents(
980 	struct xfs_dinode	*dip,
981 	xfs_extnum_t		nextents)
982 {
983 	if (xfs_dinode_has_large_extent_counts(dip))
984 		dip->di_big_nextents = cpu_to_be64(nextents);
985 	else
986 		dip->di_nextents = cpu_to_be32(nextents);
987 }
988 
989 static void
xrep_dinode_set_attr_nextents(struct xfs_dinode * dip,xfs_extnum_t nextents)990 xrep_dinode_set_attr_nextents(
991 	struct xfs_dinode	*dip,
992 	xfs_extnum_t		nextents)
993 {
994 	if (xfs_dinode_has_large_extent_counts(dip))
995 		dip->di_big_anextents = cpu_to_be32(nextents);
996 	else
997 		dip->di_anextents = cpu_to_be16(nextents);
998 }
999 
1000 /* Reset the data fork to something sane. */
1001 STATIC void
xrep_dinode_zap_dfork(struct xrep_inode * ri,struct xfs_dinode * dip,uint16_t mode)1002 xrep_dinode_zap_dfork(
1003 	struct xrep_inode	*ri,
1004 	struct xfs_dinode	*dip,
1005 	uint16_t		mode)
1006 {
1007 	struct xfs_scrub	*sc = ri->sc;
1008 
1009 	trace_xrep_dinode_zap_dfork(sc, dip);
1010 
1011 	ri->ino_sick_mask |= XFS_SICK_INO_BMBTD_ZAPPED;
1012 
1013 	xrep_dinode_set_data_nextents(dip, 0);
1014 	ri->data_blocks = 0;
1015 	ri->rt_blocks = 0;
1016 
1017 	/* Special files always get reset to DEV */
1018 	switch (mode & S_IFMT) {
1019 	case S_IFIFO:
1020 	case S_IFCHR:
1021 	case S_IFBLK:
1022 	case S_IFSOCK:
1023 		dip->di_format = XFS_DINODE_FMT_DEV;
1024 		dip->di_size = 0;
1025 		return;
1026 	}
1027 
1028 	/*
1029 	 * If we have data extents, reset to an empty map and hope the user
1030 	 * will run the bmapbtd checker next.
1031 	 */
1032 	if (ri->data_extents || ri->rt_extents || S_ISREG(mode)) {
1033 		dip->di_format = XFS_DINODE_FMT_EXTENTS;
1034 		return;
1035 	}
1036 
1037 	/* Otherwise, reset the local format to the minimum. */
1038 	switch (mode & S_IFMT) {
1039 	case S_IFLNK:
1040 		xrep_dinode_zap_symlink(ri, dip);
1041 		break;
1042 	case S_IFDIR:
1043 		xrep_dinode_zap_dir(ri, dip);
1044 		break;
1045 	}
1046 }
1047 
1048 /*
1049  * Check the attr fork for things that will fail the ifork verifiers or the
1050  * ifork formatters.
1051  */
1052 STATIC bool
xrep_dinode_check_afork(struct xfs_scrub * sc,struct xfs_dinode * dip)1053 xrep_dinode_check_afork(
1054 	struct xfs_scrub		*sc,
1055 	struct xfs_dinode		*dip)
1056 {
1057 	struct xfs_attr_sf_hdr		*afork_ptr;
1058 	size_t				attr_size;
1059 	unsigned int			afork_size;
1060 
1061 	if (XFS_DFORK_BOFF(dip) == 0)
1062 		return dip->di_aformat != XFS_DINODE_FMT_EXTENTS ||
1063 		       xfs_dfork_attr_extents(dip) != 0;
1064 
1065 	afork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK);
1066 	afork_ptr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK);
1067 
1068 	switch (XFS_DFORK_FORMAT(dip, XFS_ATTR_FORK)) {
1069 	case XFS_DINODE_FMT_LOCAL:
1070 		/* Fork has to be large enough to extract the xattr size. */
1071 		if (afork_size < sizeof(struct xfs_attr_sf_hdr))
1072 			return true;
1073 
1074 		/* xattr structure cannot be larger than the fork */
1075 		attr_size = be16_to_cpu(afork_ptr->totsize);
1076 		if (attr_size > afork_size)
1077 			return true;
1078 
1079 		/* xattr structure must pass verification. */
1080 		return xfs_attr_shortform_verify(afork_ptr, attr_size) != NULL;
1081 	case XFS_DINODE_FMT_EXTENTS:
1082 		if (xrep_dinode_bad_extents_fork(sc, dip, afork_size,
1083 					XFS_ATTR_FORK))
1084 			return true;
1085 		break;
1086 	case XFS_DINODE_FMT_BTREE:
1087 		if (xrep_dinode_bad_bmbt_fork(sc, dip, afork_size,
1088 					XFS_ATTR_FORK))
1089 			return true;
1090 		break;
1091 	default:
1092 		return true;
1093 	}
1094 
1095 	return false;
1096 }
1097 
1098 /*
1099  * Reset the attr fork to empty.  Since the attr fork could have contained
1100  * ACLs, make the file readable only by root.
1101  */
1102 STATIC void
xrep_dinode_zap_afork(struct xrep_inode * ri,struct xfs_dinode * dip,uint16_t mode)1103 xrep_dinode_zap_afork(
1104 	struct xrep_inode	*ri,
1105 	struct xfs_dinode	*dip,
1106 	uint16_t		mode)
1107 {
1108 	struct xfs_scrub	*sc = ri->sc;
1109 
1110 	trace_xrep_dinode_zap_afork(sc, dip);
1111 
1112 	ri->ino_sick_mask |= XFS_SICK_INO_BMBTA_ZAPPED;
1113 
1114 	dip->di_aformat = XFS_DINODE_FMT_EXTENTS;
1115 	xrep_dinode_set_attr_nextents(dip, 0);
1116 	ri->attr_blocks = 0;
1117 
1118 	/*
1119 	 * If the data fork is in btree format, removing the attr fork entirely
1120 	 * might cause verifier failures if the next level down in the bmbt
1121 	 * could now fit in the data fork area.
1122 	 */
1123 	if (dip->di_format != XFS_DINODE_FMT_BTREE)
1124 		dip->di_forkoff = 0;
1125 	dip->di_mode = cpu_to_be16(mode & ~0777);
1126 	dip->di_uid = 0;
1127 	dip->di_gid = 0;
1128 }
1129 
1130 /* Make sure the fork offset is a sensible value. */
1131 STATIC void
xrep_dinode_ensure_forkoff(struct xrep_inode * ri,struct xfs_dinode * dip,uint16_t mode)1132 xrep_dinode_ensure_forkoff(
1133 	struct xrep_inode	*ri,
1134 	struct xfs_dinode	*dip,
1135 	uint16_t		mode)
1136 {
1137 	struct xfs_bmdr_block	*bmdr;
1138 	struct xfs_scrub	*sc = ri->sc;
1139 	xfs_extnum_t		attr_extents, data_extents;
1140 	size_t			bmdr_minsz = xfs_bmdr_space_calc(1);
1141 	unsigned int		lit_sz = XFS_LITINO(sc->mp);
1142 	unsigned int		afork_min, dfork_min;
1143 
1144 	trace_xrep_dinode_ensure_forkoff(sc, dip);
1145 
1146 	/*
1147 	 * Before calling this function, xrep_dinode_core ensured that both
1148 	 * forks actually fit inside their respective literal areas.  If this
1149 	 * was not the case, the fork was reset to FMT_EXTENTS with zero
1150 	 * records.  If the rmapbt scan found attr or data fork blocks, this
1151 	 * will be noted in the dinode_stats, and we must leave enough room
1152 	 * for the bmap repair code to reconstruct the mapping structure.
1153 	 *
1154 	 * First, compute the minimum space required for the attr fork.
1155 	 */
1156 	switch (dip->di_aformat) {
1157 	case XFS_DINODE_FMT_LOCAL:
1158 		/*
1159 		 * If we still have a shortform xattr structure at all, that
1160 		 * means the attr fork area was exactly large enough to fit
1161 		 * the sf structure.
1162 		 */
1163 		afork_min = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK);
1164 		break;
1165 	case XFS_DINODE_FMT_EXTENTS:
1166 		attr_extents = xfs_dfork_attr_extents(dip);
1167 		if (attr_extents) {
1168 			/*
1169 			 * We must maintain sufficient space to hold the entire
1170 			 * extent map array in the data fork.  Note that we
1171 			 * previously zapped the fork if it had no chance of
1172 			 * fitting in the inode.
1173 			 */
1174 			afork_min = sizeof(struct xfs_bmbt_rec) * attr_extents;
1175 		} else if (ri->attr_extents > 0) {
1176 			/*
1177 			 * The attr fork thinks it has zero extents, but we
1178 			 * found some xattr extents.  We need to leave enough
1179 			 * empty space here so that the incore attr fork will
1180 			 * get created (and hence trigger the attr fork bmap
1181 			 * repairer).
1182 			 */
1183 			afork_min = bmdr_minsz;
1184 		} else {
1185 			/* No extents on disk or found in rmapbt. */
1186 			afork_min = 0;
1187 		}
1188 		break;
1189 	case XFS_DINODE_FMT_BTREE:
1190 		/* Must have space for btree header and key/pointers. */
1191 		bmdr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK);
1192 		afork_min = xfs_bmap_broot_space(sc->mp, bmdr);
1193 		break;
1194 	default:
1195 		/* We should never see any other formats. */
1196 		afork_min = 0;
1197 		break;
1198 	}
1199 
1200 	/* Compute the minimum space required for the data fork. */
1201 	switch (dip->di_format) {
1202 	case XFS_DINODE_FMT_DEV:
1203 		dfork_min = sizeof(__be32);
1204 		break;
1205 	case XFS_DINODE_FMT_UUID:
1206 		dfork_min = sizeof(uuid_t);
1207 		break;
1208 	case XFS_DINODE_FMT_LOCAL:
1209 		/*
1210 		 * If we still have a shortform data fork at all, that means
1211 		 * the data fork area was large enough to fit whatever was in
1212 		 * there.
1213 		 */
1214 		dfork_min = be64_to_cpu(dip->di_size);
1215 		break;
1216 	case XFS_DINODE_FMT_EXTENTS:
1217 		data_extents = xfs_dfork_data_extents(dip);
1218 		if (data_extents) {
1219 			/*
1220 			 * We must maintain sufficient space to hold the entire
1221 			 * extent map array in the data fork.  Note that we
1222 			 * previously zapped the fork if it had no chance of
1223 			 * fitting in the inode.
1224 			 */
1225 			dfork_min = sizeof(struct xfs_bmbt_rec) * data_extents;
1226 		} else if (ri->data_extents > 0 || ri->rt_extents > 0) {
1227 			/*
1228 			 * The data fork thinks it has zero extents, but we
1229 			 * found some data extents.  We need to leave enough
1230 			 * empty space here so that the data fork bmap repair
1231 			 * will recover the mappings.
1232 			 */
1233 			dfork_min = bmdr_minsz;
1234 		} else {
1235 			/* No extents on disk or found in rmapbt. */
1236 			dfork_min = 0;
1237 		}
1238 		break;
1239 	case XFS_DINODE_FMT_BTREE:
1240 		/* Must have space for btree header and key/pointers. */
1241 		bmdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
1242 		dfork_min = xfs_bmap_broot_space(sc->mp, bmdr);
1243 		break;
1244 	default:
1245 		dfork_min = 0;
1246 		break;
1247 	}
1248 
1249 	/*
1250 	 * Round all values up to the nearest 8 bytes, because that is the
1251 	 * precision of di_forkoff.
1252 	 */
1253 	afork_min = roundup(afork_min, 8);
1254 	dfork_min = roundup(dfork_min, 8);
1255 	bmdr_minsz = roundup(bmdr_minsz, 8);
1256 
1257 	ASSERT(dfork_min <= lit_sz);
1258 	ASSERT(afork_min <= lit_sz);
1259 
1260 	/*
1261 	 * If the data fork was zapped and we don't have enough space for the
1262 	 * recovery fork, move the attr fork up.
1263 	 */
1264 	if (dip->di_format == XFS_DINODE_FMT_EXTENTS &&
1265 	    xfs_dfork_data_extents(dip) == 0 &&
1266 	    (ri->data_extents > 0 || ri->rt_extents > 0) &&
1267 	    bmdr_minsz > XFS_DFORK_DSIZE(dip, sc->mp)) {
1268 		if (bmdr_minsz + afork_min > lit_sz) {
1269 			/*
1270 			 * The attr for and the stub fork we need to recover
1271 			 * the data fork won't both fit.  Zap the attr fork.
1272 			 */
1273 			xrep_dinode_zap_afork(ri, dip, mode);
1274 			afork_min = bmdr_minsz;
1275 		} else {
1276 			void	*before, *after;
1277 
1278 			/* Otherwise, just slide the attr fork up. */
1279 			before = XFS_DFORK_APTR(dip);
1280 			dip->di_forkoff = bmdr_minsz >> 3;
1281 			after = XFS_DFORK_APTR(dip);
1282 			memmove(after, before, XFS_DFORK_ASIZE(dip, sc->mp));
1283 		}
1284 	}
1285 
1286 	/*
1287 	 * If the attr fork was zapped and we don't have enough space for the
1288 	 * recovery fork, move the attr fork down.
1289 	 */
1290 	if (dip->di_aformat == XFS_DINODE_FMT_EXTENTS &&
1291 	    xfs_dfork_attr_extents(dip) == 0 &&
1292 	    ri->attr_extents > 0 &&
1293 	    bmdr_minsz > XFS_DFORK_ASIZE(dip, sc->mp)) {
1294 		if (dip->di_format == XFS_DINODE_FMT_BTREE) {
1295 			/*
1296 			 * If the data fork is in btree format then we can't
1297 			 * adjust forkoff because that runs the risk of
1298 			 * violating the extents/btree format transition rules.
1299 			 */
1300 		} else if (bmdr_minsz + dfork_min > lit_sz) {
1301 			/*
1302 			 * If we can't move the attr fork, too bad, we lose the
1303 			 * attr fork and leak its blocks.
1304 			 */
1305 			xrep_dinode_zap_afork(ri, dip, mode);
1306 		} else {
1307 			/*
1308 			 * Otherwise, just slide the attr fork down.  The attr
1309 			 * fork is empty, so we don't have any old contents to
1310 			 * move here.
1311 			 */
1312 			dip->di_forkoff = (lit_sz - bmdr_minsz) >> 3;
1313 		}
1314 	}
1315 }
1316 
1317 /*
1318  * Zap the data/attr forks if we spot anything that isn't going to pass the
1319  * ifork verifiers or the ifork formatters, because we need to get the inode
1320  * into good enough shape that the higher level repair functions can run.
1321  */
1322 STATIC void
xrep_dinode_zap_forks(struct xrep_inode * ri,struct xfs_dinode * dip)1323 xrep_dinode_zap_forks(
1324 	struct xrep_inode	*ri,
1325 	struct xfs_dinode	*dip)
1326 {
1327 	struct xfs_scrub	*sc = ri->sc;
1328 	xfs_extnum_t		data_extents;
1329 	xfs_extnum_t		attr_extents;
1330 	xfs_filblks_t		nblocks;
1331 	uint16_t		mode;
1332 	bool			zap_datafork = false;
1333 	bool			zap_attrfork = ri->zap_acls;
1334 
1335 	trace_xrep_dinode_zap_forks(sc, dip);
1336 
1337 	mode = be16_to_cpu(dip->di_mode);
1338 
1339 	data_extents = xfs_dfork_data_extents(dip);
1340 	attr_extents = xfs_dfork_attr_extents(dip);
1341 	nblocks = be64_to_cpu(dip->di_nblocks);
1342 
1343 	/* Inode counters don't make sense? */
1344 	if (data_extents > nblocks)
1345 		zap_datafork = true;
1346 	if (attr_extents > nblocks)
1347 		zap_attrfork = true;
1348 	if (data_extents + attr_extents > nblocks)
1349 		zap_datafork = zap_attrfork = true;
1350 
1351 	if (!zap_datafork)
1352 		zap_datafork = xrep_dinode_check_dfork(sc, dip, mode);
1353 	if (!zap_attrfork)
1354 		zap_attrfork = xrep_dinode_check_afork(sc, dip);
1355 
1356 	/* Zap whatever's bad. */
1357 	if (zap_attrfork)
1358 		xrep_dinode_zap_afork(ri, dip, mode);
1359 	if (zap_datafork)
1360 		xrep_dinode_zap_dfork(ri, dip, mode);
1361 	xrep_dinode_ensure_forkoff(ri, dip, mode);
1362 
1363 	/*
1364 	 * Zero di_nblocks if we don't have any extents at all to satisfy the
1365 	 * buffer verifier.
1366 	 */
1367 	data_extents = xfs_dfork_data_extents(dip);
1368 	attr_extents = xfs_dfork_attr_extents(dip);
1369 	if (data_extents + attr_extents == 0)
1370 		dip->di_nblocks = 0;
1371 }
1372 
1373 /* Inode didn't pass dinode verifiers, so fix the raw buffer and retry iget. */
1374 STATIC int
xrep_dinode_core(struct xrep_inode * ri)1375 xrep_dinode_core(
1376 	struct xrep_inode	*ri)
1377 {
1378 	struct xfs_scrub	*sc = ri->sc;
1379 	struct xfs_buf		*bp;
1380 	struct xfs_dinode	*dip;
1381 	xfs_ino_t		ino = sc->sm->sm_ino;
1382 	int			error;
1383 	int			iget_error;
1384 
1385 	/* Figure out what this inode had mapped in both forks. */
1386 	error = xrep_dinode_count_rmaps(ri);
1387 	if (error)
1388 		return error;
1389 
1390 	/* Read the inode cluster buffer. */
1391 	error = xfs_trans_read_buf(sc->mp, sc->tp, sc->mp->m_ddev_targp,
1392 			ri->imap.im_blkno, ri->imap.im_len, XBF_UNMAPPED, &bp,
1393 			NULL);
1394 	if (error)
1395 		return error;
1396 
1397 	/* Make sure we can pass the inode buffer verifier. */
1398 	xrep_dinode_buf(sc, bp);
1399 	bp->b_ops = &xfs_inode_buf_ops;
1400 
1401 	/* Fix everything the verifier will complain about. */
1402 	dip = xfs_buf_offset(bp, ri->imap.im_boffset);
1403 	xrep_dinode_header(sc, dip);
1404 	iget_error = xrep_dinode_mode(ri, dip);
1405 	if (iget_error)
1406 		goto write;
1407 	xrep_dinode_nlinks(dip);
1408 	xrep_dinode_flags(sc, dip, ri->rt_extents > 0);
1409 	xrep_dinode_size(ri, dip);
1410 	xrep_dinode_extsize_hints(sc, dip);
1411 	xrep_dinode_zap_forks(ri, dip);
1412 
1413 write:
1414 	/* Write out the inode. */
1415 	trace_xrep_dinode_fixed(sc, dip);
1416 	xfs_dinode_calc_crc(sc->mp, dip);
1417 	xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_DINO_BUF);
1418 	xfs_trans_log_buf(sc->tp, bp, ri->imap.im_boffset,
1419 			ri->imap.im_boffset + sc->mp->m_sb.sb_inodesize - 1);
1420 
1421 	/*
1422 	 * In theory, we've fixed the ondisk inode record enough that we should
1423 	 * be able to load the inode into the cache.  Try to iget that inode
1424 	 * now while we hold the AGI and the inode cluster buffer and take the
1425 	 * IOLOCK so that we can continue with repairs without anyone else
1426 	 * accessing the inode.  If iget fails, we still need to commit the
1427 	 * changes.
1428 	 */
1429 	if (!iget_error)
1430 		iget_error = xchk_iget(sc, ino, &sc->ip);
1431 	if (!iget_error)
1432 		xchk_ilock(sc, XFS_IOLOCK_EXCL);
1433 
1434 	/*
1435 	 * Commit the inode cluster buffer updates and drop the AGI buffer that
1436 	 * we've been holding since scrub setup.  From here on out, repairs
1437 	 * deal only with the cached inode.
1438 	 */
1439 	error = xrep_trans_commit(sc);
1440 	if (error)
1441 		return error;
1442 
1443 	if (iget_error)
1444 		return iget_error;
1445 
1446 	error = xchk_trans_alloc(sc, 0);
1447 	if (error)
1448 		return error;
1449 
1450 	error = xrep_ino_dqattach(sc);
1451 	if (error)
1452 		return error;
1453 
1454 	xchk_ilock(sc, XFS_ILOCK_EXCL);
1455 	if (ri->ino_sick_mask)
1456 		xfs_inode_mark_sick(sc->ip, ri->ino_sick_mask);
1457 	return 0;
1458 }
1459 
1460 /* Fix everything xfs_dinode_verify cares about. */
1461 STATIC int
xrep_dinode_problems(struct xrep_inode * ri)1462 xrep_dinode_problems(
1463 	struct xrep_inode	*ri)
1464 {
1465 	struct xfs_scrub	*sc = ri->sc;
1466 	int			error;
1467 
1468 	error = xrep_dinode_core(ri);
1469 	if (error)
1470 		return error;
1471 
1472 	/* We had to fix a totally busted inode, schedule quotacheck. */
1473 	if (XFS_IS_UQUOTA_ON(sc->mp))
1474 		xrep_force_quotacheck(sc, XFS_DQTYPE_USER);
1475 	if (XFS_IS_GQUOTA_ON(sc->mp))
1476 		xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP);
1477 	if (XFS_IS_PQUOTA_ON(sc->mp))
1478 		xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ);
1479 
1480 	return 0;
1481 }
1482 
1483 /*
1484  * Fix problems that the verifiers don't care about.  In general these are
1485  * errors that don't cause problems elsewhere in the kernel that we can easily
1486  * detect, so we don't check them all that rigorously.
1487  */
1488 
1489 /* Make sure block and extent counts are ok. */
1490 STATIC int
xrep_inode_blockcounts(struct xfs_scrub * sc)1491 xrep_inode_blockcounts(
1492 	struct xfs_scrub	*sc)
1493 {
1494 	struct xfs_ifork	*ifp;
1495 	xfs_filblks_t		count;
1496 	xfs_filblks_t		acount;
1497 	xfs_extnum_t		nextents;
1498 	int			error;
1499 
1500 	trace_xrep_inode_blockcounts(sc);
1501 
1502 	/* Set data fork counters from the data fork mappings. */
1503 	error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK,
1504 			&nextents, &count);
1505 	if (error)
1506 		return error;
1507 	if (xfs_is_reflink_inode(sc->ip)) {
1508 		/*
1509 		 * data fork blockcount can exceed physical storage if a user
1510 		 * reflinks the same block over and over again.
1511 		 */
1512 		;
1513 	} else if (XFS_IS_REALTIME_INODE(sc->ip)) {
1514 		if (count >= sc->mp->m_sb.sb_rblocks)
1515 			return -EFSCORRUPTED;
1516 	} else {
1517 		if (count >= sc->mp->m_sb.sb_dblocks)
1518 			return -EFSCORRUPTED;
1519 	}
1520 	error = xrep_ino_ensure_extent_count(sc, XFS_DATA_FORK, nextents);
1521 	if (error)
1522 		return error;
1523 	sc->ip->i_df.if_nextents = nextents;
1524 
1525 	/* Set attr fork counters from the attr fork mappings. */
1526 	ifp = xfs_ifork_ptr(sc->ip, XFS_ATTR_FORK);
1527 	if (ifp) {
1528 		error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK,
1529 				&nextents, &acount);
1530 		if (error)
1531 			return error;
1532 		if (count >= sc->mp->m_sb.sb_dblocks)
1533 			return -EFSCORRUPTED;
1534 		error = xrep_ino_ensure_extent_count(sc, XFS_ATTR_FORK,
1535 				nextents);
1536 		if (error)
1537 			return error;
1538 		ifp->if_nextents = nextents;
1539 	} else {
1540 		acount = 0;
1541 	}
1542 
1543 	sc->ip->i_nblocks = count + acount;
1544 	return 0;
1545 }
1546 
1547 /* Check for invalid uid/gid/prid. */
1548 STATIC void
xrep_inode_ids(struct xfs_scrub * sc)1549 xrep_inode_ids(
1550 	struct xfs_scrub	*sc)
1551 {
1552 	bool			dirty = false;
1553 
1554 	trace_xrep_inode_ids(sc);
1555 
1556 	if (!uid_valid(VFS_I(sc->ip)->i_uid)) {
1557 		i_uid_write(VFS_I(sc->ip), 0);
1558 		dirty = true;
1559 		if (XFS_IS_UQUOTA_ON(sc->mp))
1560 			xrep_force_quotacheck(sc, XFS_DQTYPE_USER);
1561 	}
1562 
1563 	if (!gid_valid(VFS_I(sc->ip)->i_gid)) {
1564 		i_gid_write(VFS_I(sc->ip), 0);
1565 		dirty = true;
1566 		if (XFS_IS_GQUOTA_ON(sc->mp))
1567 			xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP);
1568 	}
1569 
1570 	if (sc->ip->i_projid == -1U) {
1571 		sc->ip->i_projid = 0;
1572 		dirty = true;
1573 		if (XFS_IS_PQUOTA_ON(sc->mp))
1574 			xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ);
1575 	}
1576 
1577 	/* strip setuid/setgid if we touched any of the ids */
1578 	if (dirty)
1579 		VFS_I(sc->ip)->i_mode &= ~(S_ISUID | S_ISGID);
1580 }
1581 
1582 static inline void
xrep_clamp_timestamp(struct xfs_inode * ip,struct timespec64 * ts)1583 xrep_clamp_timestamp(
1584 	struct xfs_inode	*ip,
1585 	struct timespec64	*ts)
1586 {
1587 	ts->tv_nsec = clamp_t(long, ts->tv_nsec, 0, NSEC_PER_SEC);
1588 	*ts = timestamp_truncate(*ts, VFS_I(ip));
1589 }
1590 
1591 /* Nanosecond counters can't have more than 1 billion. */
1592 STATIC void
xrep_inode_timestamps(struct xfs_inode * ip)1593 xrep_inode_timestamps(
1594 	struct xfs_inode	*ip)
1595 {
1596 	struct timespec64	tstamp;
1597 	struct inode		*inode = VFS_I(ip);
1598 
1599 	tstamp = inode_get_atime(inode);
1600 	xrep_clamp_timestamp(ip, &tstamp);
1601 	inode_set_atime_to_ts(inode, tstamp);
1602 
1603 	tstamp = inode_get_mtime(inode);
1604 	xrep_clamp_timestamp(ip, &tstamp);
1605 	inode_set_mtime_to_ts(inode, tstamp);
1606 
1607 	tstamp = inode_get_ctime(inode);
1608 	xrep_clamp_timestamp(ip, &tstamp);
1609 	inode_set_ctime_to_ts(inode, tstamp);
1610 
1611 	xrep_clamp_timestamp(ip, &ip->i_crtime);
1612 }
1613 
1614 /* Fix inode flags that don't make sense together. */
1615 STATIC void
xrep_inode_flags(struct xfs_scrub * sc)1616 xrep_inode_flags(
1617 	struct xfs_scrub	*sc)
1618 {
1619 	uint16_t		mode;
1620 
1621 	trace_xrep_inode_flags(sc);
1622 
1623 	mode = VFS_I(sc->ip)->i_mode;
1624 
1625 	/* Clear junk flags */
1626 	if (sc->ip->i_diflags & ~XFS_DIFLAG_ANY)
1627 		sc->ip->i_diflags &= ~XFS_DIFLAG_ANY;
1628 
1629 	/* NEWRTBM only applies to realtime bitmaps */
1630 	if (sc->ip->i_ino == sc->mp->m_sb.sb_rbmino)
1631 		sc->ip->i_diflags |= XFS_DIFLAG_NEWRTBM;
1632 	else
1633 		sc->ip->i_diflags &= ~XFS_DIFLAG_NEWRTBM;
1634 
1635 	/* These only make sense for directories. */
1636 	if (!S_ISDIR(mode))
1637 		sc->ip->i_diflags &= ~(XFS_DIFLAG_RTINHERIT |
1638 					  XFS_DIFLAG_EXTSZINHERIT |
1639 					  XFS_DIFLAG_PROJINHERIT |
1640 					  XFS_DIFLAG_NOSYMLINKS);
1641 
1642 	/* These only make sense for files. */
1643 	if (!S_ISREG(mode))
1644 		sc->ip->i_diflags &= ~(XFS_DIFLAG_REALTIME |
1645 					  XFS_DIFLAG_EXTSIZE);
1646 
1647 	/* These only make sense for non-rt files. */
1648 	if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME)
1649 		sc->ip->i_diflags &= ~XFS_DIFLAG_FILESTREAM;
1650 
1651 	/* Immutable and append only?  Drop the append. */
1652 	if ((sc->ip->i_diflags & XFS_DIFLAG_IMMUTABLE) &&
1653 	    (sc->ip->i_diflags & XFS_DIFLAG_APPEND))
1654 		sc->ip->i_diflags &= ~XFS_DIFLAG_APPEND;
1655 
1656 	/* Clear junk flags. */
1657 	if (sc->ip->i_diflags2 & ~XFS_DIFLAG2_ANY)
1658 		sc->ip->i_diflags2 &= ~XFS_DIFLAG2_ANY;
1659 
1660 	/* No reflink flag unless we support it and it's a file. */
1661 	if (!xfs_has_reflink(sc->mp) || !S_ISREG(mode))
1662 		sc->ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
1663 
1664 	/* DAX only applies to files and dirs. */
1665 	if (!(S_ISREG(mode) || S_ISDIR(mode)))
1666 		sc->ip->i_diflags2 &= ~XFS_DIFLAG2_DAX;
1667 
1668 	/* No reflink files on the realtime device. */
1669 	if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME)
1670 		sc->ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
1671 }
1672 
1673 /*
1674  * Fix size problems with block/node format directories.  If we fail to find
1675  * the extent list, just bail out and let the bmapbtd repair functions clean
1676  * up that mess.
1677  */
1678 STATIC void
xrep_inode_blockdir_size(struct xfs_scrub * sc)1679 xrep_inode_blockdir_size(
1680 	struct xfs_scrub	*sc)
1681 {
1682 	struct xfs_iext_cursor	icur;
1683 	struct xfs_bmbt_irec	got;
1684 	struct xfs_ifork	*ifp;
1685 	xfs_fileoff_t		off;
1686 	int			error;
1687 
1688 	trace_xrep_inode_blockdir_size(sc);
1689 
1690 	error = xfs_iread_extents(sc->tp, sc->ip, XFS_DATA_FORK);
1691 	if (error)
1692 		return;
1693 
1694 	/* Find the last block before 32G; this is the dir size. */
1695 	ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
1696 	off = XFS_B_TO_FSB(sc->mp, XFS_DIR2_SPACE_SIZE);
1697 	if (!xfs_iext_lookup_extent_before(sc->ip, ifp, &off, &icur, &got)) {
1698 		/* zero-extents directory? */
1699 		return;
1700 	}
1701 
1702 	off = got.br_startoff + got.br_blockcount;
1703 	sc->ip->i_disk_size = min_t(loff_t, XFS_DIR2_SPACE_SIZE,
1704 			XFS_FSB_TO_B(sc->mp, off));
1705 }
1706 
1707 /* Fix size problems with short format directories. */
1708 STATIC void
xrep_inode_sfdir_size(struct xfs_scrub * sc)1709 xrep_inode_sfdir_size(
1710 	struct xfs_scrub	*sc)
1711 {
1712 	struct xfs_ifork	*ifp;
1713 
1714 	trace_xrep_inode_sfdir_size(sc);
1715 
1716 	ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
1717 	sc->ip->i_disk_size = ifp->if_bytes;
1718 }
1719 
1720 /*
1721  * Fix any irregularities in a directory inode's size now that we can iterate
1722  * extent maps and access other regular inode data.
1723  */
1724 STATIC void
xrep_inode_dir_size(struct xfs_scrub * sc)1725 xrep_inode_dir_size(
1726 	struct xfs_scrub	*sc)
1727 {
1728 	trace_xrep_inode_dir_size(sc);
1729 
1730 	switch (sc->ip->i_df.if_format) {
1731 	case XFS_DINODE_FMT_EXTENTS:
1732 	case XFS_DINODE_FMT_BTREE:
1733 		xrep_inode_blockdir_size(sc);
1734 		break;
1735 	case XFS_DINODE_FMT_LOCAL:
1736 		xrep_inode_sfdir_size(sc);
1737 		break;
1738 	}
1739 }
1740 
1741 /* Fix extent size hint problems. */
1742 STATIC void
xrep_inode_extsize(struct xfs_scrub * sc)1743 xrep_inode_extsize(
1744 	struct xfs_scrub	*sc)
1745 {
1746 	/* Fix misaligned extent size hints on a directory. */
1747 	if ((sc->ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
1748 	    (sc->ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) &&
1749 	    xfs_extlen_to_rtxmod(sc->mp, sc->ip->i_extsize) > 0) {
1750 		sc->ip->i_extsize = 0;
1751 		sc->ip->i_diflags &= ~XFS_DIFLAG_EXTSZINHERIT;
1752 	}
1753 }
1754 
1755 /* Ensure this file has an attr fork if it needs to hold a parent pointer. */
1756 STATIC int
xrep_inode_pptr(struct xfs_scrub * sc)1757 xrep_inode_pptr(
1758 	struct xfs_scrub	*sc)
1759 {
1760 	struct xfs_mount	*mp = sc->mp;
1761 	struct xfs_inode	*ip = sc->ip;
1762 	struct inode		*inode = VFS_I(ip);
1763 
1764 	if (!xfs_has_parent(mp))
1765 		return 0;
1766 
1767 	/*
1768 	 * Unlinked inodes that cannot be added to the directory tree will not
1769 	 * have a parent pointer.
1770 	 */
1771 	if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE))
1772 		return 0;
1773 
1774 	/* Children of the superblock do not have parent pointers. */
1775 	if (xchk_inode_is_sb_rooted(ip))
1776 		return 0;
1777 
1778 	/* Inode already has an attr fork; no further work possible here. */
1779 	if (xfs_inode_has_attr_fork(ip))
1780 		return 0;
1781 
1782 	return xfs_bmap_add_attrfork(sc->tp, ip,
1783 			sizeof(struct xfs_attr_sf_hdr), true);
1784 }
1785 
1786 /* Fix any irregularities in an inode that the verifiers don't catch. */
1787 STATIC int
xrep_inode_problems(struct xfs_scrub * sc)1788 xrep_inode_problems(
1789 	struct xfs_scrub	*sc)
1790 {
1791 	int			error;
1792 
1793 	error = xrep_inode_blockcounts(sc);
1794 	if (error)
1795 		return error;
1796 	error = xrep_inode_pptr(sc);
1797 	if (error)
1798 		return error;
1799 	xrep_inode_timestamps(sc->ip);
1800 	xrep_inode_flags(sc);
1801 	xrep_inode_ids(sc);
1802 	/*
1803 	 * We can now do a better job fixing the size of a directory now that
1804 	 * we can scan the data fork extents than we could in xrep_dinode_size.
1805 	 */
1806 	if (S_ISDIR(VFS_I(sc->ip)->i_mode))
1807 		xrep_inode_dir_size(sc);
1808 	xrep_inode_extsize(sc);
1809 
1810 	trace_xrep_inode_fixed(sc);
1811 	xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
1812 	return xrep_roll_trans(sc);
1813 }
1814 
1815 /*
1816  * Make sure this inode's unlinked list pointers are consistent with its
1817  * link count.
1818  */
1819 STATIC int
xrep_inode_unlinked(struct xfs_scrub * sc)1820 xrep_inode_unlinked(
1821 	struct xfs_scrub	*sc)
1822 {
1823 	unsigned int		nlink = VFS_I(sc->ip)->i_nlink;
1824 	int			error;
1825 
1826 	/*
1827 	 * If this inode is linked from the directory tree and on the unlinked
1828 	 * list, remove it from the unlinked list.
1829 	 */
1830 	if (nlink > 0 && xfs_inode_on_unlinked_list(sc->ip)) {
1831 		struct xfs_perag	*pag;
1832 		int			error;
1833 
1834 		pag = xfs_perag_get(sc->mp,
1835 				XFS_INO_TO_AGNO(sc->mp, sc->ip->i_ino));
1836 		error = xfs_iunlink_remove(sc->tp, pag, sc->ip);
1837 		xfs_perag_put(pag);
1838 		if (error)
1839 			return error;
1840 	}
1841 
1842 	/*
1843 	 * If this inode is not linked from the directory tree yet not on the
1844 	 * unlinked list, put it on the unlinked list.
1845 	 */
1846 	if (nlink == 0 && !xfs_inode_on_unlinked_list(sc->ip)) {
1847 		error = xfs_iunlink(sc->tp, sc->ip);
1848 		if (error)
1849 			return error;
1850 	}
1851 
1852 	return 0;
1853 }
1854 
1855 /* Repair an inode's fields. */
1856 int
xrep_inode(struct xfs_scrub * sc)1857 xrep_inode(
1858 	struct xfs_scrub	*sc)
1859 {
1860 	int			error = 0;
1861 
1862 	/*
1863 	 * No inode?  That means we failed the _iget verifiers.  Repair all
1864 	 * the things that the inode verifiers care about, then retry _iget.
1865 	 */
1866 	if (!sc->ip) {
1867 		struct xrep_inode	*ri = sc->buf;
1868 
1869 		ASSERT(ri != NULL);
1870 
1871 		error = xrep_dinode_problems(ri);
1872 		if (error == -EBUSY) {
1873 			/*
1874 			 * Directory scan to recover inode mode encountered a
1875 			 * busy inode, so we did not continue repairing things.
1876 			 */
1877 			return 0;
1878 		}
1879 		if (error)
1880 			return error;
1881 
1882 		/* By this point we had better have a working incore inode. */
1883 		if (!sc->ip)
1884 			return -EFSCORRUPTED;
1885 	}
1886 
1887 	xfs_trans_ijoin(sc->tp, sc->ip, 0);
1888 
1889 	/* If we found corruption of any kind, try to fix it. */
1890 	if ((sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) ||
1891 	    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_XCORRUPT)) {
1892 		error = xrep_inode_problems(sc);
1893 		if (error)
1894 			return error;
1895 	}
1896 
1897 	/* See if we can clear the reflink flag. */
1898 	if (xfs_is_reflink_inode(sc->ip)) {
1899 		error = xfs_reflink_clear_inode_flag(sc->ip, &sc->tp);
1900 		if (error)
1901 			return error;
1902 	}
1903 
1904 	/* Reconnect incore unlinked list */
1905 	error = xrep_inode_unlinked(sc);
1906 	if (error)
1907 		return error;
1908 
1909 	return xrep_defer_finish(sc);
1910 }
1911