xref: /linux/fs/xfs/scrub/inode_repair.c (revision c4101e55974cc7d835fbd2d8e01553a3f61e9e75)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_defer.h"
13 #include "xfs_btree.h"
14 #include "xfs_bit.h"
15 #include "xfs_log_format.h"
16 #include "xfs_trans.h"
17 #include "xfs_sb.h"
18 #include "xfs_inode.h"
19 #include "xfs_icache.h"
20 #include "xfs_inode_buf.h"
21 #include "xfs_inode_fork.h"
22 #include "xfs_ialloc.h"
23 #include "xfs_da_format.h"
24 #include "xfs_reflink.h"
25 #include "xfs_alloc.h"
26 #include "xfs_rmap.h"
27 #include "xfs_rmap_btree.h"
28 #include "xfs_bmap.h"
29 #include "xfs_bmap_btree.h"
30 #include "xfs_bmap_util.h"
31 #include "xfs_dir2.h"
32 #include "xfs_dir2_priv.h"
33 #include "xfs_quota_defs.h"
34 #include "xfs_quota.h"
35 #include "xfs_ag.h"
36 #include "xfs_rtbitmap.h"
37 #include "xfs_attr_leaf.h"
38 #include "xfs_log_priv.h"
39 #include "xfs_health.h"
40 #include "scrub/xfs_scrub.h"
41 #include "scrub/scrub.h"
42 #include "scrub/common.h"
43 #include "scrub/btree.h"
44 #include "scrub/trace.h"
45 #include "scrub/repair.h"
46 
47 /*
48  * Inode Record Repair
49  * ===================
50  *
51  * Roughly speaking, inode problems can be classified based on whether or not
52  * they trip the dinode verifiers.  If those trip, then we won't be able to
53  * xfs_iget ourselves the inode.
54  *
 * Therefore, the xrep_dinode_* functions fix anything that will cause the
 * inode buffer verifier or the dinode verifier to fail.  The xrep_inode_*
 * functions fix things on live incore inodes.  The inode repair functions
 * make decisions with security and usability implications when reviving a
 * file:
59  *
60  * - Files with zero di_mode or a garbage di_mode are converted to regular file
61  *   that only root can read.  This file may not actually contain user data,
62  *   if the file was not previously a regular file.  Setuid and setgid bits
63  *   are cleared.
64  *
65  * - Zero-size directories can be truncated to look empty.  It is necessary to
66  *   run the bmapbtd and directory repair functions to fully rebuild the
67  *   directory.
68  *
69  * - Zero-size symbolic link targets can be truncated to '?'.  It is necessary
70  *   to run the bmapbtd and symlink repair functions to salvage the symlink.
71  *
72  * - Invalid extent size hints will be removed.
73  *
74  * - Quotacheck will be scheduled if we repaired an inode that was so badly
75  *   damaged that the ondisk inode had to be rebuilt.
76  *
77  * - Invalid user, group, or project IDs (aka -1U) will be reset to zero.
78  *   Setuid and setgid bits are cleared.
79  *
80  * - Data and attr forks are reset to extents format with zero extents if the
81  *   fork data is inconsistent.  It is necessary to run the bmapbtd or bmapbta
82  *   repair functions to recover the space mapping.
83  *
84  * - ACLs will not be recovered if the attr fork is zapped or the extended
85  *   attribute structure itself requires salvaging.
86  *
87  * - If the attr fork is zapped, the user and group ids are reset to root and
88  *   the setuid and setgid bits are removed.
89  */
90 
/*
 * All the information we need to repair the ondisk inode if we can't iget the
 * incore inode.  We don't allocate this buffer unless we're going to perform
 * a repair to the ondisk inode cluster buffer.
 *
 * The block and extent counters start out zeroed (the structure is allocated
 * with kzalloc) and are accumulated by the rmap walk in
 * xrep_dinode_walk_rmap.
 */
struct xrep_inode {
	/* Inode mapping that we saved from the initial lookup attempt. */
	struct xfs_imap		imap;

	/* Back pointer to the scrub context; set by xrep_setup_inode. */
	struct xfs_scrub	*sc;

	/* Blocks in use on the data device by data extents or bmbt blocks. */
	xfs_rfsblock_t		data_blocks;

	/* Blocks in use on the rt device. */
	xfs_rfsblock_t		rt_blocks;

	/* Blocks in use by the attr fork. */
	xfs_rfsblock_t		attr_blocks;

	/* Number of data device extents for the data fork. */
	xfs_extnum_t		data_extents;

	/*
	 * Number of realtime device extents for the data fork.  If
	 * data_extents and rt_extents indicate that the data fork has extents
	 * on both devices, we'll just back away slowly.
	 */
	xfs_extnum_t		rt_extents;

	/* Number of (data device) extents for the attr fork. */
	xfs_aextnum_t		attr_extents;

	/*
	 * Sick state to set after zapping parts of the inode.  The zap
	 * helpers OR XFS_SICK_INO_*_ZAPPED bits into this mask.
	 */
	unsigned int		ino_sick_mask;

	/*
	 * Must we remove all access from this file?  Set when the mode had to
	 * be reset; it forces the attr fork to be zapped.
	 */
	bool			zap_acls;
};
130 
131 /*
132  * Setup function for inode repair.  @imap contains the ondisk inode mapping
133  * information so that we can correct the ondisk inode cluster buffer if
134  * necessary to make iget work.
135  */
136 int
137 xrep_setup_inode(
138 	struct xfs_scrub	*sc,
139 	const struct xfs_imap	*imap)
140 {
141 	struct xrep_inode	*ri;
142 
143 	sc->buf = kzalloc(sizeof(struct xrep_inode), XCHK_GFP_FLAGS);
144 	if (!sc->buf)
145 		return -ENOMEM;
146 
147 	ri = sc->buf;
148 	memcpy(&ri->imap, imap, sizeof(struct xfs_imap));
149 	ri->sc = sc;
150 	return 0;
151 }
152 
153 /*
154  * Make sure this ondisk inode can pass the inode buffer verifier.  This is
155  * not the same as the dinode verifier.
156  */
157 STATIC void
158 xrep_dinode_buf_core(
159 	struct xfs_scrub	*sc,
160 	struct xfs_buf		*bp,
161 	unsigned int		ioffset)
162 {
163 	struct xfs_dinode	*dip = xfs_buf_offset(bp, ioffset);
164 	struct xfs_trans	*tp = sc->tp;
165 	struct xfs_mount	*mp = sc->mp;
166 	xfs_agino_t		agino;
167 	bool			crc_ok = false;
168 	bool			magic_ok = false;
169 	bool			unlinked_ok = false;
170 
171 	agino = be32_to_cpu(dip->di_next_unlinked);
172 
173 	if (xfs_verify_agino_or_null(bp->b_pag, agino))
174 		unlinked_ok = true;
175 
176 	if (dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
177 	    xfs_dinode_good_version(mp, dip->di_version))
178 		magic_ok = true;
179 
180 	if (xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
181 			XFS_DINODE_CRC_OFF))
182 		crc_ok = true;
183 
184 	if (magic_ok && unlinked_ok && crc_ok)
185 		return;
186 
187 	if (!magic_ok) {
188 		dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
189 		dip->di_version = 3;
190 	}
191 	if (!unlinked_ok)
192 		dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
193 	xfs_dinode_calc_crc(mp, dip);
194 	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
195 	xfs_trans_log_buf(tp, bp, ioffset,
196 				  ioffset + sizeof(struct xfs_dinode) - 1);
197 }
198 
199 /* Make sure this inode cluster buffer can pass the inode buffer verifier. */
200 STATIC void
201 xrep_dinode_buf(
202 	struct xfs_scrub	*sc,
203 	struct xfs_buf		*bp)
204 {
205 	struct xfs_mount	*mp = sc->mp;
206 	int			i;
207 	int			ni;
208 
209 	ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
210 	for (i = 0; i < ni; i++)
211 		xrep_dinode_buf_core(sc, bp, i << mp->m_sb.sb_inodelog);
212 }
213 
214 /* Reinitialize things that never change in an inode. */
215 STATIC void
216 xrep_dinode_header(
217 	struct xfs_scrub	*sc,
218 	struct xfs_dinode	*dip)
219 {
220 	trace_xrep_dinode_header(sc, dip);
221 
222 	dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
223 	if (!xfs_dinode_good_version(sc->mp, dip->di_version))
224 		dip->di_version = 3;
225 	dip->di_ino = cpu_to_be64(sc->sm->sm_ino);
226 	uuid_copy(&dip->di_uuid, &sc->mp->m_sb.sb_meta_uuid);
227 	dip->di_gen = cpu_to_be32(sc->sm->sm_gen);
228 }
229 
230 /* Turn di_mode into /something/ recognizable. */
231 STATIC void
232 xrep_dinode_mode(
233 	struct xrep_inode	*ri,
234 	struct xfs_dinode	*dip)
235 {
236 	struct xfs_scrub	*sc = ri->sc;
237 	uint16_t		mode = be16_to_cpu(dip->di_mode);
238 
239 	trace_xrep_dinode_mode(sc, dip);
240 
241 	if (mode == 0 || xfs_mode_to_ftype(mode) != XFS_DIR3_FT_UNKNOWN)
242 		return;
243 
244 	/* bad mode, so we set it to a file that only root can read */
245 	mode = S_IFREG;
246 	dip->di_mode = cpu_to_be16(mode);
247 	dip->di_uid = 0;
248 	dip->di_gid = 0;
249 	ri->zap_acls = true;
250 }
251 
252 /* Fix any conflicting flags that the verifiers complain about. */
253 STATIC void
254 xrep_dinode_flags(
255 	struct xfs_scrub	*sc,
256 	struct xfs_dinode	*dip,
257 	bool			isrt)
258 {
259 	struct xfs_mount	*mp = sc->mp;
260 	uint64_t		flags2 = be64_to_cpu(dip->di_flags2);
261 	uint16_t		flags = be16_to_cpu(dip->di_flags);
262 	uint16_t		mode = be16_to_cpu(dip->di_mode);
263 
264 	trace_xrep_dinode_flags(sc, dip);
265 
266 	if (isrt)
267 		flags |= XFS_DIFLAG_REALTIME;
268 	else
269 		flags &= ~XFS_DIFLAG_REALTIME;
270 
271 	/*
272 	 * For regular files on a reflink filesystem, set the REFLINK flag to
273 	 * protect shared extents.  A later stage will actually check those
274 	 * extents and clear the flag if possible.
275 	 */
276 	if (xfs_has_reflink(mp) && S_ISREG(mode))
277 		flags2 |= XFS_DIFLAG2_REFLINK;
278 	else
279 		flags2 &= ~(XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE);
280 	if (flags & XFS_DIFLAG_REALTIME)
281 		flags2 &= ~XFS_DIFLAG2_REFLINK;
282 	if (!xfs_has_bigtime(mp))
283 		flags2 &= ~XFS_DIFLAG2_BIGTIME;
284 	if (!xfs_has_large_extent_counts(mp))
285 		flags2 &= ~XFS_DIFLAG2_NREXT64;
286 	if (flags2 & XFS_DIFLAG2_NREXT64)
287 		dip->di_nrext64_pad = 0;
288 	else if (dip->di_version >= 3)
289 		dip->di_v3_pad = 0;
290 	dip->di_flags = cpu_to_be16(flags);
291 	dip->di_flags2 = cpu_to_be64(flags2);
292 }
293 
294 /*
295  * Blow out symlink; now it points nowhere.  We don't have to worry about
296  * incore state because this inode is failing the verifiers.
297  */
298 STATIC void
299 xrep_dinode_zap_symlink(
300 	struct xrep_inode	*ri,
301 	struct xfs_dinode	*dip)
302 {
303 	struct xfs_scrub	*sc = ri->sc;
304 	char			*p;
305 
306 	trace_xrep_dinode_zap_symlink(sc, dip);
307 
308 	dip->di_format = XFS_DINODE_FMT_LOCAL;
309 	dip->di_size = cpu_to_be64(1);
310 	p = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
311 	*p = '?';
312 	ri->ino_sick_mask |= XFS_SICK_INO_SYMLINK_ZAPPED;
313 }
314 
315 /*
316  * Blow out dir, make the parent point to the root.  In the future repair will
317  * reconstruct this directory for us.  Note that there's no in-core directory
318  * inode because the sf verifier tripped, so we don't have to worry about the
319  * dentry cache.
320  */
321 STATIC void
322 xrep_dinode_zap_dir(
323 	struct xrep_inode	*ri,
324 	struct xfs_dinode	*dip)
325 {
326 	struct xfs_scrub	*sc = ri->sc;
327 	struct xfs_mount	*mp = sc->mp;
328 	struct xfs_dir2_sf_hdr	*sfp;
329 	int			i8count;
330 
331 	trace_xrep_dinode_zap_dir(sc, dip);
332 
333 	dip->di_format = XFS_DINODE_FMT_LOCAL;
334 	i8count = mp->m_sb.sb_rootino > XFS_DIR2_MAX_SHORT_INUM;
335 	sfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
336 	sfp->count = 0;
337 	sfp->i8count = i8count;
338 	xfs_dir2_sf_put_parent_ino(sfp, mp->m_sb.sb_rootino);
339 	dip->di_size = cpu_to_be64(xfs_dir2_sf_hdr_size(i8count));
340 	ri->ino_sick_mask |= XFS_SICK_INO_DIR_ZAPPED;
341 }
342 
343 /* Make sure we don't have a garbage file size. */
344 STATIC void
345 xrep_dinode_size(
346 	struct xrep_inode	*ri,
347 	struct xfs_dinode	*dip)
348 {
349 	struct xfs_scrub	*sc = ri->sc;
350 	uint64_t		size = be64_to_cpu(dip->di_size);
351 	uint16_t		mode = be16_to_cpu(dip->di_mode);
352 
353 	trace_xrep_dinode_size(sc, dip);
354 
355 	switch (mode & S_IFMT) {
356 	case S_IFIFO:
357 	case S_IFCHR:
358 	case S_IFBLK:
359 	case S_IFSOCK:
360 		/* di_size can't be nonzero for special files */
361 		dip->di_size = 0;
362 		break;
363 	case S_IFREG:
364 		/* Regular files can't be larger than 2^63-1 bytes. */
365 		dip->di_size = cpu_to_be64(size & ~(1ULL << 63));
366 		break;
367 	case S_IFLNK:
368 		/*
369 		 * Truncate ridiculously oversized symlinks.  If the size is
370 		 * zero, reset it to point to the current directory.  Both of
371 		 * these conditions trigger dinode verifier errors, so there
372 		 * is no in-core state to reset.
373 		 */
374 		if (size > XFS_SYMLINK_MAXLEN)
375 			dip->di_size = cpu_to_be64(XFS_SYMLINK_MAXLEN);
376 		else if (size == 0)
377 			xrep_dinode_zap_symlink(ri, dip);
378 		break;
379 	case S_IFDIR:
380 		/*
381 		 * Directories can't have a size larger than 32G.  If the size
382 		 * is zero, reset it to an empty directory.  Both of these
383 		 * conditions trigger dinode verifier errors, so there is no
384 		 * in-core state to reset.
385 		 */
386 		if (size > XFS_DIR2_SPACE_SIZE)
387 			dip->di_size = cpu_to_be64(XFS_DIR2_SPACE_SIZE);
388 		else if (size == 0)
389 			xrep_dinode_zap_dir(ri, dip);
390 		break;
391 	}
392 }
393 
394 /* Fix extent size hints. */
395 STATIC void
396 xrep_dinode_extsize_hints(
397 	struct xfs_scrub	*sc,
398 	struct xfs_dinode	*dip)
399 {
400 	struct xfs_mount	*mp = sc->mp;
401 	uint64_t		flags2 = be64_to_cpu(dip->di_flags2);
402 	uint16_t		flags = be16_to_cpu(dip->di_flags);
403 	uint16_t		mode = be16_to_cpu(dip->di_mode);
404 
405 	xfs_failaddr_t		fa;
406 
407 	trace_xrep_dinode_extsize_hints(sc, dip);
408 
409 	fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize),
410 			mode, flags);
411 	if (fa) {
412 		dip->di_extsize = 0;
413 		dip->di_flags &= ~cpu_to_be16(XFS_DIFLAG_EXTSIZE |
414 					      XFS_DIFLAG_EXTSZINHERIT);
415 	}
416 
417 	if (dip->di_version < 3)
418 		return;
419 
420 	fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
421 			mode, flags, flags2);
422 	if (fa) {
423 		dip->di_cowextsize = 0;
424 		dip->di_flags2 &= ~cpu_to_be64(XFS_DIFLAG2_COWEXTSIZE);
425 	}
426 }
427 
428 /* Count extents and blocks for an inode given an rmap. */
429 STATIC int
430 xrep_dinode_walk_rmap(
431 	struct xfs_btree_cur		*cur,
432 	const struct xfs_rmap_irec	*rec,
433 	void				*priv)
434 {
435 	struct xrep_inode		*ri = priv;
436 	int				error = 0;
437 
438 	if (xchk_should_terminate(ri->sc, &error))
439 		return error;
440 
441 	/* We only care about this inode. */
442 	if (rec->rm_owner != ri->sc->sm->sm_ino)
443 		return 0;
444 
445 	if (rec->rm_flags & XFS_RMAP_ATTR_FORK) {
446 		ri->attr_blocks += rec->rm_blockcount;
447 		if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
448 			ri->attr_extents++;
449 
450 		return 0;
451 	}
452 
453 	ri->data_blocks += rec->rm_blockcount;
454 	if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
455 		ri->data_extents++;
456 
457 	return 0;
458 }
459 
460 /* Count extents and blocks for an inode from all AG rmap data. */
461 STATIC int
462 xrep_dinode_count_ag_rmaps(
463 	struct xrep_inode	*ri,
464 	struct xfs_perag	*pag)
465 {
466 	struct xfs_btree_cur	*cur;
467 	struct xfs_buf		*agf;
468 	int			error;
469 
470 	error = xfs_alloc_read_agf(pag, ri->sc->tp, 0, &agf);
471 	if (error)
472 		return error;
473 
474 	cur = xfs_rmapbt_init_cursor(ri->sc->mp, ri->sc->tp, agf, pag);
475 	error = xfs_rmap_query_all(cur, xrep_dinode_walk_rmap, ri);
476 	xfs_btree_del_cursor(cur, error);
477 	xfs_trans_brelse(ri->sc->tp, agf);
478 	return error;
479 }
480 
481 /* Count extents and blocks for a given inode from all rmap data. */
482 STATIC int
483 xrep_dinode_count_rmaps(
484 	struct xrep_inode	*ri)
485 {
486 	struct xfs_perag	*pag;
487 	xfs_agnumber_t		agno;
488 	int			error;
489 
490 	if (!xfs_has_rmapbt(ri->sc->mp) || xfs_has_realtime(ri->sc->mp))
491 		return -EOPNOTSUPP;
492 
493 	for_each_perag(ri->sc->mp, agno, pag) {
494 		error = xrep_dinode_count_ag_rmaps(ri, pag);
495 		if (error) {
496 			xfs_perag_rele(pag);
497 			return error;
498 		}
499 	}
500 
501 	/* Can't have extents on both the rt and the data device. */
502 	if (ri->data_extents && ri->rt_extents)
503 		return -EFSCORRUPTED;
504 
505 	trace_xrep_dinode_count_rmaps(ri->sc,
506 			ri->data_blocks, ri->rt_blocks, ri->attr_blocks,
507 			ri->data_extents, ri->rt_extents, ri->attr_extents);
508 	return 0;
509 }
510 
511 /* Return true if this extents-format ifork looks like garbage. */
512 STATIC bool
513 xrep_dinode_bad_extents_fork(
514 	struct xfs_scrub	*sc,
515 	struct xfs_dinode	*dip,
516 	unsigned int		dfork_size,
517 	int			whichfork)
518 {
519 	struct xfs_bmbt_irec	new;
520 	struct xfs_bmbt_rec	*dp;
521 	xfs_extnum_t		nex;
522 	bool			isrt;
523 	unsigned int		i;
524 
525 	nex = xfs_dfork_nextents(dip, whichfork);
526 	if (nex > dfork_size / sizeof(struct xfs_bmbt_rec))
527 		return true;
528 
529 	dp = XFS_DFORK_PTR(dip, whichfork);
530 
531 	isrt = dip->di_flags & cpu_to_be16(XFS_DIFLAG_REALTIME);
532 	for (i = 0; i < nex; i++, dp++) {
533 		xfs_failaddr_t	fa;
534 
535 		xfs_bmbt_disk_get_all(dp, &new);
536 		fa = xfs_bmap_validate_extent_raw(sc->mp, isrt, whichfork,
537 				&new);
538 		if (fa)
539 			return true;
540 	}
541 
542 	return false;
543 }
544 
545 /* Return true if this btree-format ifork looks like garbage. */
546 STATIC bool
547 xrep_dinode_bad_bmbt_fork(
548 	struct xfs_scrub	*sc,
549 	struct xfs_dinode	*dip,
550 	unsigned int		dfork_size,
551 	int			whichfork)
552 {
553 	struct xfs_bmdr_block	*dfp;
554 	xfs_extnum_t		nex;
555 	unsigned int		i;
556 	unsigned int		dmxr;
557 	unsigned int		nrecs;
558 	unsigned int		level;
559 
560 	nex = xfs_dfork_nextents(dip, whichfork);
561 	if (nex <= dfork_size / sizeof(struct xfs_bmbt_rec))
562 		return true;
563 
564 	if (dfork_size < sizeof(struct xfs_bmdr_block))
565 		return true;
566 
567 	dfp = XFS_DFORK_PTR(dip, whichfork);
568 	nrecs = be16_to_cpu(dfp->bb_numrecs);
569 	level = be16_to_cpu(dfp->bb_level);
570 
571 	if (nrecs == 0 || XFS_BMDR_SPACE_CALC(nrecs) > dfork_size)
572 		return true;
573 	if (level == 0 || level >= XFS_BM_MAXLEVELS(sc->mp, whichfork))
574 		return true;
575 
576 	dmxr = xfs_bmdr_maxrecs(dfork_size, 0);
577 	for (i = 1; i <= nrecs; i++) {
578 		struct xfs_bmbt_key	*fkp;
579 		xfs_bmbt_ptr_t		*fpp;
580 		xfs_fileoff_t		fileoff;
581 		xfs_fsblock_t		fsbno;
582 
583 		fkp = XFS_BMDR_KEY_ADDR(dfp, i);
584 		fileoff = be64_to_cpu(fkp->br_startoff);
585 		if (!xfs_verify_fileoff(sc->mp, fileoff))
586 			return true;
587 
588 		fpp = XFS_BMDR_PTR_ADDR(dfp, i, dmxr);
589 		fsbno = be64_to_cpu(*fpp);
590 		if (!xfs_verify_fsbno(sc->mp, fsbno))
591 			return true;
592 	}
593 
594 	return false;
595 }
596 
597 /*
598  * Check the data fork for things that will fail the ifork verifiers or the
599  * ifork formatters.
600  */
601 STATIC bool
602 xrep_dinode_check_dfork(
603 	struct xfs_scrub	*sc,
604 	struct xfs_dinode	*dip,
605 	uint16_t		mode)
606 {
607 	void			*dfork_ptr;
608 	int64_t			data_size;
609 	unsigned int		fmt;
610 	unsigned int		dfork_size;
611 
612 	/*
613 	 * Verifier functions take signed int64_t, so check for bogus negative
614 	 * values first.
615 	 */
616 	data_size = be64_to_cpu(dip->di_size);
617 	if (data_size < 0)
618 		return true;
619 
620 	fmt = XFS_DFORK_FORMAT(dip, XFS_DATA_FORK);
621 	switch (mode & S_IFMT) {
622 	case S_IFIFO:
623 	case S_IFCHR:
624 	case S_IFBLK:
625 	case S_IFSOCK:
626 		if (fmt != XFS_DINODE_FMT_DEV)
627 			return true;
628 		break;
629 	case S_IFREG:
630 		if (fmt == XFS_DINODE_FMT_LOCAL)
631 			return true;
632 		fallthrough;
633 	case S_IFLNK:
634 	case S_IFDIR:
635 		switch (fmt) {
636 		case XFS_DINODE_FMT_LOCAL:
637 		case XFS_DINODE_FMT_EXTENTS:
638 		case XFS_DINODE_FMT_BTREE:
639 			break;
640 		default:
641 			return true;
642 		}
643 		break;
644 	default:
645 		return true;
646 	}
647 
648 	dfork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_DATA_FORK);
649 	dfork_ptr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
650 
651 	switch (fmt) {
652 	case XFS_DINODE_FMT_DEV:
653 		break;
654 	case XFS_DINODE_FMT_LOCAL:
655 		/* dir/symlink structure cannot be larger than the fork */
656 		if (data_size > dfork_size)
657 			return true;
658 		/* directory structure must pass verification. */
659 		if (S_ISDIR(mode) &&
660 		    xfs_dir2_sf_verify(sc->mp, dfork_ptr, data_size) != NULL)
661 			return true;
662 		/* symlink structure must pass verification. */
663 		if (S_ISLNK(mode) &&
664 		    xfs_symlink_shortform_verify(dfork_ptr, data_size) != NULL)
665 			return true;
666 		break;
667 	case XFS_DINODE_FMT_EXTENTS:
668 		if (xrep_dinode_bad_extents_fork(sc, dip, dfork_size,
669 				XFS_DATA_FORK))
670 			return true;
671 		break;
672 	case XFS_DINODE_FMT_BTREE:
673 		if (xrep_dinode_bad_bmbt_fork(sc, dip, dfork_size,
674 				XFS_DATA_FORK))
675 			return true;
676 		break;
677 	default:
678 		return true;
679 	}
680 
681 	return false;
682 }
683 
684 static void
685 xrep_dinode_set_data_nextents(
686 	struct xfs_dinode	*dip,
687 	xfs_extnum_t		nextents)
688 {
689 	if (xfs_dinode_has_large_extent_counts(dip))
690 		dip->di_big_nextents = cpu_to_be64(nextents);
691 	else
692 		dip->di_nextents = cpu_to_be32(nextents);
693 }
694 
695 static void
696 xrep_dinode_set_attr_nextents(
697 	struct xfs_dinode	*dip,
698 	xfs_extnum_t		nextents)
699 {
700 	if (xfs_dinode_has_large_extent_counts(dip))
701 		dip->di_big_anextents = cpu_to_be32(nextents);
702 	else
703 		dip->di_anextents = cpu_to_be16(nextents);
704 }
705 
706 /* Reset the data fork to something sane. */
707 STATIC void
708 xrep_dinode_zap_dfork(
709 	struct xrep_inode	*ri,
710 	struct xfs_dinode	*dip,
711 	uint16_t		mode)
712 {
713 	struct xfs_scrub	*sc = ri->sc;
714 
715 	trace_xrep_dinode_zap_dfork(sc, dip);
716 
717 	ri->ino_sick_mask |= XFS_SICK_INO_BMBTD_ZAPPED;
718 
719 	xrep_dinode_set_data_nextents(dip, 0);
720 	ri->data_blocks = 0;
721 	ri->rt_blocks = 0;
722 
723 	/* Special files always get reset to DEV */
724 	switch (mode & S_IFMT) {
725 	case S_IFIFO:
726 	case S_IFCHR:
727 	case S_IFBLK:
728 	case S_IFSOCK:
729 		dip->di_format = XFS_DINODE_FMT_DEV;
730 		dip->di_size = 0;
731 		return;
732 	}
733 
734 	/*
735 	 * If we have data extents, reset to an empty map and hope the user
736 	 * will run the bmapbtd checker next.
737 	 */
738 	if (ri->data_extents || ri->rt_extents || S_ISREG(mode)) {
739 		dip->di_format = XFS_DINODE_FMT_EXTENTS;
740 		return;
741 	}
742 
743 	/* Otherwise, reset the local format to the minimum. */
744 	switch (mode & S_IFMT) {
745 	case S_IFLNK:
746 		xrep_dinode_zap_symlink(ri, dip);
747 		break;
748 	case S_IFDIR:
749 		xrep_dinode_zap_dir(ri, dip);
750 		break;
751 	}
752 }
753 
754 /*
755  * Check the attr fork for things that will fail the ifork verifiers or the
756  * ifork formatters.
757  */
758 STATIC bool
759 xrep_dinode_check_afork(
760 	struct xfs_scrub		*sc,
761 	struct xfs_dinode		*dip)
762 {
763 	struct xfs_attr_sf_hdr		*afork_ptr;
764 	size_t				attr_size;
765 	unsigned int			afork_size;
766 
767 	if (XFS_DFORK_BOFF(dip) == 0)
768 		return dip->di_aformat != XFS_DINODE_FMT_EXTENTS ||
769 		       xfs_dfork_attr_extents(dip) != 0;
770 
771 	afork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK);
772 	afork_ptr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK);
773 
774 	switch (XFS_DFORK_FORMAT(dip, XFS_ATTR_FORK)) {
775 	case XFS_DINODE_FMT_LOCAL:
776 		/* Fork has to be large enough to extract the xattr size. */
777 		if (afork_size < sizeof(struct xfs_attr_sf_hdr))
778 			return true;
779 
780 		/* xattr structure cannot be larger than the fork */
781 		attr_size = be16_to_cpu(afork_ptr->totsize);
782 		if (attr_size > afork_size)
783 			return true;
784 
785 		/* xattr structure must pass verification. */
786 		return xfs_attr_shortform_verify(afork_ptr, attr_size) != NULL;
787 	case XFS_DINODE_FMT_EXTENTS:
788 		if (xrep_dinode_bad_extents_fork(sc, dip, afork_size,
789 					XFS_ATTR_FORK))
790 			return true;
791 		break;
792 	case XFS_DINODE_FMT_BTREE:
793 		if (xrep_dinode_bad_bmbt_fork(sc, dip, afork_size,
794 					XFS_ATTR_FORK))
795 			return true;
796 		break;
797 	default:
798 		return true;
799 	}
800 
801 	return false;
802 }
803 
804 /*
805  * Reset the attr fork to empty.  Since the attr fork could have contained
806  * ACLs, make the file readable only by root.
807  */
808 STATIC void
809 xrep_dinode_zap_afork(
810 	struct xrep_inode	*ri,
811 	struct xfs_dinode	*dip,
812 	uint16_t		mode)
813 {
814 	struct xfs_scrub	*sc = ri->sc;
815 
816 	trace_xrep_dinode_zap_afork(sc, dip);
817 
818 	ri->ino_sick_mask |= XFS_SICK_INO_BMBTA_ZAPPED;
819 
820 	dip->di_aformat = XFS_DINODE_FMT_EXTENTS;
821 	xrep_dinode_set_attr_nextents(dip, 0);
822 	ri->attr_blocks = 0;
823 
824 	/*
825 	 * If the data fork is in btree format, removing the attr fork entirely
826 	 * might cause verifier failures if the next level down in the bmbt
827 	 * could now fit in the data fork area.
828 	 */
829 	if (dip->di_format != XFS_DINODE_FMT_BTREE)
830 		dip->di_forkoff = 0;
831 	dip->di_mode = cpu_to_be16(mode & ~0777);
832 	dip->di_uid = 0;
833 	dip->di_gid = 0;
834 }
835 
/*
 * Make sure the fork offset is a sensible value.
 *
 * Compute the minimum space that each fork needs given its current ondisk
 * format, then adjust di_forkoff (moving or zapping the attr fork if need
 * be) so that a fork that was reset to zero extents, but for which the rmap
 * scan found extents, still has at least enough room for a minimal bmap
 * btree root.  @mode is only used when the attr fork has to be zapped.
 */
STATIC void
xrep_dinode_ensure_forkoff(
	struct xrep_inode	*ri,
	struct xfs_dinode	*dip,
	uint16_t		mode)
{
	struct xfs_bmdr_block	*bmdr;
	struct xfs_scrub	*sc = ri->sc;
	xfs_extnum_t		attr_extents, data_extents;
	size_t			bmdr_minsz = XFS_BMDR_SPACE_CALC(1);
	unsigned int		lit_sz = XFS_LITINO(sc->mp);
	unsigned int		afork_min, dfork_min;

	trace_xrep_dinode_ensure_forkoff(sc, dip);

	/*
	 * Before calling this function, xrep_dinode_zap_forks checked that
	 * both forks actually fit inside their respective literal areas.  If
	 * this was not the case, the fork was reset to FMT_EXTENTS with zero
	 * records.  If the rmapbt scan found attr or data fork blocks, this
	 * will be noted in the xrep_inode counters, and we must leave enough
	 * room for the bmap repair code to reconstruct the mapping structure.
	 *
	 * First, compute the minimum space required for the attr fork.
	 */
	switch (dip->di_aformat) {
	case XFS_DINODE_FMT_LOCAL:
		/*
		 * If we still have a shortform xattr structure at all, that
		 * means the attr fork area was exactly large enough to fit
		 * the sf structure.
		 */
		afork_min = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK);
		break;
	case XFS_DINODE_FMT_EXTENTS:
		attr_extents = xfs_dfork_attr_extents(dip);
		if (attr_extents) {
			/*
			 * We must maintain sufficient space to hold the entire
			 * extent map array in the attr fork.  Note that we
			 * previously zapped the fork if it had no chance of
			 * fitting in the inode.
			 */
			afork_min = sizeof(struct xfs_bmbt_rec) * attr_extents;
		} else if (ri->attr_extents > 0) {
			/*
			 * The attr fork thinks it has zero extents, but we
			 * found some xattr extents.  We need to leave enough
			 * empty space here so that the incore attr fork will
			 * get created (and hence trigger the attr fork bmap
			 * repairer).
			 */
			afork_min = bmdr_minsz;
		} else {
			/* No extents on disk or found in rmapbt. */
			afork_min = 0;
		}
		break;
	case XFS_DINODE_FMT_BTREE:
		/* Must have space for btree header and key/pointers. */
		bmdr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK);
		afork_min = XFS_BMAP_BROOT_SPACE(sc->mp, bmdr);
		break;
	default:
		/* We should never see any other formats. */
		afork_min = 0;
		break;
	}

	/* Compute the minimum space required for the data fork. */
	switch (dip->di_format) {
	case XFS_DINODE_FMT_DEV:
		dfork_min = sizeof(__be32);
		break;
	case XFS_DINODE_FMT_UUID:
		dfork_min = sizeof(uuid_t);
		break;
	case XFS_DINODE_FMT_LOCAL:
		/*
		 * If we still have a shortform data fork at all, that means
		 * the data fork area was large enough to fit whatever was in
		 * there.
		 */
		dfork_min = be64_to_cpu(dip->di_size);
		break;
	case XFS_DINODE_FMT_EXTENTS:
		data_extents = xfs_dfork_data_extents(dip);
		if (data_extents) {
			/*
			 * We must maintain sufficient space to hold the entire
			 * extent map array in the data fork.  Note that we
			 * previously zapped the fork if it had no chance of
			 * fitting in the inode.
			 */
			dfork_min = sizeof(struct xfs_bmbt_rec) * data_extents;
		} else if (ri->data_extents > 0 || ri->rt_extents > 0) {
			/*
			 * The data fork thinks it has zero extents, but we
			 * found some data extents.  We need to leave enough
			 * empty space here so that the data fork bmap repair
			 * will recover the mappings.
			 */
			dfork_min = bmdr_minsz;
		} else {
			/* No extents on disk or found in rmapbt. */
			dfork_min = 0;
		}
		break;
	case XFS_DINODE_FMT_BTREE:
		/* Must have space for btree header and key/pointers. */
		bmdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
		dfork_min = XFS_BMAP_BROOT_SPACE(sc->mp, bmdr);
		break;
	default:
		/* Any other format: no minimum space requirement. */
		dfork_min = 0;
		break;
	}

	/*
	 * Round all values up to the nearest 8 bytes, because that is the
	 * precision of di_forkoff.
	 */
	afork_min = roundup(afork_min, 8);
	dfork_min = roundup(dfork_min, 8);
	bmdr_minsz = roundup(bmdr_minsz, 8);

	ASSERT(dfork_min <= lit_sz);
	ASSERT(afork_min <= lit_sz);

	/*
	 * If the data fork was zapped and we don't have enough space for the
	 * recovery fork, move the attr fork up.
	 */
	if (dip->di_format == XFS_DINODE_FMT_EXTENTS &&
	    xfs_dfork_data_extents(dip) == 0 &&
	    (ri->data_extents > 0 || ri->rt_extents > 0) &&
	    bmdr_minsz > XFS_DFORK_DSIZE(dip, sc->mp)) {
		if (bmdr_minsz + afork_min > lit_sz) {
			/*
			 * The attr fork and the stub fork we need to recover
			 * the data fork won't both fit.  Zap the attr fork.
			 */
			xrep_dinode_zap_afork(ri, dip, mode);
			afork_min = bmdr_minsz;
		} else {
			void	*before, *after;

			/*
			 * Otherwise, just slide the attr fork up.  The attr
			 * fork size used for the copy is computed from the
			 * new fork offset.
			 */
			before = XFS_DFORK_APTR(dip);
			dip->di_forkoff = bmdr_minsz >> 3;
			after = XFS_DFORK_APTR(dip);
			memmove(after, before, XFS_DFORK_ASIZE(dip, sc->mp));
		}
	}

	/*
	 * If the attr fork was zapped and we don't have enough space for the
	 * recovery fork, move the attr fork down.
	 */
	if (dip->di_aformat == XFS_DINODE_FMT_EXTENTS &&
	    xfs_dfork_attr_extents(dip) == 0 &&
	    ri->attr_extents > 0 &&
	    bmdr_minsz > XFS_DFORK_ASIZE(dip, sc->mp)) {
		if (dip->di_format == XFS_DINODE_FMT_BTREE) {
			/*
			 * If the data fork is in btree format then we can't
			 * adjust forkoff because that runs the risk of
			 * violating the extents/btree format transition rules.
			 */
		} else if (bmdr_minsz + dfork_min > lit_sz) {
			/*
			 * If we can't move the attr fork, too bad, we lose the
			 * attr fork and leak its blocks.
			 */
			xrep_dinode_zap_afork(ri, dip, mode);
		} else {
			/*
			 * Otherwise, just slide the attr fork down.  The attr
			 * fork is empty, so we don't have any old contents to
			 * move here.
			 */
			dip->di_forkoff = (lit_sz - bmdr_minsz) >> 3;
		}
	}
}
1022 
1023 /*
1024  * Zap the data/attr forks if we spot anything that isn't going to pass the
1025  * ifork verifiers or the ifork formatters, because we need to get the inode
1026  * into good enough shape that the higher level repair functions can run.
1027  */
1028 STATIC void
1029 xrep_dinode_zap_forks(
1030 	struct xrep_inode	*ri,
1031 	struct xfs_dinode	*dip)
1032 {
1033 	struct xfs_scrub	*sc = ri->sc;
1034 	xfs_extnum_t		data_extents;
1035 	xfs_extnum_t		attr_extents;
1036 	xfs_filblks_t		nblocks;
1037 	uint16_t		mode;
1038 	bool			zap_datafork = false;
1039 	bool			zap_attrfork = ri->zap_acls;
1040 
1041 	trace_xrep_dinode_zap_forks(sc, dip);
1042 
1043 	mode = be16_to_cpu(dip->di_mode);
1044 
1045 	data_extents = xfs_dfork_data_extents(dip);
1046 	attr_extents = xfs_dfork_attr_extents(dip);
1047 	nblocks = be64_to_cpu(dip->di_nblocks);
1048 
1049 	/* Inode counters don't make sense? */
1050 	if (data_extents > nblocks)
1051 		zap_datafork = true;
1052 	if (attr_extents > nblocks)
1053 		zap_attrfork = true;
1054 	if (data_extents + attr_extents > nblocks)
1055 		zap_datafork = zap_attrfork = true;
1056 
1057 	if (!zap_datafork)
1058 		zap_datafork = xrep_dinode_check_dfork(sc, dip, mode);
1059 	if (!zap_attrfork)
1060 		zap_attrfork = xrep_dinode_check_afork(sc, dip);
1061 
1062 	/* Zap whatever's bad. */
1063 	if (zap_attrfork)
1064 		xrep_dinode_zap_afork(ri, dip, mode);
1065 	if (zap_datafork)
1066 		xrep_dinode_zap_dfork(ri, dip, mode);
1067 	xrep_dinode_ensure_forkoff(ri, dip, mode);
1068 
1069 	/*
1070 	 * Zero di_nblocks if we don't have any extents at all to satisfy the
1071 	 * buffer verifier.
1072 	 */
1073 	data_extents = xfs_dfork_data_extents(dip);
1074 	attr_extents = xfs_dfork_attr_extents(dip);
1075 	if (data_extents + attr_extents == 0)
1076 		dip->di_nblocks = 0;
1077 }
1078 
1079 /* Inode didn't pass dinode verifiers, so fix the raw buffer and retry iget. */
1080 STATIC int
1081 xrep_dinode_core(
1082 	struct xrep_inode	*ri)
1083 {
1084 	struct xfs_scrub	*sc = ri->sc;
1085 	struct xfs_buf		*bp;
1086 	struct xfs_dinode	*dip;
1087 	xfs_ino_t		ino = sc->sm->sm_ino;
1088 	int			error;
1089 	int			iget_error;
1090 
1091 	/* Figure out what this inode had mapped in both forks. */
1092 	error = xrep_dinode_count_rmaps(ri);
1093 	if (error)
1094 		return error;
1095 
1096 	/* Read the inode cluster buffer. */
1097 	error = xfs_trans_read_buf(sc->mp, sc->tp, sc->mp->m_ddev_targp,
1098 			ri->imap.im_blkno, ri->imap.im_len, XBF_UNMAPPED, &bp,
1099 			NULL);
1100 	if (error)
1101 		return error;
1102 
1103 	/* Make sure we can pass the inode buffer verifier. */
1104 	xrep_dinode_buf(sc, bp);
1105 	bp->b_ops = &xfs_inode_buf_ops;
1106 
1107 	/* Fix everything the verifier will complain about. */
1108 	dip = xfs_buf_offset(bp, ri->imap.im_boffset);
1109 	xrep_dinode_header(sc, dip);
1110 	xrep_dinode_mode(ri, dip);
1111 	xrep_dinode_flags(sc, dip, ri->rt_extents > 0);
1112 	xrep_dinode_size(ri, dip);
1113 	xrep_dinode_extsize_hints(sc, dip);
1114 	xrep_dinode_zap_forks(ri, dip);
1115 
1116 	/* Write out the inode. */
1117 	trace_xrep_dinode_fixed(sc, dip);
1118 	xfs_dinode_calc_crc(sc->mp, dip);
1119 	xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_DINO_BUF);
1120 	xfs_trans_log_buf(sc->tp, bp, ri->imap.im_boffset,
1121 			ri->imap.im_boffset + sc->mp->m_sb.sb_inodesize - 1);
1122 
1123 	/*
1124 	 * In theory, we've fixed the ondisk inode record enough that we should
1125 	 * be able to load the inode into the cache.  Try to iget that inode
1126 	 * now while we hold the AGI and the inode cluster buffer and take the
1127 	 * IOLOCK so that we can continue with repairs without anyone else
1128 	 * accessing the inode.  If iget fails, we still need to commit the
1129 	 * changes.
1130 	 */
1131 	iget_error = xchk_iget(sc, ino, &sc->ip);
1132 	if (!iget_error)
1133 		xchk_ilock(sc, XFS_IOLOCK_EXCL);
1134 
1135 	/*
1136 	 * Commit the inode cluster buffer updates and drop the AGI buffer that
1137 	 * we've been holding since scrub setup.  From here on out, repairs
1138 	 * deal only with the cached inode.
1139 	 */
1140 	error = xrep_trans_commit(sc);
1141 	if (error)
1142 		return error;
1143 
1144 	if (iget_error)
1145 		return iget_error;
1146 
1147 	error = xchk_trans_alloc(sc, 0);
1148 	if (error)
1149 		return error;
1150 
1151 	error = xrep_ino_dqattach(sc);
1152 	if (error)
1153 		return error;
1154 
1155 	xchk_ilock(sc, XFS_ILOCK_EXCL);
1156 	if (ri->ino_sick_mask)
1157 		xfs_inode_mark_sick(sc->ip, ri->ino_sick_mask);
1158 	return 0;
1159 }
1160 
1161 /* Fix everything xfs_dinode_verify cares about. */
1162 STATIC int
1163 xrep_dinode_problems(
1164 	struct xrep_inode	*ri)
1165 {
1166 	struct xfs_scrub	*sc = ri->sc;
1167 	int			error;
1168 
1169 	error = xrep_dinode_core(ri);
1170 	if (error)
1171 		return error;
1172 
1173 	/* We had to fix a totally busted inode, schedule quotacheck. */
1174 	if (XFS_IS_UQUOTA_ON(sc->mp))
1175 		xrep_force_quotacheck(sc, XFS_DQTYPE_USER);
1176 	if (XFS_IS_GQUOTA_ON(sc->mp))
1177 		xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP);
1178 	if (XFS_IS_PQUOTA_ON(sc->mp))
1179 		xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ);
1180 
1181 	return 0;
1182 }
1183 
1184 /*
1185  * Fix problems that the verifiers don't care about.  In general these are
1186  * errors that don't cause problems elsewhere in the kernel that we can easily
1187  * detect, so we don't check them all that rigorously.
1188  */
1189 
1190 /* Make sure block and extent counts are ok. */
1191 STATIC int
1192 xrep_inode_blockcounts(
1193 	struct xfs_scrub	*sc)
1194 {
1195 	struct xfs_ifork	*ifp;
1196 	xfs_filblks_t		count;
1197 	xfs_filblks_t		acount;
1198 	xfs_extnum_t		nextents;
1199 	int			error;
1200 
1201 	trace_xrep_inode_blockcounts(sc);
1202 
1203 	/* Set data fork counters from the data fork mappings. */
1204 	error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK,
1205 			&nextents, &count);
1206 	if (error)
1207 		return error;
1208 	if (xfs_is_reflink_inode(sc->ip)) {
1209 		/*
1210 		 * data fork blockcount can exceed physical storage if a user
1211 		 * reflinks the same block over and over again.
1212 		 */
1213 		;
1214 	} else if (XFS_IS_REALTIME_INODE(sc->ip)) {
1215 		if (count >= sc->mp->m_sb.sb_rblocks)
1216 			return -EFSCORRUPTED;
1217 	} else {
1218 		if (count >= sc->mp->m_sb.sb_dblocks)
1219 			return -EFSCORRUPTED;
1220 	}
1221 	error = xrep_ino_ensure_extent_count(sc, XFS_DATA_FORK, nextents);
1222 	if (error)
1223 		return error;
1224 	sc->ip->i_df.if_nextents = nextents;
1225 
1226 	/* Set attr fork counters from the attr fork mappings. */
1227 	ifp = xfs_ifork_ptr(sc->ip, XFS_ATTR_FORK);
1228 	if (ifp) {
1229 		error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK,
1230 				&nextents, &acount);
1231 		if (error)
1232 			return error;
1233 		if (count >= sc->mp->m_sb.sb_dblocks)
1234 			return -EFSCORRUPTED;
1235 		error = xrep_ino_ensure_extent_count(sc, XFS_ATTR_FORK,
1236 				nextents);
1237 		if (error)
1238 			return error;
1239 		ifp->if_nextents = nextents;
1240 	} else {
1241 		acount = 0;
1242 	}
1243 
1244 	sc->ip->i_nblocks = count + acount;
1245 	return 0;
1246 }
1247 
1248 /* Check for invalid uid/gid/prid. */
1249 STATIC void
1250 xrep_inode_ids(
1251 	struct xfs_scrub	*sc)
1252 {
1253 	bool			dirty = false;
1254 
1255 	trace_xrep_inode_ids(sc);
1256 
1257 	if (!uid_valid(VFS_I(sc->ip)->i_uid)) {
1258 		i_uid_write(VFS_I(sc->ip), 0);
1259 		dirty = true;
1260 		if (XFS_IS_UQUOTA_ON(sc->mp))
1261 			xrep_force_quotacheck(sc, XFS_DQTYPE_USER);
1262 	}
1263 
1264 	if (!gid_valid(VFS_I(sc->ip)->i_gid)) {
1265 		i_gid_write(VFS_I(sc->ip), 0);
1266 		dirty = true;
1267 		if (XFS_IS_GQUOTA_ON(sc->mp))
1268 			xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP);
1269 	}
1270 
1271 	if (sc->ip->i_projid == -1U) {
1272 		sc->ip->i_projid = 0;
1273 		dirty = true;
1274 		if (XFS_IS_PQUOTA_ON(sc->mp))
1275 			xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ);
1276 	}
1277 
1278 	/* strip setuid/setgid if we touched any of the ids */
1279 	if (dirty)
1280 		VFS_I(sc->ip)->i_mode &= ~(S_ISUID | S_ISGID);
1281 }
1282 
1283 static inline void
1284 xrep_clamp_timestamp(
1285 	struct xfs_inode	*ip,
1286 	struct timespec64	*ts)
1287 {
1288 	ts->tv_nsec = clamp_t(long, ts->tv_nsec, 0, NSEC_PER_SEC);
1289 	*ts = timestamp_truncate(*ts, VFS_I(ip));
1290 }
1291 
1292 /* Nanosecond counters can't have more than 1 billion. */
1293 STATIC void
1294 xrep_inode_timestamps(
1295 	struct xfs_inode	*ip)
1296 {
1297 	struct timespec64	tstamp;
1298 	struct inode		*inode = VFS_I(ip);
1299 
1300 	tstamp = inode_get_atime(inode);
1301 	xrep_clamp_timestamp(ip, &tstamp);
1302 	inode_set_atime_to_ts(inode, tstamp);
1303 
1304 	tstamp = inode_get_mtime(inode);
1305 	xrep_clamp_timestamp(ip, &tstamp);
1306 	inode_set_mtime_to_ts(inode, tstamp);
1307 
1308 	tstamp = inode_get_ctime(inode);
1309 	xrep_clamp_timestamp(ip, &tstamp);
1310 	inode_set_ctime_to_ts(inode, tstamp);
1311 
1312 	xrep_clamp_timestamp(ip, &ip->i_crtime);
1313 }
1314 
1315 /* Fix inode flags that don't make sense together. */
1316 STATIC void
1317 xrep_inode_flags(
1318 	struct xfs_scrub	*sc)
1319 {
1320 	uint16_t		mode;
1321 
1322 	trace_xrep_inode_flags(sc);
1323 
1324 	mode = VFS_I(sc->ip)->i_mode;
1325 
1326 	/* Clear junk flags */
1327 	if (sc->ip->i_diflags & ~XFS_DIFLAG_ANY)
1328 		sc->ip->i_diflags &= ~XFS_DIFLAG_ANY;
1329 
1330 	/* NEWRTBM only applies to realtime bitmaps */
1331 	if (sc->ip->i_ino == sc->mp->m_sb.sb_rbmino)
1332 		sc->ip->i_diflags |= XFS_DIFLAG_NEWRTBM;
1333 	else
1334 		sc->ip->i_diflags &= ~XFS_DIFLAG_NEWRTBM;
1335 
1336 	/* These only make sense for directories. */
1337 	if (!S_ISDIR(mode))
1338 		sc->ip->i_diflags &= ~(XFS_DIFLAG_RTINHERIT |
1339 					  XFS_DIFLAG_EXTSZINHERIT |
1340 					  XFS_DIFLAG_PROJINHERIT |
1341 					  XFS_DIFLAG_NOSYMLINKS);
1342 
1343 	/* These only make sense for files. */
1344 	if (!S_ISREG(mode))
1345 		sc->ip->i_diflags &= ~(XFS_DIFLAG_REALTIME |
1346 					  XFS_DIFLAG_EXTSIZE);
1347 
1348 	/* These only make sense for non-rt files. */
1349 	if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME)
1350 		sc->ip->i_diflags &= ~XFS_DIFLAG_FILESTREAM;
1351 
1352 	/* Immutable and append only?  Drop the append. */
1353 	if ((sc->ip->i_diflags & XFS_DIFLAG_IMMUTABLE) &&
1354 	    (sc->ip->i_diflags & XFS_DIFLAG_APPEND))
1355 		sc->ip->i_diflags &= ~XFS_DIFLAG_APPEND;
1356 
1357 	/* Clear junk flags. */
1358 	if (sc->ip->i_diflags2 & ~XFS_DIFLAG2_ANY)
1359 		sc->ip->i_diflags2 &= ~XFS_DIFLAG2_ANY;
1360 
1361 	/* No reflink flag unless we support it and it's a file. */
1362 	if (!xfs_has_reflink(sc->mp) || !S_ISREG(mode))
1363 		sc->ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
1364 
1365 	/* DAX only applies to files and dirs. */
1366 	if (!(S_ISREG(mode) || S_ISDIR(mode)))
1367 		sc->ip->i_diflags2 &= ~XFS_DIFLAG2_DAX;
1368 
1369 	/* No reflink files on the realtime device. */
1370 	if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME)
1371 		sc->ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
1372 }
1373 
1374 /*
1375  * Fix size problems with block/node format directories.  If we fail to find
1376  * the extent list, just bail out and let the bmapbtd repair functions clean
1377  * up that mess.
1378  */
1379 STATIC void
1380 xrep_inode_blockdir_size(
1381 	struct xfs_scrub	*sc)
1382 {
1383 	struct xfs_iext_cursor	icur;
1384 	struct xfs_bmbt_irec	got;
1385 	struct xfs_ifork	*ifp;
1386 	xfs_fileoff_t		off;
1387 	int			error;
1388 
1389 	trace_xrep_inode_blockdir_size(sc);
1390 
1391 	error = xfs_iread_extents(sc->tp, sc->ip, XFS_DATA_FORK);
1392 	if (error)
1393 		return;
1394 
1395 	/* Find the last block before 32G; this is the dir size. */
1396 	ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
1397 	off = XFS_B_TO_FSB(sc->mp, XFS_DIR2_SPACE_SIZE);
1398 	if (!xfs_iext_lookup_extent_before(sc->ip, ifp, &off, &icur, &got)) {
1399 		/* zero-extents directory? */
1400 		return;
1401 	}
1402 
1403 	off = got.br_startoff + got.br_blockcount;
1404 	sc->ip->i_disk_size = min_t(loff_t, XFS_DIR2_SPACE_SIZE,
1405 			XFS_FSB_TO_B(sc->mp, off));
1406 }
1407 
1408 /* Fix size problems with short format directories. */
1409 STATIC void
1410 xrep_inode_sfdir_size(
1411 	struct xfs_scrub	*sc)
1412 {
1413 	struct xfs_ifork	*ifp;
1414 
1415 	trace_xrep_inode_sfdir_size(sc);
1416 
1417 	ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
1418 	sc->ip->i_disk_size = ifp->if_bytes;
1419 }
1420 
1421 /*
1422  * Fix any irregularities in a directory inode's size now that we can iterate
1423  * extent maps and access other regular inode data.
1424  */
1425 STATIC void
1426 xrep_inode_dir_size(
1427 	struct xfs_scrub	*sc)
1428 {
1429 	trace_xrep_inode_dir_size(sc);
1430 
1431 	switch (sc->ip->i_df.if_format) {
1432 	case XFS_DINODE_FMT_EXTENTS:
1433 	case XFS_DINODE_FMT_BTREE:
1434 		xrep_inode_blockdir_size(sc);
1435 		break;
1436 	case XFS_DINODE_FMT_LOCAL:
1437 		xrep_inode_sfdir_size(sc);
1438 		break;
1439 	}
1440 }
1441 
1442 /* Fix extent size hint problems. */
1443 STATIC void
1444 xrep_inode_extsize(
1445 	struct xfs_scrub	*sc)
1446 {
1447 	/* Fix misaligned extent size hints on a directory. */
1448 	if ((sc->ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
1449 	    (sc->ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) &&
1450 	    xfs_extlen_to_rtxmod(sc->mp, sc->ip->i_extsize) > 0) {
1451 		sc->ip->i_extsize = 0;
1452 		sc->ip->i_diflags &= ~XFS_DIFLAG_EXTSZINHERIT;
1453 	}
1454 }
1455 
1456 /* Fix any irregularities in an inode that the verifiers don't catch. */
1457 STATIC int
1458 xrep_inode_problems(
1459 	struct xfs_scrub	*sc)
1460 {
1461 	int			error;
1462 
1463 	error = xrep_inode_blockcounts(sc);
1464 	if (error)
1465 		return error;
1466 	xrep_inode_timestamps(sc->ip);
1467 	xrep_inode_flags(sc);
1468 	xrep_inode_ids(sc);
1469 	/*
1470 	 * We can now do a better job fixing the size of a directory now that
1471 	 * we can scan the data fork extents than we could in xrep_dinode_size.
1472 	 */
1473 	if (S_ISDIR(VFS_I(sc->ip)->i_mode))
1474 		xrep_inode_dir_size(sc);
1475 	xrep_inode_extsize(sc);
1476 
1477 	trace_xrep_inode_fixed(sc);
1478 	xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
1479 	return xrep_roll_trans(sc);
1480 }
1481 
1482 /* Repair an inode's fields. */
1483 int
1484 xrep_inode(
1485 	struct xfs_scrub	*sc)
1486 {
1487 	int			error = 0;
1488 
1489 	/*
1490 	 * No inode?  That means we failed the _iget verifiers.  Repair all
1491 	 * the things that the inode verifiers care about, then retry _iget.
1492 	 */
1493 	if (!sc->ip) {
1494 		struct xrep_inode	*ri = sc->buf;
1495 
1496 		ASSERT(ri != NULL);
1497 
1498 		error = xrep_dinode_problems(ri);
1499 		if (error)
1500 			return error;
1501 
1502 		/* By this point we had better have a working incore inode. */
1503 		if (!sc->ip)
1504 			return -EFSCORRUPTED;
1505 	}
1506 
1507 	xfs_trans_ijoin(sc->tp, sc->ip, 0);
1508 
1509 	/* If we found corruption of any kind, try to fix it. */
1510 	if ((sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) ||
1511 	    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_XCORRUPT)) {
1512 		error = xrep_inode_problems(sc);
1513 		if (error)
1514 			return error;
1515 	}
1516 
1517 	/* See if we can clear the reflink flag. */
1518 	if (xfs_is_reflink_inode(sc->ip)) {
1519 		error = xfs_reflink_clear_inode_flag(sc->ip, &sc->tp);
1520 		if (error)
1521 			return error;
1522 	}
1523 
1524 	return xrep_defer_finish(sc);
1525 }
1526