xref: /linux/fs/xfs/scrub/inode_repair.c (revision c148bc7535650fbfa95a1f571b9ffa2ab478ea33)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_defer.h"
13 #include "xfs_btree.h"
14 #include "xfs_bit.h"
15 #include "xfs_log_format.h"
16 #include "xfs_trans.h"
17 #include "xfs_sb.h"
18 #include "xfs_inode.h"
19 #include "xfs_icache.h"
20 #include "xfs_inode_buf.h"
21 #include "xfs_inode_fork.h"
22 #include "xfs_ialloc.h"
23 #include "xfs_da_format.h"
24 #include "xfs_reflink.h"
25 #include "xfs_alloc.h"
26 #include "xfs_rmap.h"
27 #include "xfs_rmap_btree.h"
28 #include "xfs_bmap.h"
29 #include "xfs_bmap_btree.h"
30 #include "xfs_bmap_util.h"
31 #include "xfs_dir2.h"
32 #include "xfs_dir2_priv.h"
33 #include "xfs_quota_defs.h"
34 #include "xfs_quota.h"
35 #include "xfs_ag.h"
36 #include "xfs_rtbitmap.h"
37 #include "xfs_attr_leaf.h"
38 #include "xfs_log_priv.h"
39 #include "xfs_health.h"
40 #include "xfs_symlink_remote.h"
41 #include "xfs_rtgroup.h"
42 #include "xfs_rtrmap_btree.h"
43 #include "xfs_rtrefcount_btree.h"
44 #include "scrub/xfs_scrub.h"
45 #include "scrub/scrub.h"
46 #include "scrub/common.h"
47 #include "scrub/btree.h"
48 #include "scrub/trace.h"
49 #include "scrub/repair.h"
50 #include "scrub/iscan.h"
51 #include "scrub/readdir.h"
52 #include "scrub/tempfile.h"
53 
54 /*
55  * Inode Record Repair
56  * ===================
57  *
58  * Roughly speaking, inode problems can be classified based on whether or not
59  * they trip the dinode verifiers.  If those trip, then we won't be able to
60  * xfs_iget ourselves the inode.
61  *
62  * Therefore, the xrep_dinode_* functions fix anything that would trip the
63  * inode buffer verifier or the dinode verifier.  The xrep_inode_* functions
64  * fix things on live incore inodes.  The inode repair functions make decisions
65  * with security and usability implications when reviving a file:
66  *
67  * - Files with zero di_mode or a garbage di_mode are converted to a regular
68  *   file that only root can read.  This file may not actually contain user
69  *   data, if the file was not previously a regular file.  Setuid and setgid
70  *   bits are cleared.
71  *
72  * - Zero-size directories can be truncated to look empty.  It is necessary to
73  *   run the bmapbtd and directory repair functions to fully rebuild the
74  *   directory.
75  *
76  * - Zero-size symbolic link targets can be truncated to '?'.  It is necessary
77  *   to run the bmapbtd and symlink repair functions to salvage the symlink.
78  *
79  * - Invalid extent size hints will be removed.
80  *
81  * - Quotacheck will be scheduled if we repaired an inode that was so badly
82  *   damaged that the ondisk inode had to be rebuilt.
83  *
84  * - Invalid user, group, or project IDs (aka -1U) will be reset to zero.
85  *   Setuid and setgid bits are cleared.
86  *
87  * - Data and attr forks are reset to extents format with zero extents if the
88  *   fork data is inconsistent.  It is necessary to run the bmapbtd or bmapbta
89  *   repair functions to recover the space mapping.
90  *
91  * - ACLs will not be recovered if the attr fork is zapped or the extended
92  *   attribute structure itself requires salvaging.
93  *
94  * - If the attr fork is zapped, the user and group ids are reset to root and
95  *   the setuid and setgid bits are removed.
96  */
97 
98 /*
99  * All the information we need to repair the ondisk inode if we can't iget the
100  * incore inode.  We don't allocate this buffer unless we're going to perform
101  * a repair to the ondisk inode cluster buffer.
102  */
103 struct xrep_inode {
104 	/* Inode mapping that we saved from the initial lookup attempt. */
105 	struct xfs_imap		imap;
106 
107 	struct xfs_scrub	*sc;
108 
109 	/* Blocks in use on the data device by data extents or bmbt blocks. */
110 	xfs_rfsblock_t		data_blocks;
111 
112 	/* Blocks in use on the rt device. */
113 	xfs_rfsblock_t		rt_blocks;
114 
115 	/* Blocks in use by the attr fork. */
116 	xfs_rfsblock_t		attr_blocks;
117 
118 	/* Number of data device extents for the data fork. */
119 	xfs_extnum_t		data_extents;
120 
121 	/*
122 	 * Number of realtime device extents for the data fork.  If
123 	 * data_extents and rt_extents indicate that the data fork has extents
124 	 * on both devices, we'll just back away slowly.
125 	 */
126 	xfs_extnum_t		rt_extents;
127 
128 	/* Number of (data device) extents for the attr fork. */
129 	xfs_aextnum_t		attr_extents;
130 
131 	/* Sick state to set after zapping parts of the inode. */
132 	unsigned int		ino_sick_mask;
133 
134 	/* Must we remove all access from this file? */
135 	bool			zap_acls;
136 
137 	/* Inode scanner to see if we can find the ftype from dirents */
138 	struct xchk_iscan	ftype_iscan;
139 	uint8_t			alleged_ftype;
140 };
141 
142 /*
143  * Setup function for inode repair.  @imap contains the ondisk inode mapping
144  * information so that we can correct the ondisk inode cluster buffer if
145  * necessary to make iget work.
146  */
147 int
xrep_setup_inode(struct xfs_scrub * sc,const struct xfs_imap * imap)148 xrep_setup_inode(
149 	struct xfs_scrub	*sc,
150 	const struct xfs_imap	*imap)
151 {
152 	struct xrep_inode	*ri;
153 
154 	sc->buf = kzalloc(sizeof(struct xrep_inode), XCHK_GFP_FLAGS);
155 	if (!sc->buf)
156 		return -ENOMEM;
157 
158 	ri = sc->buf;
159 	memcpy(&ri->imap, imap, sizeof(struct xfs_imap));
160 	ri->sc = sc;
161 	return 0;
162 }
163 
164 /*
165  * Make sure this ondisk inode can pass the inode buffer verifier.  This is
166  * not the same as the dinode verifier.
167  */
168 STATIC void
xrep_dinode_buf_core(struct xfs_scrub * sc,struct xfs_buf * bp,unsigned int ioffset)169 xrep_dinode_buf_core(
170 	struct xfs_scrub	*sc,
171 	struct xfs_buf		*bp,
172 	unsigned int		ioffset)
173 {
174 	struct xfs_dinode	*dip = xfs_buf_offset(bp, ioffset);
175 	struct xfs_trans	*tp = sc->tp;
176 	struct xfs_mount	*mp = sc->mp;
177 	xfs_agino_t		agino;
178 	bool			crc_ok = false;
179 	bool			magic_ok = false;
180 	bool			unlinked_ok = false;
181 
182 	agino = be32_to_cpu(dip->di_next_unlinked);
183 
184 	if (xfs_verify_agino_or_null(bp->b_pag, agino))
185 		unlinked_ok = true;
186 
187 	if (dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
188 	    xfs_dinode_good_version(mp, dip->di_version))
189 		magic_ok = true;
190 
191 	if (xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
192 			XFS_DINODE_CRC_OFF))
193 		crc_ok = true;
194 
195 	if (magic_ok && unlinked_ok && crc_ok)
196 		return;
197 
198 	if (!magic_ok) {
199 		dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
200 		dip->di_version = 3;
201 	}
202 	if (!unlinked_ok)
203 		dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
204 	xfs_dinode_calc_crc(mp, dip);
205 	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
206 	xfs_trans_log_buf(tp, bp, ioffset,
207 				  ioffset + sizeof(struct xfs_dinode) - 1);
208 }
209 
210 /* Make sure this inode cluster buffer can pass the inode buffer verifier. */
211 STATIC void
xrep_dinode_buf(struct xfs_scrub * sc,struct xfs_buf * bp)212 xrep_dinode_buf(
213 	struct xfs_scrub	*sc,
214 	struct xfs_buf		*bp)
215 {
216 	struct xfs_mount	*mp = sc->mp;
217 	int			i;
218 	int			ni;
219 
220 	ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
221 	for (i = 0; i < ni; i++)
222 		xrep_dinode_buf_core(sc, bp, i << mp->m_sb.sb_inodelog);
223 }
224 
225 /* Reinitialize things that never change in an inode. */
226 STATIC void
xrep_dinode_header(struct xfs_scrub * sc,struct xfs_dinode * dip)227 xrep_dinode_header(
228 	struct xfs_scrub	*sc,
229 	struct xfs_dinode	*dip)
230 {
231 	trace_xrep_dinode_header(sc, dip);
232 
233 	dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
234 	if (!xfs_dinode_good_version(sc->mp, dip->di_version))
235 		dip->di_version = 3;
236 	dip->di_ino = cpu_to_be64(sc->sm->sm_ino);
237 	uuid_copy(&dip->di_uuid, &sc->mp->m_sb.sb_meta_uuid);
238 	dip->di_gen = cpu_to_be32(sc->sm->sm_gen);
239 }
240 
241 /*
242  * If this directory entry points to the scrub target inode, then the directory
243  * we're scanning is the parent of the scrub target inode.
244  */
245 STATIC int
xrep_dinode_findmode_dirent(struct xfs_scrub * sc,struct xfs_inode * dp,xfs_dir2_dataptr_t dapos,const struct xfs_name * name,xfs_ino_t ino,void * priv)246 xrep_dinode_findmode_dirent(
247 	struct xfs_scrub		*sc,
248 	struct xfs_inode		*dp,
249 	xfs_dir2_dataptr_t		dapos,
250 	const struct xfs_name		*name,
251 	xfs_ino_t			ino,
252 	void				*priv)
253 {
254 	struct xrep_inode		*ri = priv;
255 	int				error = 0;
256 
257 	if (xchk_should_terminate(ri->sc, &error))
258 		return error;
259 
260 	if (ino != sc->sm->sm_ino)
261 		return 0;
262 
263 	/* Ignore garbage directory entry names. */
264 	if (name->len == 0 || !xfs_dir2_namecheck(name->name, name->len))
265 		return -EFSCORRUPTED;
266 
267 	/* Don't pick up dot or dotdot entries; we only want child dirents. */
268 	if (xfs_dir2_samename(name, &xfs_name_dotdot) ||
269 	    xfs_dir2_samename(name, &xfs_name_dot))
270 		return 0;
271 
272 	/*
273 	 * Uhoh, more than one parent for this inode and they don't agree on
274 	 * the file type?
275 	 */
276 	if (ri->alleged_ftype != XFS_DIR3_FT_UNKNOWN &&
277 	    ri->alleged_ftype != name->type) {
278 		trace_xrep_dinode_findmode_dirent_inval(ri->sc, dp, name->type,
279 				ri->alleged_ftype);
280 		return -EFSCORRUPTED;
281 	}
282 
283 	/* We found a potential parent; remember the ftype. */
284 	trace_xrep_dinode_findmode_dirent(ri->sc, dp, name->type);
285 	ri->alleged_ftype = name->type;
286 	return 0;
287 }
288 
289 /* Try to lock a directory, or wait a jiffy. */
290 static inline int
xrep_dinode_ilock_nowait(struct xfs_inode * dp,unsigned int lock_mode)291 xrep_dinode_ilock_nowait(
292 	struct xfs_inode	*dp,
293 	unsigned int		lock_mode)
294 {
295 	if (xfs_ilock_nowait(dp, lock_mode))
296 		return true;
297 
298 	schedule_timeout_killable(1);
299 	return false;
300 }
301 
302 /*
303  * Try to lock a directory to look for ftype hints.  Since we already hold the
304  * AGI buffer, we cannot block waiting for the ILOCK because rename can take
305  * the ILOCK and then try to lock AGIs.
306  */
307 STATIC int
xrep_dinode_trylock_directory(struct xrep_inode * ri,struct xfs_inode * dp,unsigned int * lock_modep)308 xrep_dinode_trylock_directory(
309 	struct xrep_inode	*ri,
310 	struct xfs_inode	*dp,
311 	unsigned int		*lock_modep)
312 {
313 	unsigned long		deadline = jiffies + msecs_to_jiffies(30000);
314 	unsigned int		lock_mode;
315 	int			error = 0;
316 
317 	do {
318 		if (xchk_should_terminate(ri->sc, &error))
319 			return error;
320 
321 		if (xfs_need_iread_extents(&dp->i_df))
322 			lock_mode = XFS_ILOCK_EXCL;
323 		else
324 			lock_mode = XFS_ILOCK_SHARED;
325 
326 		if (xrep_dinode_ilock_nowait(dp, lock_mode)) {
327 			*lock_modep = lock_mode;
328 			return 0;
329 		}
330 	} while (!time_is_before_jiffies(deadline));
331 	return -EBUSY;
332 }
333 
334 /*
335  * If this is a directory, walk the dirents looking for any that point to the
336  * scrub target inode.
337  */
338 STATIC int
xrep_dinode_findmode_walk_directory(struct xrep_inode * ri,struct xfs_inode * dp)339 xrep_dinode_findmode_walk_directory(
340 	struct xrep_inode	*ri,
341 	struct xfs_inode	*dp)
342 {
343 	struct xfs_scrub	*sc = ri->sc;
344 	unsigned int		lock_mode;
345 	int			error = 0;
346 
347 	/* Ignore temporary repair directories. */
348 	if (xrep_is_tempfile(dp))
349 		return 0;
350 
351 	/*
352 	 * Scan the directory to see if there it contains an entry pointing to
353 	 * the directory that we are repairing.
354 	 */
355 	error = xrep_dinode_trylock_directory(ri, dp, &lock_mode);
356 	if (error)
357 		return error;
358 
359 	/*
360 	 * If this directory is known to be sick, we cannot scan it reliably
361 	 * and must abort.
362 	 */
363 	if (xfs_inode_has_sickness(dp, XFS_SICK_INO_CORE |
364 				       XFS_SICK_INO_BMBTD |
365 				       XFS_SICK_INO_DIR)) {
366 		error = -EFSCORRUPTED;
367 		goto out_unlock;
368 	}
369 
370 	/*
371 	 * We cannot complete our parent pointer scan if a directory looks as
372 	 * though it has been zapped by the inode record repair code.
373 	 */
374 	if (xchk_dir_looks_zapped(dp)) {
375 		error = -EBUSY;
376 		goto out_unlock;
377 	}
378 
379 	error = xchk_dir_walk(sc, dp, xrep_dinode_findmode_dirent, ri);
380 	if (error)
381 		goto out_unlock;
382 
383 out_unlock:
384 	xfs_iunlock(dp, lock_mode);
385 	return error;
386 }
387 
388 /*
389  * Try to find the mode of the inode being repaired by looking for directories
390  * that point down to this file.
391  */
392 STATIC int
xrep_dinode_find_mode(struct xrep_inode * ri,uint16_t * mode)393 xrep_dinode_find_mode(
394 	struct xrep_inode	*ri,
395 	uint16_t		*mode)
396 {
397 	struct xfs_scrub	*sc = ri->sc;
398 	struct xfs_inode	*dp;
399 	int			error;
400 
401 	/* No ftype means we have no other metadata to consult. */
402 	if (!xfs_has_ftype(sc->mp)) {
403 		*mode = S_IFREG;
404 		return 0;
405 	}
406 
407 	/*
408 	 * Scan all directories for parents that might point down to this
409 	 * inode.  Skip the inode being repaired during the scan since it
410 	 * cannot be its own parent.  Note that we still hold the AGI locked
411 	 * so there's a real possibility that _iscan_iter can return EBUSY.
412 	 */
413 	xchk_iscan_start(sc, 5000, 100, &ri->ftype_iscan);
414 	xchk_iscan_set_agi_trylock(&ri->ftype_iscan);
415 	ri->ftype_iscan.skip_ino = sc->sm->sm_ino;
416 	ri->alleged_ftype = XFS_DIR3_FT_UNKNOWN;
417 	while ((error = xchk_iscan_iter(&ri->ftype_iscan, &dp)) == 1) {
418 		if (S_ISDIR(VFS_I(dp)->i_mode))
419 			error = xrep_dinode_findmode_walk_directory(ri, dp);
420 		xchk_iscan_mark_visited(&ri->ftype_iscan, dp);
421 		xchk_irele(sc, dp);
422 		if (error < 0)
423 			break;
424 		if (xchk_should_terminate(sc, &error))
425 			break;
426 	}
427 	xchk_iscan_iter_finish(&ri->ftype_iscan);
428 	xchk_iscan_teardown(&ri->ftype_iscan);
429 
430 	if (error == -EBUSY) {
431 		if (ri->alleged_ftype != XFS_DIR3_FT_UNKNOWN) {
432 			/*
433 			 * If we got an EBUSY after finding at least one
434 			 * dirent, that means the scan found an inode on the
435 			 * inactivation list and could not open it.  Accept the
436 			 * alleged ftype and install a new mode below.
437 			 */
438 			error = 0;
439 		} else if (!(sc->flags & XCHK_TRY_HARDER)) {
440 			/*
441 			 * Otherwise, retry the operation one time to see if
442 			 * the reason for the delay is an inode from the same
443 			 * cluster buffer waiting on the inactivation list.
444 			 */
445 			error = -EDEADLOCK;
446 		}
447 	}
448 	if (error)
449 		return error;
450 
451 	/*
452 	 * Convert the discovered ftype into the file mode.  If all else fails,
453 	 * return S_IFREG.
454 	 */
455 	switch (ri->alleged_ftype) {
456 	case XFS_DIR3_FT_DIR:
457 		*mode = S_IFDIR;
458 		break;
459 	case XFS_DIR3_FT_WHT:
460 	case XFS_DIR3_FT_CHRDEV:
461 		*mode = S_IFCHR;
462 		break;
463 	case XFS_DIR3_FT_BLKDEV:
464 		*mode = S_IFBLK;
465 		break;
466 	case XFS_DIR3_FT_FIFO:
467 		*mode = S_IFIFO;
468 		break;
469 	case XFS_DIR3_FT_SOCK:
470 		*mode = S_IFSOCK;
471 		break;
472 	case XFS_DIR3_FT_SYMLINK:
473 		*mode = S_IFLNK;
474 		break;
475 	default:
476 		*mode = S_IFREG;
477 		break;
478 	}
479 	return 0;
480 }
481 
482 /* Turn di_mode into /something/ recognizable.  Returns true if we succeed. */
483 STATIC int
xrep_dinode_mode(struct xrep_inode * ri,struct xfs_dinode * dip)484 xrep_dinode_mode(
485 	struct xrep_inode	*ri,
486 	struct xfs_dinode	*dip)
487 {
488 	struct xfs_scrub	*sc = ri->sc;
489 	uint16_t		mode = be16_to_cpu(dip->di_mode);
490 	int			error;
491 
492 	trace_xrep_dinode_mode(sc, dip);
493 
494 	if (mode == 0 || xfs_mode_to_ftype(mode) != XFS_DIR3_FT_UNKNOWN)
495 		return 0;
496 
497 	/* Try to fix the mode.  If we cannot, then leave everything alone. */
498 	error = xrep_dinode_find_mode(ri, &mode);
499 	switch (error) {
500 	case -EINTR:
501 	case -EBUSY:
502 	case -EDEADLOCK:
503 		/* temporary failure or fatal signal */
504 		return error;
505 	case 0:
506 		/* found mode */
507 		break;
508 	default:
509 		/* some other error, assume S_IFREG */
510 		mode = S_IFREG;
511 		break;
512 	}
513 
514 	/* bad mode, so we set it to a file that only root can read */
515 	dip->di_mode = cpu_to_be16(mode);
516 	dip->di_uid = 0;
517 	dip->di_gid = 0;
518 	ri->zap_acls = true;
519 	return 0;
520 }
521 
522 /* Fix unused link count fields having nonzero values. */
523 STATIC void
xrep_dinode_nlinks(struct xfs_dinode * dip)524 xrep_dinode_nlinks(
525 	struct xfs_dinode	*dip)
526 {
527 	if (dip->di_version < 2) {
528 		dip->di_nlink = 0;
529 		return;
530 	}
531 
532 	if (xfs_dinode_is_metadir(dip)) {
533 		if (be16_to_cpu(dip->di_metatype) >= XFS_METAFILE_MAX)
534 			dip->di_metatype = cpu_to_be16(XFS_METAFILE_UNKNOWN);
535 	} else {
536 		dip->di_metatype = 0;
537 	}
538 }
539 
540 /* Fix any conflicting flags that the verifiers complain about. */
541 STATIC void
xrep_dinode_flags(struct xfs_scrub * sc,struct xfs_dinode * dip,bool isrt)542 xrep_dinode_flags(
543 	struct xfs_scrub	*sc,
544 	struct xfs_dinode	*dip,
545 	bool			isrt)
546 {
547 	struct xfs_mount	*mp = sc->mp;
548 	uint64_t		flags2 = be64_to_cpu(dip->di_flags2);
549 	uint16_t		flags = be16_to_cpu(dip->di_flags);
550 	uint16_t		mode = be16_to_cpu(dip->di_mode);
551 
552 	trace_xrep_dinode_flags(sc, dip);
553 
554 	if (isrt)
555 		flags |= XFS_DIFLAG_REALTIME;
556 	else
557 		flags &= ~XFS_DIFLAG_REALTIME;
558 
559 	/*
560 	 * For regular files on a reflink filesystem, set the REFLINK flag to
561 	 * protect shared extents.  A later stage will actually check those
562 	 * extents and clear the flag if possible.
563 	 */
564 	if (xfs_has_reflink(mp) && S_ISREG(mode))
565 		flags2 |= XFS_DIFLAG2_REFLINK;
566 	else
567 		flags2 &= ~(XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE);
568 	if (!xfs_has_bigtime(mp))
569 		flags2 &= ~XFS_DIFLAG2_BIGTIME;
570 	if (!xfs_has_large_extent_counts(mp))
571 		flags2 &= ~XFS_DIFLAG2_NREXT64;
572 	if (flags2 & XFS_DIFLAG2_NREXT64)
573 		dip->di_nrext64_pad = 0;
574 	else if (dip->di_version >= 3)
575 		dip->di_v3_pad = 0;
576 
577 	if (flags2 & XFS_DIFLAG2_METADATA) {
578 		xfs_failaddr_t	fa;
579 
580 		fa = xfs_dinode_verify_metadir(sc->mp, dip, mode, flags,
581 				flags2);
582 		if (fa)
583 			flags2 &= ~XFS_DIFLAG2_METADATA;
584 	}
585 
586 	dip->di_flags = cpu_to_be16(flags);
587 	dip->di_flags2 = cpu_to_be64(flags2);
588 }
589 
590 /*
591  * Blow out symlink; now it points nowhere.  We don't have to worry about
592  * incore state because this inode is failing the verifiers.
593  */
594 STATIC void
xrep_dinode_zap_symlink(struct xrep_inode * ri,struct xfs_dinode * dip)595 xrep_dinode_zap_symlink(
596 	struct xrep_inode	*ri,
597 	struct xfs_dinode	*dip)
598 {
599 	struct xfs_scrub	*sc = ri->sc;
600 	char			*p;
601 
602 	trace_xrep_dinode_zap_symlink(sc, dip);
603 
604 	dip->di_format = XFS_DINODE_FMT_LOCAL;
605 	dip->di_size = cpu_to_be64(1);
606 	p = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
607 	*p = '?';
608 	ri->ino_sick_mask |= XFS_SICK_INO_SYMLINK_ZAPPED;
609 }
610 
611 /*
612  * Blow out dir, make the parent point to the root.  In the future repair will
613  * reconstruct this directory for us.  Note that there's no in-core directory
614  * inode because the sf verifier tripped, so we don't have to worry about the
615  * dentry cache.
616  */
617 STATIC void
xrep_dinode_zap_dir(struct xrep_inode * ri,struct xfs_dinode * dip)618 xrep_dinode_zap_dir(
619 	struct xrep_inode	*ri,
620 	struct xfs_dinode	*dip)
621 {
622 	struct xfs_scrub	*sc = ri->sc;
623 	struct xfs_mount	*mp = sc->mp;
624 	struct xfs_dir2_sf_hdr	*sfp;
625 	int			i8count;
626 
627 	trace_xrep_dinode_zap_dir(sc, dip);
628 
629 	dip->di_format = XFS_DINODE_FMT_LOCAL;
630 	i8count = mp->m_sb.sb_rootino > XFS_DIR2_MAX_SHORT_INUM;
631 	sfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
632 	sfp->count = 0;
633 	sfp->i8count = i8count;
634 	xfs_dir2_sf_put_parent_ino(sfp, mp->m_sb.sb_rootino);
635 	dip->di_size = cpu_to_be64(xfs_dir2_sf_hdr_size(i8count));
636 	ri->ino_sick_mask |= XFS_SICK_INO_DIR_ZAPPED;
637 }
638 
639 /* Make sure we don't have a garbage file size. */
640 STATIC void
xrep_dinode_size(struct xrep_inode * ri,struct xfs_dinode * dip)641 xrep_dinode_size(
642 	struct xrep_inode	*ri,
643 	struct xfs_dinode	*dip)
644 {
645 	struct xfs_scrub	*sc = ri->sc;
646 	uint64_t		size = be64_to_cpu(dip->di_size);
647 	uint16_t		mode = be16_to_cpu(dip->di_mode);
648 
649 	trace_xrep_dinode_size(sc, dip);
650 
651 	switch (mode & S_IFMT) {
652 	case S_IFIFO:
653 	case S_IFCHR:
654 	case S_IFBLK:
655 	case S_IFSOCK:
656 		/* di_size can't be nonzero for special files */
657 		dip->di_size = 0;
658 		break;
659 	case S_IFREG:
660 		/* Regular files can't be larger than 2^63-1 bytes. */
661 		dip->di_size = cpu_to_be64(size & ~(1ULL << 63));
662 		break;
663 	case S_IFLNK:
664 		/*
665 		 * Truncate ridiculously oversized symlinks.  If the size is
666 		 * zero, reset it to point to the current directory.  Both of
667 		 * these conditions trigger dinode verifier errors, so there
668 		 * is no in-core state to reset.
669 		 */
670 		if (size > XFS_SYMLINK_MAXLEN)
671 			dip->di_size = cpu_to_be64(XFS_SYMLINK_MAXLEN);
672 		else if (size == 0)
673 			xrep_dinode_zap_symlink(ri, dip);
674 		break;
675 	case S_IFDIR:
676 		/*
677 		 * Directories can't have a size larger than 32G.  If the size
678 		 * is zero, reset it to an empty directory.  Both of these
679 		 * conditions trigger dinode verifier errors, so there is no
680 		 * in-core state to reset.
681 		 */
682 		if (size > XFS_DIR2_SPACE_SIZE)
683 			dip->di_size = cpu_to_be64(XFS_DIR2_SPACE_SIZE);
684 		else if (size == 0)
685 			xrep_dinode_zap_dir(ri, dip);
686 		break;
687 	}
688 }
689 
690 /* Fix extent size hints. */
691 STATIC void
xrep_dinode_extsize_hints(struct xfs_scrub * sc,struct xfs_dinode * dip)692 xrep_dinode_extsize_hints(
693 	struct xfs_scrub	*sc,
694 	struct xfs_dinode	*dip)
695 {
696 	struct xfs_mount	*mp = sc->mp;
697 	uint64_t		flags2 = be64_to_cpu(dip->di_flags2);
698 	uint16_t		flags = be16_to_cpu(dip->di_flags);
699 	uint16_t		mode = be16_to_cpu(dip->di_mode);
700 
701 	xfs_failaddr_t		fa;
702 
703 	trace_xrep_dinode_extsize_hints(sc, dip);
704 
705 	fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize),
706 			mode, flags);
707 	if (fa) {
708 		dip->di_extsize = 0;
709 		dip->di_flags &= ~cpu_to_be16(XFS_DIFLAG_EXTSIZE |
710 					      XFS_DIFLAG_EXTSZINHERIT);
711 	}
712 
713 	if (dip->di_version < 3 ||
714 	    (xfs_has_zoned(sc->mp) &&
715 	     dip->di_metatype == cpu_to_be16(XFS_METAFILE_RTRMAP)))
716 		return;
717 
718 	fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
719 			mode, flags, flags2);
720 	if (fa) {
721 		dip->di_cowextsize = 0;
722 		dip->di_flags2 &= ~cpu_to_be64(XFS_DIFLAG2_COWEXTSIZE);
723 	}
724 }
725 
726 /* Count extents and blocks for an inode given an rmap. */
727 STATIC int
xrep_dinode_walk_rmap(struct xfs_btree_cur * cur,const struct xfs_rmap_irec * rec,void * priv)728 xrep_dinode_walk_rmap(
729 	struct xfs_btree_cur		*cur,
730 	const struct xfs_rmap_irec	*rec,
731 	void				*priv)
732 {
733 	struct xrep_inode		*ri = priv;
734 	int				error = 0;
735 
736 	if (xchk_should_terminate(ri->sc, &error))
737 		return error;
738 
739 	/* We only care about this inode. */
740 	if (rec->rm_owner != ri->sc->sm->sm_ino)
741 		return 0;
742 
743 	if (rec->rm_flags & XFS_RMAP_ATTR_FORK) {
744 		ri->attr_blocks += rec->rm_blockcount;
745 		if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
746 			ri->attr_extents++;
747 
748 		return 0;
749 	}
750 
751 	ri->data_blocks += rec->rm_blockcount;
752 	if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
753 		ri->data_extents++;
754 
755 	return 0;
756 }
757 
758 /* Count extents and blocks for an inode from all AG rmap data. */
759 STATIC int
xrep_dinode_count_ag_rmaps(struct xrep_inode * ri,struct xfs_perag * pag)760 xrep_dinode_count_ag_rmaps(
761 	struct xrep_inode	*ri,
762 	struct xfs_perag	*pag)
763 {
764 	struct xfs_btree_cur	*cur;
765 	struct xfs_buf		*agf;
766 	int			error;
767 
768 	error = xfs_alloc_read_agf(pag, ri->sc->tp, 0, &agf);
769 	if (error)
770 		return error;
771 
772 	cur = xfs_rmapbt_init_cursor(ri->sc->mp, ri->sc->tp, agf, pag);
773 	error = xfs_rmap_query_all(cur, xrep_dinode_walk_rmap, ri);
774 	xfs_btree_del_cursor(cur, error);
775 	xfs_trans_brelse(ri->sc->tp, agf);
776 	return error;
777 }
778 
779 /* Count extents and blocks for an inode given an rt rmap. */
780 STATIC int
xrep_dinode_walk_rtrmap(struct xfs_btree_cur * cur,const struct xfs_rmap_irec * rec,void * priv)781 xrep_dinode_walk_rtrmap(
782 	struct xfs_btree_cur		*cur,
783 	const struct xfs_rmap_irec	*rec,
784 	void				*priv)
785 {
786 	struct xrep_inode		*ri = priv;
787 	int				error = 0;
788 
789 	if (xchk_should_terminate(ri->sc, &error))
790 		return error;
791 
792 	/* We only care about this inode. */
793 	if (rec->rm_owner != ri->sc->sm->sm_ino)
794 		return 0;
795 
796 	if (rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK))
797 		return -EFSCORRUPTED;
798 
799 	ri->rt_blocks += rec->rm_blockcount;
800 	ri->rt_extents++;
801 	return 0;
802 }
803 
804 /* Count extents and blocks for an inode from all realtime rmap data. */
805 STATIC int
xrep_dinode_count_rtgroup_rmaps(struct xrep_inode * ri,struct xfs_rtgroup * rtg)806 xrep_dinode_count_rtgroup_rmaps(
807 	struct xrep_inode	*ri,
808 	struct xfs_rtgroup	*rtg)
809 {
810 	struct xfs_scrub	*sc = ri->sc;
811 	int			error;
812 
813 	error = xrep_rtgroup_init(sc, rtg, &sc->sr, XFS_RTGLOCK_RMAP);
814 	if (error)
815 		return error;
816 
817 	error = xfs_rmap_query_all(sc->sr.rmap_cur, xrep_dinode_walk_rtrmap,
818 			ri);
819 	xchk_rtgroup_btcur_free(&sc->sr);
820 	xchk_rtgroup_free(sc, &sc->sr);
821 	return error;
822 }
823 
824 /* Count extents and blocks for a given inode from all rmap data. */
825 STATIC int
xrep_dinode_count_rmaps(struct xrep_inode * ri)826 xrep_dinode_count_rmaps(
827 	struct xrep_inode	*ri)
828 {
829 	struct xfs_perag	*pag = NULL;
830 	struct xfs_rtgroup	*rtg = NULL;
831 	int			error;
832 
833 	if (!xfs_has_rmapbt(ri->sc->mp))
834 		return -EOPNOTSUPP;
835 
836 	while ((rtg = xfs_rtgroup_next(ri->sc->mp, rtg))) {
837 		error = xrep_dinode_count_rtgroup_rmaps(ri, rtg);
838 		if (error) {
839 			xfs_rtgroup_rele(rtg);
840 			return error;
841 		}
842 	}
843 
844 	while ((pag = xfs_perag_next(ri->sc->mp, pag))) {
845 		error = xrep_dinode_count_ag_rmaps(ri, pag);
846 		if (error) {
847 			xfs_perag_rele(pag);
848 			return error;
849 		}
850 	}
851 
852 	/* Can't have extents on both the rt and the data device. */
853 	if (ri->data_extents && ri->rt_extents)
854 		return -EFSCORRUPTED;
855 
856 	trace_xrep_dinode_count_rmaps(ri->sc,
857 			ri->data_blocks, ri->rt_blocks, ri->attr_blocks,
858 			ri->data_extents, ri->rt_extents, ri->attr_extents);
859 	return 0;
860 }
861 
862 /* Return true if this extents-format ifork looks like garbage. */
863 STATIC bool
xrep_dinode_bad_extents_fork(struct xfs_scrub * sc,struct xfs_dinode * dip,unsigned int dfork_size,int whichfork)864 xrep_dinode_bad_extents_fork(
865 	struct xfs_scrub	*sc,
866 	struct xfs_dinode	*dip,
867 	unsigned int		dfork_size,
868 	int			whichfork)
869 {
870 	struct xfs_bmbt_irec	new;
871 	struct xfs_bmbt_rec	*dp;
872 	xfs_extnum_t		nex;
873 	bool			isrt;
874 	unsigned int		i;
875 
876 	nex = xfs_dfork_nextents(dip, whichfork);
877 	if (nex > dfork_size / sizeof(struct xfs_bmbt_rec))
878 		return true;
879 
880 	dp = XFS_DFORK_PTR(dip, whichfork);
881 
882 	isrt = dip->di_flags & cpu_to_be16(XFS_DIFLAG_REALTIME);
883 	for (i = 0; i < nex; i++, dp++) {
884 		xfs_failaddr_t	fa;
885 
886 		xfs_bmbt_disk_get_all(dp, &new);
887 		fa = xfs_bmap_validate_extent_raw(sc->mp, isrt, whichfork,
888 				&new);
889 		if (fa)
890 			return true;
891 	}
892 
893 	return false;
894 }
895 
896 /* Return true if this btree-format ifork looks like garbage. */
897 STATIC bool
xrep_dinode_bad_bmbt_fork(struct xfs_scrub * sc,struct xfs_dinode * dip,unsigned int dfork_size,int whichfork)898 xrep_dinode_bad_bmbt_fork(
899 	struct xfs_scrub	*sc,
900 	struct xfs_dinode	*dip,
901 	unsigned int		dfork_size,
902 	int			whichfork)
903 {
904 	struct xfs_bmdr_block	*dfp;
905 	xfs_extnum_t		nex;
906 	unsigned int		i;
907 	unsigned int		dmxr;
908 	unsigned int		nrecs;
909 	unsigned int		level;
910 
911 	nex = xfs_dfork_nextents(dip, whichfork);
912 	if (nex <= dfork_size / sizeof(struct xfs_bmbt_rec))
913 		return true;
914 
915 	if (dfork_size < sizeof(struct xfs_bmdr_block))
916 		return true;
917 
918 	dfp = XFS_DFORK_PTR(dip, whichfork);
919 	nrecs = be16_to_cpu(dfp->bb_numrecs);
920 	level = be16_to_cpu(dfp->bb_level);
921 
922 	if (nrecs == 0 || xfs_bmdr_space_calc(nrecs) > dfork_size)
923 		return true;
924 	if (level == 0 || level >= XFS_BM_MAXLEVELS(sc->mp, whichfork))
925 		return true;
926 
927 	dmxr = xfs_bmdr_maxrecs(dfork_size, 0);
928 	for (i = 1; i <= nrecs; i++) {
929 		struct xfs_bmbt_key	*fkp;
930 		xfs_bmbt_ptr_t		*fpp;
931 		xfs_fileoff_t		fileoff;
932 		xfs_fsblock_t		fsbno;
933 
934 		fkp = xfs_bmdr_key_addr(dfp, i);
935 		fileoff = be64_to_cpu(fkp->br_startoff);
936 		if (!xfs_verify_fileoff(sc->mp, fileoff))
937 			return true;
938 
939 		fpp = xfs_bmdr_ptr_addr(dfp, i, dmxr);
940 		fsbno = be64_to_cpu(*fpp);
941 		if (!xfs_verify_fsbno(sc->mp, fsbno))
942 			return true;
943 	}
944 
945 	return false;
946 }
947 
948 /* Return true if this rmap-format ifork looks like garbage. */
949 STATIC bool
xrep_dinode_bad_rtrmapbt_fork(struct xfs_scrub * sc,struct xfs_dinode * dip,unsigned int dfork_size)950 xrep_dinode_bad_rtrmapbt_fork(
951 	struct xfs_scrub	*sc,
952 	struct xfs_dinode	*dip,
953 	unsigned int		dfork_size)
954 {
955 	struct xfs_rtrmap_root	*dfp;
956 	unsigned int		nrecs;
957 	unsigned int		level;
958 
959 	if (dfork_size < sizeof(struct xfs_rtrmap_root))
960 		return true;
961 
962 	dfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
963 	nrecs = be16_to_cpu(dfp->bb_numrecs);
964 	level = be16_to_cpu(dfp->bb_level);
965 
966 	if (level > sc->mp->m_rtrmap_maxlevels)
967 		return true;
968 	if (xfs_rtrmap_droot_space_calc(level, nrecs) > dfork_size)
969 		return true;
970 	if (level > 0 && nrecs == 0)
971 		return true;
972 
973 	return false;
974 }
975 
976 /* Return true if this refcount-format ifork looks like garbage. */
977 STATIC bool
xrep_dinode_bad_rtrefcountbt_fork(struct xfs_scrub * sc,struct xfs_dinode * dip,unsigned int dfork_size)978 xrep_dinode_bad_rtrefcountbt_fork(
979 	struct xfs_scrub	*sc,
980 	struct xfs_dinode	*dip,
981 	unsigned int		dfork_size)
982 {
983 	struct xfs_rtrefcount_root *dfp;
984 	unsigned int		nrecs;
985 	unsigned int		level;
986 
987 	if (dfork_size < sizeof(struct xfs_rtrefcount_root))
988 		return true;
989 
990 	dfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
991 	nrecs = be16_to_cpu(dfp->bb_numrecs);
992 	level = be16_to_cpu(dfp->bb_level);
993 
994 	if (level > sc->mp->m_rtrefc_maxlevels)
995 		return true;
996 	if (xfs_rtrefcount_droot_space_calc(level, nrecs) > dfork_size)
997 		return true;
998 	if (level > 0 && nrecs == 0)
999 		return true;
1000 
1001 	return false;
1002 }
1003 
1004 /* Check a metadata-btree fork. */
1005 STATIC bool
xrep_dinode_bad_metabt_fork(struct xfs_scrub * sc,struct xfs_dinode * dip,unsigned int dfork_size,int whichfork)1006 xrep_dinode_bad_metabt_fork(
1007 	struct xfs_scrub	*sc,
1008 	struct xfs_dinode	*dip,
1009 	unsigned int		dfork_size,
1010 	int			whichfork)
1011 {
1012 	if (whichfork != XFS_DATA_FORK)
1013 		return true;
1014 
1015 	switch (be16_to_cpu(dip->di_metatype)) {
1016 	case XFS_METAFILE_RTRMAP:
1017 		return xrep_dinode_bad_rtrmapbt_fork(sc, dip, dfork_size);
1018 	case XFS_METAFILE_RTREFCOUNT:
1019 		return xrep_dinode_bad_rtrefcountbt_fork(sc, dip, dfork_size);
1020 	default:
1021 		return true;
1022 	}
1023 
1024 	return false;
1025 }
1026 
1027 /*
1028  * Check the data fork for things that will fail the ifork verifiers or the
1029  * ifork formatters.
1030  */
1031 STATIC bool
xrep_dinode_check_dfork(struct xfs_scrub * sc,struct xfs_dinode * dip,uint16_t mode)1032 xrep_dinode_check_dfork(
1033 	struct xfs_scrub	*sc,
1034 	struct xfs_dinode	*dip,
1035 	uint16_t		mode)
1036 {
1037 	void			*dfork_ptr;
1038 	int64_t			data_size;
1039 	unsigned int		fmt;
1040 	unsigned int		dfork_size;
1041 
1042 	/*
1043 	 * Verifier functions take signed int64_t, so check for bogus negative
1044 	 * values first.
1045 	 */
1046 	data_size = be64_to_cpu(dip->di_size);
1047 	if (data_size < 0)
1048 		return true;
1049 
1050 	fmt = XFS_DFORK_FORMAT(dip, XFS_DATA_FORK);
1051 	switch (mode & S_IFMT) {
1052 	case S_IFIFO:
1053 	case S_IFCHR:
1054 	case S_IFBLK:
1055 	case S_IFSOCK:
1056 		if (fmt != XFS_DINODE_FMT_DEV)
1057 			return true;
1058 		break;
1059 	case S_IFREG:
1060 		switch (fmt) {
1061 		case XFS_DINODE_FMT_LOCAL:
1062 			return true;
1063 		case XFS_DINODE_FMT_EXTENTS:
1064 		case XFS_DINODE_FMT_BTREE:
1065 		case XFS_DINODE_FMT_META_BTREE:
1066 			break;
1067 		default:
1068 			return true;
1069 		}
1070 		break;
1071 	case S_IFLNK:
1072 	case S_IFDIR:
1073 		switch (fmt) {
1074 		case XFS_DINODE_FMT_LOCAL:
1075 		case XFS_DINODE_FMT_EXTENTS:
1076 		case XFS_DINODE_FMT_BTREE:
1077 			break;
1078 		default:
1079 			return true;
1080 		}
1081 		break;
1082 	default:
1083 		return true;
1084 	}
1085 
1086 	dfork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_DATA_FORK);
1087 	dfork_ptr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
1088 
1089 	switch (fmt) {
1090 	case XFS_DINODE_FMT_DEV:
1091 		break;
1092 	case XFS_DINODE_FMT_LOCAL:
1093 		/* dir/symlink structure cannot be larger than the fork */
1094 		if (data_size > dfork_size)
1095 			return true;
1096 		/* directory structure must pass verification. */
1097 		if (S_ISDIR(mode) &&
1098 		    xfs_dir2_sf_verify(sc->mp, dfork_ptr, data_size) != NULL)
1099 			return true;
1100 		/* symlink structure must pass verification. */
1101 		if (S_ISLNK(mode) &&
1102 		    xfs_symlink_shortform_verify(dfork_ptr, data_size) != NULL)
1103 			return true;
1104 		break;
1105 	case XFS_DINODE_FMT_EXTENTS:
1106 		if (xrep_dinode_bad_extents_fork(sc, dip, dfork_size,
1107 				XFS_DATA_FORK))
1108 			return true;
1109 		break;
1110 	case XFS_DINODE_FMT_BTREE:
1111 		if (xrep_dinode_bad_bmbt_fork(sc, dip, dfork_size,
1112 				XFS_DATA_FORK))
1113 			return true;
1114 		break;
1115 	case XFS_DINODE_FMT_META_BTREE:
1116 		if (xrep_dinode_bad_metabt_fork(sc, dip, dfork_size,
1117 				XFS_DATA_FORK))
1118 			return true;
1119 		break;
1120 	default:
1121 		return true;
1122 	}
1123 
1124 	return false;
1125 }
1126 
1127 static void
xrep_dinode_set_data_nextents(struct xfs_dinode * dip,xfs_extnum_t nextents)1128 xrep_dinode_set_data_nextents(
1129 	struct xfs_dinode	*dip,
1130 	xfs_extnum_t		nextents)
1131 {
1132 	if (xfs_dinode_has_large_extent_counts(dip))
1133 		dip->di_big_nextents = cpu_to_be64(nextents);
1134 	else
1135 		dip->di_nextents = cpu_to_be32(nextents);
1136 }
1137 
1138 static void
xrep_dinode_set_attr_nextents(struct xfs_dinode * dip,xfs_extnum_t nextents)1139 xrep_dinode_set_attr_nextents(
1140 	struct xfs_dinode	*dip,
1141 	xfs_extnum_t		nextents)
1142 {
1143 	if (xfs_dinode_has_large_extent_counts(dip))
1144 		dip->di_big_anextents = cpu_to_be32(nextents);
1145 	else
1146 		dip->di_anextents = cpu_to_be16(nextents);
1147 }
1148 
1149 /* Reset the data fork to something sane. */
1150 STATIC void
xrep_dinode_zap_dfork(struct xrep_inode * ri,struct xfs_dinode * dip,uint16_t mode)1151 xrep_dinode_zap_dfork(
1152 	struct xrep_inode	*ri,
1153 	struct xfs_dinode	*dip,
1154 	uint16_t		mode)
1155 {
1156 	struct xfs_scrub	*sc = ri->sc;
1157 
1158 	trace_xrep_dinode_zap_dfork(sc, dip);
1159 
1160 	ri->ino_sick_mask |= XFS_SICK_INO_BMBTD_ZAPPED;
1161 
1162 	xrep_dinode_set_data_nextents(dip, 0);
1163 	ri->data_blocks = 0;
1164 	ri->rt_blocks = 0;
1165 
1166 	/* Special files always get reset to DEV */
1167 	switch (mode & S_IFMT) {
1168 	case S_IFIFO:
1169 	case S_IFCHR:
1170 	case S_IFBLK:
1171 	case S_IFSOCK:
1172 		dip->di_format = XFS_DINODE_FMT_DEV;
1173 		dip->di_size = 0;
1174 		return;
1175 	}
1176 
1177 	/*
1178 	 * If we have data extents, reset to an empty map and hope the user
1179 	 * will run the bmapbtd checker next.
1180 	 */
1181 	if (ri->data_extents || ri->rt_extents || S_ISREG(mode)) {
1182 		dip->di_format = XFS_DINODE_FMT_EXTENTS;
1183 		return;
1184 	}
1185 
1186 	/* Otherwise, reset the local format to the minimum. */
1187 	switch (mode & S_IFMT) {
1188 	case S_IFLNK:
1189 		xrep_dinode_zap_symlink(ri, dip);
1190 		break;
1191 	case S_IFDIR:
1192 		xrep_dinode_zap_dir(ri, dip);
1193 		break;
1194 	}
1195 }
1196 
1197 /*
1198  * Check the attr fork for things that will fail the ifork verifiers or the
1199  * ifork formatters.
1200  */
1201 STATIC bool
xrep_dinode_check_afork(struct xfs_scrub * sc,struct xfs_dinode * dip)1202 xrep_dinode_check_afork(
1203 	struct xfs_scrub		*sc,
1204 	struct xfs_dinode		*dip)
1205 {
1206 	struct xfs_attr_sf_hdr		*afork_ptr;
1207 	size_t				attr_size;
1208 	unsigned int			afork_size;
1209 
1210 	if (XFS_DFORK_BOFF(dip) == 0)
1211 		return dip->di_aformat != XFS_DINODE_FMT_EXTENTS ||
1212 		       xfs_dfork_attr_extents(dip) != 0;
1213 
1214 	afork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK);
1215 	afork_ptr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK);
1216 
1217 	switch (XFS_DFORK_FORMAT(dip, XFS_ATTR_FORK)) {
1218 	case XFS_DINODE_FMT_LOCAL:
1219 		/* Fork has to be large enough to extract the xattr size. */
1220 		if (afork_size < sizeof(struct xfs_attr_sf_hdr))
1221 			return true;
1222 
1223 		/* xattr structure cannot be larger than the fork */
1224 		attr_size = be16_to_cpu(afork_ptr->totsize);
1225 		if (attr_size > afork_size)
1226 			return true;
1227 
1228 		/* xattr structure must pass verification. */
1229 		return xfs_attr_shortform_verify(afork_ptr, attr_size) != NULL;
1230 	case XFS_DINODE_FMT_EXTENTS:
1231 		if (xrep_dinode_bad_extents_fork(sc, dip, afork_size,
1232 					XFS_ATTR_FORK))
1233 			return true;
1234 		break;
1235 	case XFS_DINODE_FMT_BTREE:
1236 		if (xrep_dinode_bad_bmbt_fork(sc, dip, afork_size,
1237 					XFS_ATTR_FORK))
1238 			return true;
1239 		break;
1240 	case XFS_DINODE_FMT_META_BTREE:
1241 		if (xrep_dinode_bad_metabt_fork(sc, dip, afork_size,
1242 					XFS_ATTR_FORK))
1243 			return true;
1244 		break;
1245 	default:
1246 		return true;
1247 	}
1248 
1249 	return false;
1250 }
1251 
1252 /*
1253  * Reset the attr fork to empty.  Since the attr fork could have contained
1254  * ACLs, make the file readable only by root.
1255  */
1256 STATIC void
xrep_dinode_zap_afork(struct xrep_inode * ri,struct xfs_dinode * dip,uint16_t mode)1257 xrep_dinode_zap_afork(
1258 	struct xrep_inode	*ri,
1259 	struct xfs_dinode	*dip,
1260 	uint16_t		mode)
1261 {
1262 	struct xfs_scrub	*sc = ri->sc;
1263 
1264 	trace_xrep_dinode_zap_afork(sc, dip);
1265 
1266 	ri->ino_sick_mask |= XFS_SICK_INO_BMBTA_ZAPPED;
1267 
1268 	dip->di_aformat = XFS_DINODE_FMT_EXTENTS;
1269 	xrep_dinode_set_attr_nextents(dip, 0);
1270 	ri->attr_blocks = 0;
1271 
1272 	/*
1273 	 * If the data fork is in btree format, removing the attr fork entirely
1274 	 * might cause verifier failures if the next level down in the bmbt
1275 	 * could now fit in the data fork area.
1276 	 */
1277 	if (dip->di_format != XFS_DINODE_FMT_BTREE)
1278 		dip->di_forkoff = 0;
1279 	dip->di_mode = cpu_to_be16(mode & ~0777);
1280 	dip->di_uid = 0;
1281 	dip->di_gid = 0;
1282 }
1283 
1284 /* Make sure the fork offset is a sensible value. */
1285 STATIC void
xrep_dinode_ensure_forkoff(struct xrep_inode * ri,struct xfs_dinode * dip,uint16_t mode)1286 xrep_dinode_ensure_forkoff(
1287 	struct xrep_inode	*ri,
1288 	struct xfs_dinode	*dip,
1289 	uint16_t		mode)
1290 {
1291 	struct xfs_bmdr_block	*bmdr;
1292 	struct xfs_rtrmap_root	*rmdr;
1293 	struct xfs_rtrefcount_root *rcdr;
1294 	struct xfs_scrub	*sc = ri->sc;
1295 	xfs_extnum_t		attr_extents, data_extents;
1296 	size_t			bmdr_minsz = xfs_bmdr_space_calc(1);
1297 	unsigned int		lit_sz = XFS_LITINO(sc->mp);
1298 	unsigned int		afork_min, dfork_min;
1299 
1300 	trace_xrep_dinode_ensure_forkoff(sc, dip);
1301 
1302 	/*
1303 	 * Before calling this function, xrep_dinode_core ensured that both
1304 	 * forks actually fit inside their respective literal areas.  If this
1305 	 * was not the case, the fork was reset to FMT_EXTENTS with zero
1306 	 * records.  If the rmapbt scan found attr or data fork blocks, this
1307 	 * will be noted in the dinode_stats, and we must leave enough room
1308 	 * for the bmap repair code to reconstruct the mapping structure.
1309 	 *
1310 	 * First, compute the minimum space required for the attr fork.
1311 	 */
1312 	switch (dip->di_aformat) {
1313 	case XFS_DINODE_FMT_LOCAL:
1314 		/*
1315 		 * If we still have a shortform xattr structure at all, that
1316 		 * means the attr fork area was exactly large enough to fit
1317 		 * the sf structure.
1318 		 */
1319 		afork_min = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK);
1320 		break;
1321 	case XFS_DINODE_FMT_EXTENTS:
1322 		attr_extents = xfs_dfork_attr_extents(dip);
1323 		if (attr_extents) {
1324 			/*
1325 			 * We must maintain sufficient space to hold the entire
1326 			 * extent map array in the data fork.  Note that we
1327 			 * previously zapped the fork if it had no chance of
1328 			 * fitting in the inode.
1329 			 */
1330 			afork_min = sizeof(struct xfs_bmbt_rec) * attr_extents;
1331 		} else if (ri->attr_extents > 0) {
1332 			/*
1333 			 * The attr fork thinks it has zero extents, but we
1334 			 * found some xattr extents.  We need to leave enough
1335 			 * empty space here so that the incore attr fork will
1336 			 * get created (and hence trigger the attr fork bmap
1337 			 * repairer).
1338 			 */
1339 			afork_min = bmdr_minsz;
1340 		} else {
1341 			/* No extents on disk or found in rmapbt. */
1342 			afork_min = 0;
1343 		}
1344 		break;
1345 	case XFS_DINODE_FMT_BTREE:
1346 		/* Must have space for btree header and key/pointers. */
1347 		bmdr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK);
1348 		afork_min = xfs_bmap_broot_space(sc->mp, bmdr);
1349 		break;
1350 	default:
1351 		/* We should never see any other formats. */
1352 		afork_min = 0;
1353 		break;
1354 	}
1355 
1356 	/* Compute the minimum space required for the data fork. */
1357 	switch (dip->di_format) {
1358 	case XFS_DINODE_FMT_DEV:
1359 		dfork_min = sizeof(__be32);
1360 		break;
1361 	case XFS_DINODE_FMT_UUID:
1362 		dfork_min = sizeof(uuid_t);
1363 		break;
1364 	case XFS_DINODE_FMT_LOCAL:
1365 		/*
1366 		 * If we still have a shortform data fork at all, that means
1367 		 * the data fork area was large enough to fit whatever was in
1368 		 * there.
1369 		 */
1370 		dfork_min = be64_to_cpu(dip->di_size);
1371 		break;
1372 	case XFS_DINODE_FMT_EXTENTS:
1373 		data_extents = xfs_dfork_data_extents(dip);
1374 		if (data_extents) {
1375 			/*
1376 			 * We must maintain sufficient space to hold the entire
1377 			 * extent map array in the data fork.  Note that we
1378 			 * previously zapped the fork if it had no chance of
1379 			 * fitting in the inode.
1380 			 */
1381 			dfork_min = sizeof(struct xfs_bmbt_rec) * data_extents;
1382 		} else if (ri->data_extents > 0 || ri->rt_extents > 0) {
1383 			/*
1384 			 * The data fork thinks it has zero extents, but we
1385 			 * found some data extents.  We need to leave enough
1386 			 * empty space here so that the data fork bmap repair
1387 			 * will recover the mappings.
1388 			 */
1389 			dfork_min = bmdr_minsz;
1390 		} else {
1391 			/* No extents on disk or found in rmapbt. */
1392 			dfork_min = 0;
1393 		}
1394 		break;
1395 	case XFS_DINODE_FMT_BTREE:
1396 		/* Must have space for btree header and key/pointers. */
1397 		bmdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
1398 		dfork_min = xfs_bmap_broot_space(sc->mp, bmdr);
1399 		break;
1400 	case XFS_DINODE_FMT_META_BTREE:
1401 		switch (be16_to_cpu(dip->di_metatype)) {
1402 		case XFS_METAFILE_RTRMAP:
1403 			rmdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
1404 			dfork_min = xfs_rtrmap_broot_space(sc->mp, rmdr);
1405 			break;
1406 		case XFS_METAFILE_RTREFCOUNT:
1407 			rcdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
1408 			dfork_min = xfs_rtrefcount_broot_space(sc->mp, rcdr);
1409 			break;
1410 		default:
1411 			dfork_min = 0;
1412 			break;
1413 		}
1414 		break;
1415 	default:
1416 		dfork_min = 0;
1417 		break;
1418 	}
1419 
1420 	/*
1421 	 * Round all values up to the nearest 8 bytes, because that is the
1422 	 * precision of di_forkoff.
1423 	 */
1424 	afork_min = roundup(afork_min, 8);
1425 	dfork_min = roundup(dfork_min, 8);
1426 	bmdr_minsz = roundup(bmdr_minsz, 8);
1427 
1428 	ASSERT(dfork_min <= lit_sz);
1429 	ASSERT(afork_min <= lit_sz);
1430 
1431 	/*
1432 	 * If the data fork was zapped and we don't have enough space for the
1433 	 * recovery fork, move the attr fork up.
1434 	 */
1435 	if (dip->di_format == XFS_DINODE_FMT_EXTENTS &&
1436 	    xfs_dfork_data_extents(dip) == 0 &&
1437 	    (ri->data_extents > 0 || ri->rt_extents > 0) &&
1438 	    bmdr_minsz > XFS_DFORK_DSIZE(dip, sc->mp)) {
1439 		if (bmdr_minsz + afork_min > lit_sz) {
1440 			/*
1441 			 * The attr for and the stub fork we need to recover
1442 			 * the data fork won't both fit.  Zap the attr fork.
1443 			 */
1444 			xrep_dinode_zap_afork(ri, dip, mode);
1445 			afork_min = bmdr_minsz;
1446 		} else {
1447 			void	*before, *after;
1448 
1449 			/* Otherwise, just slide the attr fork up. */
1450 			before = XFS_DFORK_APTR(dip);
1451 			dip->di_forkoff = bmdr_minsz >> 3;
1452 			after = XFS_DFORK_APTR(dip);
1453 			memmove(after, before, XFS_DFORK_ASIZE(dip, sc->mp));
1454 		}
1455 	}
1456 
1457 	/*
1458 	 * If the attr fork was zapped and we don't have enough space for the
1459 	 * recovery fork, move the attr fork down.
1460 	 */
1461 	if (dip->di_aformat == XFS_DINODE_FMT_EXTENTS &&
1462 	    xfs_dfork_attr_extents(dip) == 0 &&
1463 	    ri->attr_extents > 0 &&
1464 	    bmdr_minsz > XFS_DFORK_ASIZE(dip, sc->mp)) {
1465 		if (dip->di_format == XFS_DINODE_FMT_BTREE) {
1466 			/*
1467 			 * If the data fork is in btree format then we can't
1468 			 * adjust forkoff because that runs the risk of
1469 			 * violating the extents/btree format transition rules.
1470 			 */
1471 		} else if (bmdr_minsz + dfork_min > lit_sz) {
1472 			/*
1473 			 * If we can't move the attr fork, too bad, we lose the
1474 			 * attr fork and leak its blocks.
1475 			 */
1476 			xrep_dinode_zap_afork(ri, dip, mode);
1477 		} else {
1478 			/*
1479 			 * Otherwise, just slide the attr fork down.  The attr
1480 			 * fork is empty, so we don't have any old contents to
1481 			 * move here.
1482 			 */
1483 			dip->di_forkoff = (lit_sz - bmdr_minsz) >> 3;
1484 		}
1485 	}
1486 }
1487 
1488 /*
1489  * Zap the data/attr forks if we spot anything that isn't going to pass the
1490  * ifork verifiers or the ifork formatters, because we need to get the inode
1491  * into good enough shape that the higher level repair functions can run.
1492  */
1493 STATIC void
xrep_dinode_zap_forks(struct xrep_inode * ri,struct xfs_dinode * dip)1494 xrep_dinode_zap_forks(
1495 	struct xrep_inode	*ri,
1496 	struct xfs_dinode	*dip)
1497 {
1498 	struct xfs_scrub	*sc = ri->sc;
1499 	xfs_extnum_t		data_extents;
1500 	xfs_extnum_t		attr_extents;
1501 	xfs_filblks_t		nblocks;
1502 	uint16_t		mode;
1503 	bool			zap_datafork = false;
1504 	bool			zap_attrfork = ri->zap_acls;
1505 
1506 	trace_xrep_dinode_zap_forks(sc, dip);
1507 
1508 	mode = be16_to_cpu(dip->di_mode);
1509 
1510 	data_extents = xfs_dfork_data_extents(dip);
1511 	attr_extents = xfs_dfork_attr_extents(dip);
1512 	nblocks = be64_to_cpu(dip->di_nblocks);
1513 
1514 	/* Inode counters don't make sense? */
1515 	if (data_extents > nblocks)
1516 		zap_datafork = true;
1517 	if (attr_extents > nblocks)
1518 		zap_attrfork = true;
1519 	if (data_extents + attr_extents > nblocks)
1520 		zap_datafork = zap_attrfork = true;
1521 
1522 	if (!zap_datafork)
1523 		zap_datafork = xrep_dinode_check_dfork(sc, dip, mode);
1524 	if (!zap_attrfork)
1525 		zap_attrfork = xrep_dinode_check_afork(sc, dip);
1526 
1527 	/* Zap whatever's bad. */
1528 	if (zap_attrfork)
1529 		xrep_dinode_zap_afork(ri, dip, mode);
1530 	if (zap_datafork)
1531 		xrep_dinode_zap_dfork(ri, dip, mode);
1532 	xrep_dinode_ensure_forkoff(ri, dip, mode);
1533 
1534 	/*
1535 	 * Zero di_nblocks if we don't have any extents at all to satisfy the
1536 	 * buffer verifier.
1537 	 */
1538 	data_extents = xfs_dfork_data_extents(dip);
1539 	attr_extents = xfs_dfork_attr_extents(dip);
1540 	if (data_extents + attr_extents == 0)
1541 		dip->di_nblocks = 0;
1542 }
1543 
1544 /* Inode didn't pass dinode verifiers, so fix the raw buffer and retry iget. */
1545 STATIC int
xrep_dinode_core(struct xrep_inode * ri)1546 xrep_dinode_core(
1547 	struct xrep_inode	*ri)
1548 {
1549 	struct xfs_scrub	*sc = ri->sc;
1550 	struct xfs_buf		*bp;
1551 	struct xfs_dinode	*dip;
1552 	xfs_ino_t		ino = sc->sm->sm_ino;
1553 	int			error;
1554 	int			iget_error;
1555 
1556 	/* Figure out what this inode had mapped in both forks. */
1557 	error = xrep_dinode_count_rmaps(ri);
1558 	if (error)
1559 		return error;
1560 
1561 	/* Read the inode cluster buffer. */
1562 	error = xfs_trans_read_buf(sc->mp, sc->tp, sc->mp->m_ddev_targp,
1563 			ri->imap.im_blkno, ri->imap.im_len, 0, &bp, NULL);
1564 	if (error)
1565 		return error;
1566 
1567 	/* Make sure we can pass the inode buffer verifier. */
1568 	xrep_dinode_buf(sc, bp);
1569 	bp->b_ops = &xfs_inode_buf_ops;
1570 
1571 	/* Fix everything the verifier will complain about. */
1572 	dip = xfs_buf_offset(bp, ri->imap.im_boffset);
1573 	xrep_dinode_header(sc, dip);
1574 	iget_error = xrep_dinode_mode(ri, dip);
1575 	if (iget_error)
1576 		goto write;
1577 	xrep_dinode_nlinks(dip);
1578 	xrep_dinode_flags(sc, dip, ri->rt_extents > 0);
1579 	xrep_dinode_size(ri, dip);
1580 	xrep_dinode_extsize_hints(sc, dip);
1581 	xrep_dinode_zap_forks(ri, dip);
1582 
1583 write:
1584 	/* Write out the inode. */
1585 	trace_xrep_dinode_fixed(sc, dip);
1586 	xfs_dinode_calc_crc(sc->mp, dip);
1587 	xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_DINO_BUF);
1588 	xfs_trans_log_buf(sc->tp, bp, ri->imap.im_boffset,
1589 			ri->imap.im_boffset + sc->mp->m_sb.sb_inodesize - 1);
1590 
1591 	/*
1592 	 * In theory, we've fixed the ondisk inode record enough that we should
1593 	 * be able to load the inode into the cache.  Try to iget that inode
1594 	 * now while we hold the AGI and the inode cluster buffer and take the
1595 	 * IOLOCK so that we can continue with repairs without anyone else
1596 	 * accessing the inode.  If iget fails, we still need to commit the
1597 	 * changes.
1598 	 */
1599 	if (!iget_error)
1600 		iget_error = xchk_iget(sc, ino, &sc->ip);
1601 	if (!iget_error)
1602 		xchk_ilock(sc, XFS_IOLOCK_EXCL);
1603 
1604 	/*
1605 	 * Commit the inode cluster buffer updates and drop the AGI buffer that
1606 	 * we've been holding since scrub setup.  From here on out, repairs
1607 	 * deal only with the cached inode.
1608 	 */
1609 	error = xrep_trans_commit(sc);
1610 	if (error)
1611 		return error;
1612 
1613 	if (iget_error)
1614 		return iget_error;
1615 
1616 	error = xchk_trans_alloc(sc, 0);
1617 	if (error)
1618 		return error;
1619 
1620 	error = xrep_ino_dqattach(sc);
1621 	if (error)
1622 		return error;
1623 
1624 	xchk_ilock(sc, XFS_ILOCK_EXCL);
1625 	if (ri->ino_sick_mask)
1626 		xfs_inode_mark_sick(sc->ip, ri->ino_sick_mask);
1627 	return 0;
1628 }
1629 
1630 /* Fix everything xfs_dinode_verify cares about. */
1631 STATIC int
xrep_dinode_problems(struct xrep_inode * ri)1632 xrep_dinode_problems(
1633 	struct xrep_inode	*ri)
1634 {
1635 	struct xfs_scrub	*sc = ri->sc;
1636 	int			error;
1637 
1638 	error = xrep_dinode_core(ri);
1639 	if (error)
1640 		return error;
1641 
1642 	/* We had to fix a totally busted inode, schedule quotacheck. */
1643 	if (XFS_IS_UQUOTA_ON(sc->mp))
1644 		xrep_force_quotacheck(sc, XFS_DQTYPE_USER);
1645 	if (XFS_IS_GQUOTA_ON(sc->mp))
1646 		xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP);
1647 	if (XFS_IS_PQUOTA_ON(sc->mp))
1648 		xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ);
1649 
1650 	return 0;
1651 }
1652 
1653 /*
1654  * Fix problems that the verifiers don't care about.  In general these are
1655  * errors that don't cause problems elsewhere in the kernel that we can easily
1656  * detect, so we don't check them all that rigorously.
1657  */
1658 
1659 /* Make sure block and extent counts are ok. */
1660 STATIC int
xrep_inode_blockcounts(struct xfs_scrub * sc)1661 xrep_inode_blockcounts(
1662 	struct xfs_scrub	*sc)
1663 {
1664 	struct xfs_ifork	*ifp;
1665 	xfs_filblks_t		count;
1666 	xfs_filblks_t		acount;
1667 	xfs_extnum_t		nextents;
1668 	int			error;
1669 
1670 	trace_xrep_inode_blockcounts(sc);
1671 
1672 	/* Set data fork counters from the data fork mappings. */
1673 	error = xchk_inode_count_blocks(sc, XFS_DATA_FORK, &nextents, &count);
1674 	if (error)
1675 		return error;
1676 	if (xfs_is_reflink_inode(sc->ip)) {
1677 		/*
1678 		 * data fork blockcount can exceed physical storage if a user
1679 		 * reflinks the same block over and over again.
1680 		 */
1681 		;
1682 	} else if (XFS_IS_REALTIME_INODE(sc->ip)) {
1683 		if (count >= sc->mp->m_sb.sb_rblocks)
1684 			return -EFSCORRUPTED;
1685 	} else {
1686 		if (count >= sc->mp->m_sb.sb_dblocks)
1687 			return -EFSCORRUPTED;
1688 	}
1689 	error = xrep_ino_ensure_extent_count(sc, XFS_DATA_FORK, nextents);
1690 	if (error)
1691 		return error;
1692 	sc->ip->i_df.if_nextents = nextents;
1693 
1694 	/* Set attr fork counters from the attr fork mappings. */
1695 	ifp = xfs_ifork_ptr(sc->ip, XFS_ATTR_FORK);
1696 	if (ifp) {
1697 		error = xchk_inode_count_blocks(sc, XFS_ATTR_FORK, &nextents,
1698 				&acount);
1699 		if (error)
1700 			return error;
1701 		if (count >= sc->mp->m_sb.sb_dblocks)
1702 			return -EFSCORRUPTED;
1703 		error = xrep_ino_ensure_extent_count(sc, XFS_ATTR_FORK,
1704 				nextents);
1705 		if (error)
1706 			return error;
1707 		ifp->if_nextents = nextents;
1708 	} else {
1709 		acount = 0;
1710 	}
1711 
1712 	sc->ip->i_nblocks = count + acount;
1713 	return 0;
1714 }
1715 
1716 /* Check for invalid uid/gid/prid. */
1717 STATIC void
xrep_inode_ids(struct xfs_scrub * sc)1718 xrep_inode_ids(
1719 	struct xfs_scrub	*sc)
1720 {
1721 	bool			dirty = false;
1722 
1723 	trace_xrep_inode_ids(sc);
1724 
1725 	if (!uid_valid(VFS_I(sc->ip)->i_uid)) {
1726 		i_uid_write(VFS_I(sc->ip), 0);
1727 		dirty = true;
1728 		if (XFS_IS_UQUOTA_ON(sc->mp))
1729 			xrep_force_quotacheck(sc, XFS_DQTYPE_USER);
1730 	}
1731 
1732 	if (!gid_valid(VFS_I(sc->ip)->i_gid)) {
1733 		i_gid_write(VFS_I(sc->ip), 0);
1734 		dirty = true;
1735 		if (XFS_IS_GQUOTA_ON(sc->mp))
1736 			xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP);
1737 	}
1738 
1739 	if (sc->ip->i_projid == -1U) {
1740 		sc->ip->i_projid = 0;
1741 		dirty = true;
1742 		if (XFS_IS_PQUOTA_ON(sc->mp))
1743 			xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ);
1744 	}
1745 
1746 	/* strip setuid/setgid if we touched any of the ids */
1747 	if (dirty)
1748 		VFS_I(sc->ip)->i_mode &= ~(S_ISUID | S_ISGID);
1749 }
1750 
1751 static inline void
xrep_clamp_timestamp(struct xfs_inode * ip,struct timespec64 * ts)1752 xrep_clamp_timestamp(
1753 	struct xfs_inode	*ip,
1754 	struct timespec64	*ts)
1755 {
1756 	ts->tv_nsec = clamp_t(long, ts->tv_nsec, 0, NSEC_PER_SEC);
1757 	*ts = timestamp_truncate(*ts, VFS_I(ip));
1758 }
1759 
1760 /* Nanosecond counters can't have more than 1 billion. */
1761 STATIC void
xrep_inode_timestamps(struct xfs_inode * ip)1762 xrep_inode_timestamps(
1763 	struct xfs_inode	*ip)
1764 {
1765 	struct timespec64	tstamp;
1766 	struct inode		*inode = VFS_I(ip);
1767 
1768 	tstamp = inode_get_atime(inode);
1769 	xrep_clamp_timestamp(ip, &tstamp);
1770 	inode_set_atime_to_ts(inode, tstamp);
1771 
1772 	tstamp = inode_get_mtime(inode);
1773 	xrep_clamp_timestamp(ip, &tstamp);
1774 	inode_set_mtime_to_ts(inode, tstamp);
1775 
1776 	tstamp = inode_get_ctime(inode);
1777 	xrep_clamp_timestamp(ip, &tstamp);
1778 	inode_set_ctime_to_ts(inode, tstamp);
1779 
1780 	xrep_clamp_timestamp(ip, &ip->i_crtime);
1781 }
1782 
1783 /* Fix inode flags that don't make sense together. */
1784 STATIC void
xrep_inode_flags(struct xfs_scrub * sc)1785 xrep_inode_flags(
1786 	struct xfs_scrub	*sc)
1787 {
1788 	uint16_t		mode;
1789 
1790 	trace_xrep_inode_flags(sc);
1791 
1792 	mode = VFS_I(sc->ip)->i_mode;
1793 
1794 	/* Clear junk flags */
1795 	if (sc->ip->i_diflags & ~XFS_DIFLAG_ANY)
1796 		sc->ip->i_diflags &= ~XFS_DIFLAG_ANY;
1797 
1798 	/* NEWRTBM only applies to realtime bitmaps */
1799 	if (sc->ip->i_ino == sc->mp->m_sb.sb_rbmino)
1800 		sc->ip->i_diflags |= XFS_DIFLAG_NEWRTBM;
1801 	else
1802 		sc->ip->i_diflags &= ~XFS_DIFLAG_NEWRTBM;
1803 
1804 	/* These only make sense for directories. */
1805 	if (!S_ISDIR(mode))
1806 		sc->ip->i_diflags &= ~(XFS_DIFLAG_RTINHERIT |
1807 					  XFS_DIFLAG_EXTSZINHERIT |
1808 					  XFS_DIFLAG_PROJINHERIT |
1809 					  XFS_DIFLAG_NOSYMLINKS);
1810 
1811 	/* These only make sense for files. */
1812 	if (!S_ISREG(mode))
1813 		sc->ip->i_diflags &= ~(XFS_DIFLAG_REALTIME |
1814 					  XFS_DIFLAG_EXTSIZE);
1815 
1816 	/* These only make sense for non-rt files. */
1817 	if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME)
1818 		sc->ip->i_diflags &= ~XFS_DIFLAG_FILESTREAM;
1819 
1820 	/* Immutable and append only?  Drop the append. */
1821 	if ((sc->ip->i_diflags & XFS_DIFLAG_IMMUTABLE) &&
1822 	    (sc->ip->i_diflags & XFS_DIFLAG_APPEND))
1823 		sc->ip->i_diflags &= ~XFS_DIFLAG_APPEND;
1824 
1825 	/* Clear junk flags. */
1826 	if (sc->ip->i_diflags2 & ~XFS_DIFLAG2_ANY)
1827 		sc->ip->i_diflags2 &= ~XFS_DIFLAG2_ANY;
1828 
1829 	/* No reflink flag unless we support it and it's a file. */
1830 	if (!xfs_has_reflink(sc->mp) || !S_ISREG(mode))
1831 		sc->ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
1832 
1833 	/* DAX only applies to files and dirs. */
1834 	if (!(S_ISREG(mode) || S_ISDIR(mode)))
1835 		sc->ip->i_diflags2 &= ~XFS_DIFLAG2_DAX;
1836 }
1837 
1838 /*
1839  * Fix size problems with block/node format directories.  If we fail to find
1840  * the extent list, just bail out and let the bmapbtd repair functions clean
1841  * up that mess.
1842  */
1843 STATIC void
xrep_inode_blockdir_size(struct xfs_scrub * sc)1844 xrep_inode_blockdir_size(
1845 	struct xfs_scrub	*sc)
1846 {
1847 	struct xfs_iext_cursor	icur;
1848 	struct xfs_bmbt_irec	got;
1849 	struct xfs_ifork	*ifp;
1850 	xfs_fileoff_t		off;
1851 	int			error;
1852 
1853 	trace_xrep_inode_blockdir_size(sc);
1854 
1855 	error = xfs_iread_extents(sc->tp, sc->ip, XFS_DATA_FORK);
1856 	if (error)
1857 		return;
1858 
1859 	/* Find the last block before 32G; this is the dir size. */
1860 	ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
1861 	off = XFS_B_TO_FSB(sc->mp, XFS_DIR2_SPACE_SIZE);
1862 	if (!xfs_iext_lookup_extent_before(sc->ip, ifp, &off, &icur, &got)) {
1863 		/* zero-extents directory? */
1864 		return;
1865 	}
1866 
1867 	off = got.br_startoff + got.br_blockcount;
1868 	sc->ip->i_disk_size = min_t(loff_t, XFS_DIR2_SPACE_SIZE,
1869 			XFS_FSB_TO_B(sc->mp, off));
1870 }
1871 
1872 /* Fix size problems with short format directories. */
1873 STATIC void
xrep_inode_sfdir_size(struct xfs_scrub * sc)1874 xrep_inode_sfdir_size(
1875 	struct xfs_scrub	*sc)
1876 {
1877 	struct xfs_ifork	*ifp;
1878 
1879 	trace_xrep_inode_sfdir_size(sc);
1880 
1881 	ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
1882 	sc->ip->i_disk_size = ifp->if_bytes;
1883 }
1884 
1885 /*
1886  * Fix any irregularities in a directory inode's size now that we can iterate
1887  * extent maps and access other regular inode data.
1888  */
1889 STATIC void
xrep_inode_dir_size(struct xfs_scrub * sc)1890 xrep_inode_dir_size(
1891 	struct xfs_scrub	*sc)
1892 {
1893 	trace_xrep_inode_dir_size(sc);
1894 
1895 	switch (sc->ip->i_df.if_format) {
1896 	case XFS_DINODE_FMT_EXTENTS:
1897 	case XFS_DINODE_FMT_BTREE:
1898 		xrep_inode_blockdir_size(sc);
1899 		break;
1900 	case XFS_DINODE_FMT_LOCAL:
1901 		xrep_inode_sfdir_size(sc);
1902 		break;
1903 	}
1904 }
1905 
1906 /* Fix extent size hint problems. */
1907 STATIC void
xrep_inode_extsize(struct xfs_scrub * sc)1908 xrep_inode_extsize(
1909 	struct xfs_scrub	*sc)
1910 {
1911 	/* Fix misaligned extent size hints on a directory. */
1912 	if ((sc->ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
1913 	    (sc->ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) &&
1914 	    xfs_extlen_to_rtxmod(sc->mp, sc->ip->i_extsize) > 0) {
1915 		sc->ip->i_extsize = 0;
1916 		sc->ip->i_diflags &= ~XFS_DIFLAG_EXTSZINHERIT;
1917 	}
1918 }
1919 
1920 /* Ensure this file has an attr fork if it needs to hold a parent pointer. */
1921 STATIC int
xrep_inode_pptr(struct xfs_scrub * sc)1922 xrep_inode_pptr(
1923 	struct xfs_scrub	*sc)
1924 {
1925 	struct xfs_mount	*mp = sc->mp;
1926 	struct xfs_inode	*ip = sc->ip;
1927 	struct inode		*inode = VFS_I(ip);
1928 
1929 	if (!xfs_has_parent(mp))
1930 		return 0;
1931 
1932 	/*
1933 	 * Unlinked inodes that cannot be added to the directory tree will not
1934 	 * have a parent pointer.
1935 	 */
1936 	if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE))
1937 		return 0;
1938 
1939 	/* Children of the superblock do not have parent pointers. */
1940 	if (xchk_inode_is_sb_rooted(ip))
1941 		return 0;
1942 
1943 	/* Inode already has an attr fork; no further work possible here. */
1944 	if (xfs_inode_has_attr_fork(ip))
1945 		return 0;
1946 
1947 	return xfs_bmap_add_attrfork(sc->tp, ip,
1948 			sizeof(struct xfs_attr_sf_hdr), true);
1949 }
1950 
1951 /* Fix COW extent size hint problems. */
1952 STATIC void
xrep_inode_cowextsize(struct xfs_scrub * sc)1953 xrep_inode_cowextsize(
1954 	struct xfs_scrub	*sc)
1955 {
1956 	/* Fix misaligned CoW extent size hints on a directory. */
1957 	if ((sc->ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
1958 	    (sc->ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) &&
1959 	    sc->ip->i_extsize % sc->mp->m_sb.sb_rextsize > 0) {
1960 		sc->ip->i_cowextsize = 0;
1961 		sc->ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE;
1962 	}
1963 }
1964 
1965 /* Fix any irregularities in an inode that the verifiers don't catch. */
1966 STATIC int
xrep_inode_problems(struct xfs_scrub * sc)1967 xrep_inode_problems(
1968 	struct xfs_scrub	*sc)
1969 {
1970 	int			error;
1971 
1972 	error = xrep_inode_blockcounts(sc);
1973 	if (error)
1974 		return error;
1975 	error = xrep_inode_pptr(sc);
1976 	if (error)
1977 		return error;
1978 	xrep_inode_timestamps(sc->ip);
1979 	xrep_inode_flags(sc);
1980 	xrep_inode_ids(sc);
1981 	/*
1982 	 * We can now do a better job fixing the size of a directory now that
1983 	 * we can scan the data fork extents than we could in xrep_dinode_size.
1984 	 */
1985 	if (S_ISDIR(VFS_I(sc->ip)->i_mode))
1986 		xrep_inode_dir_size(sc);
1987 	xrep_inode_extsize(sc);
1988 	xrep_inode_cowextsize(sc);
1989 
1990 	trace_xrep_inode_fixed(sc);
1991 	xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
1992 	return xrep_roll_trans(sc);
1993 }
1994 
1995 /*
1996  * Make sure this inode's unlinked list pointers are consistent with its
1997  * link count.
1998  */
1999 STATIC int
xrep_inode_unlinked(struct xfs_scrub * sc)2000 xrep_inode_unlinked(
2001 	struct xfs_scrub	*sc)
2002 {
2003 	unsigned int		nlink = VFS_I(sc->ip)->i_nlink;
2004 	int			error;
2005 
2006 	/*
2007 	 * If this inode is linked from the directory tree and on the unlinked
2008 	 * list, remove it from the unlinked list.
2009 	 */
2010 	if (nlink > 0 && xfs_inode_on_unlinked_list(sc->ip)) {
2011 		struct xfs_perag	*pag;
2012 		int			error;
2013 
2014 		pag = xfs_perag_get(sc->mp,
2015 				XFS_INO_TO_AGNO(sc->mp, sc->ip->i_ino));
2016 		error = xfs_iunlink_remove(sc->tp, pag, sc->ip);
2017 		xfs_perag_put(pag);
2018 		if (error)
2019 			return error;
2020 	}
2021 
2022 	/*
2023 	 * If this inode is not linked from the directory tree yet not on the
2024 	 * unlinked list, put it on the unlinked list.
2025 	 */
2026 	if (nlink == 0 && !xfs_inode_on_unlinked_list(sc->ip)) {
2027 		error = xfs_iunlink(sc->tp, sc->ip);
2028 		if (error)
2029 			return error;
2030 	}
2031 
2032 	return 0;
2033 }
2034 
2035 /* Repair an inode's fields. */
2036 int
xrep_inode(struct xfs_scrub * sc)2037 xrep_inode(
2038 	struct xfs_scrub	*sc)
2039 {
2040 	int			error = 0;
2041 
2042 	/*
2043 	 * No inode?  That means we failed the _iget verifiers.  Repair all
2044 	 * the things that the inode verifiers care about, then retry _iget.
2045 	 */
2046 	if (!sc->ip) {
2047 		struct xrep_inode	*ri = sc->buf;
2048 
2049 		ASSERT(ri != NULL);
2050 
2051 		error = xrep_dinode_problems(ri);
2052 		if (error == -EBUSY) {
2053 			/*
2054 			 * Directory scan to recover inode mode encountered a
2055 			 * busy inode, so we did not continue repairing things.
2056 			 */
2057 			return 0;
2058 		}
2059 		if (error)
2060 			return error;
2061 
2062 		/* By this point we had better have a working incore inode. */
2063 		if (!sc->ip)
2064 			return -EFSCORRUPTED;
2065 	}
2066 
2067 	xfs_trans_ijoin(sc->tp, sc->ip, 0);
2068 
2069 	/* If we found corruption of any kind, try to fix it. */
2070 	if ((sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) ||
2071 	    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_XCORRUPT)) {
2072 		error = xrep_inode_problems(sc);
2073 		if (error)
2074 			return error;
2075 	}
2076 
2077 	/* See if we can clear the reflink flag. */
2078 	if (xfs_is_reflink_inode(sc->ip)) {
2079 		error = xfs_reflink_clear_inode_flag(sc->ip, &sc->tp);
2080 		if (error)
2081 			return error;
2082 	}
2083 
2084 	/* Reconnect incore unlinked list */
2085 	error = xrep_inode_unlinked(sc);
2086 	if (error)
2087 		return error;
2088 
2089 	return xrep_defer_finish(sc);
2090 }
2091