1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (C) 2018-2023 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_defer.h"
13 #include "xfs_btree.h"
14 #include "xfs_bit.h"
15 #include "xfs_log_format.h"
16 #include "xfs_trans.h"
17 #include "xfs_sb.h"
18 #include "xfs_inode.h"
19 #include "xfs_icache.h"
20 #include "xfs_inode_buf.h"
21 #include "xfs_inode_fork.h"
22 #include "xfs_ialloc.h"
23 #include "xfs_da_format.h"
24 #include "xfs_reflink.h"
25 #include "xfs_alloc.h"
26 #include "xfs_rmap.h"
27 #include "xfs_rmap_btree.h"
28 #include "xfs_bmap.h"
29 #include "xfs_bmap_btree.h"
30 #include "xfs_bmap_util.h"
31 #include "xfs_dir2.h"
32 #include "xfs_dir2_priv.h"
33 #include "xfs_quota_defs.h"
34 #include "xfs_quota.h"
35 #include "xfs_ag.h"
36 #include "xfs_rtbitmap.h"
37 #include "xfs_attr_leaf.h"
38 #include "xfs_log_priv.h"
39 #include "xfs_health.h"
40 #include "xfs_symlink_remote.h"
41 #include "xfs_rtgroup.h"
42 #include "xfs_rtrmap_btree.h"
43 #include "xfs_rtrefcount_btree.h"
44 #include "scrub/xfs_scrub.h"
45 #include "scrub/scrub.h"
46 #include "scrub/common.h"
47 #include "scrub/btree.h"
48 #include "scrub/trace.h"
49 #include "scrub/repair.h"
50 #include "scrub/iscan.h"
51 #include "scrub/readdir.h"
52 #include "scrub/tempfile.h"
53
54 /*
55 * Inode Record Repair
56 * ===================
57 *
58 * Roughly speaking, inode problems can be classified based on whether or not
59 * they trip the dinode verifiers. If those trip, then we won't be able to
60 * xfs_iget ourselves the inode.
61 *
 * Therefore, the xrep_dinode_* functions fix anything that will cause the
 * inode buffer verifier or the dinode verifier to fail.  The xrep_inode_*
 * functions fix things on live incore inodes.  The inode repair functions
 * make decisions with security and usability implications when reviving a
 * file:
66 *
 * - Files with zero di_mode or a garbage di_mode are converted to a regular
 *   file that only root can read.  This file may not actually contain user
 *   data, if the file was not previously a regular file.  Setuid and setgid
 *   bits are cleared.
71 *
72 * - Zero-size directories can be truncated to look empty. It is necessary to
73 * run the bmapbtd and directory repair functions to fully rebuild the
74 * directory.
75 *
76 * - Zero-size symbolic link targets can be truncated to '?'. It is necessary
77 * to run the bmapbtd and symlink repair functions to salvage the symlink.
78 *
79 * - Invalid extent size hints will be removed.
80 *
81 * - Quotacheck will be scheduled if we repaired an inode that was so badly
82 * damaged that the ondisk inode had to be rebuilt.
83 *
84 * - Invalid user, group, or project IDs (aka -1U) will be reset to zero.
85 * Setuid and setgid bits are cleared.
86 *
87 * - Data and attr forks are reset to extents format with zero extents if the
88 * fork data is inconsistent. It is necessary to run the bmapbtd or bmapbta
89 * repair functions to recover the space mapping.
90 *
91 * - ACLs will not be recovered if the attr fork is zapped or the extended
92 * attribute structure itself requires salvaging.
93 *
94 * - If the attr fork is zapped, the user and group ids are reset to root and
95 * the setuid and setgid bits are removed.
96 */
97
98 /*
99 * All the information we need to repair the ondisk inode if we can't iget the
100 * incore inode. We don't allocate this buffer unless we're going to perform
101 * a repair to the ondisk inode cluster buffer.
102 */
103 struct xrep_inode {
104 /* Inode mapping that we saved from the initial lookup attempt. */
105 struct xfs_imap imap;
106
107 struct xfs_scrub *sc;
108
109 /* Blocks in use on the data device by data extents or bmbt blocks. */
110 xfs_rfsblock_t data_blocks;
111
112 /* Blocks in use on the rt device. */
113 xfs_rfsblock_t rt_blocks;
114
115 /* Blocks in use by the attr fork. */
116 xfs_rfsblock_t attr_blocks;
117
118 /* Number of data device extents for the data fork. */
119 xfs_extnum_t data_extents;
120
121 /*
122 * Number of realtime device extents for the data fork. If
123 * data_extents and rt_extents indicate that the data fork has extents
124 * on both devices, we'll just back away slowly.
125 */
126 xfs_extnum_t rt_extents;
127
128 /* Number of (data device) extents for the attr fork. */
129 xfs_aextnum_t attr_extents;
130
131 /* Sick state to set after zapping parts of the inode. */
132 unsigned int ino_sick_mask;
133
134 /* Must we remove all access from this file? */
135 bool zap_acls;
136
137 /* Inode scanner to see if we can find the ftype from dirents */
138 struct xchk_iscan ftype_iscan;
139 uint8_t alleged_ftype;
140 };
141
142 /*
143 * Setup function for inode repair. @imap contains the ondisk inode mapping
144 * information so that we can correct the ondisk inode cluster buffer if
145 * necessary to make iget work.
146 */
147 int
xrep_setup_inode(struct xfs_scrub * sc,const struct xfs_imap * imap)148 xrep_setup_inode(
149 struct xfs_scrub *sc,
150 const struct xfs_imap *imap)
151 {
152 struct xrep_inode *ri;
153
154 sc->buf = kzalloc(sizeof(struct xrep_inode), XCHK_GFP_FLAGS);
155 if (!sc->buf)
156 return -ENOMEM;
157
158 ri = sc->buf;
159 memcpy(&ri->imap, imap, sizeof(struct xfs_imap));
160 ri->sc = sc;
161 return 0;
162 }
163
164 /*
165 * Make sure this ondisk inode can pass the inode buffer verifier. This is
166 * not the same as the dinode verifier.
167 */
168 STATIC void
xrep_dinode_buf_core(struct xfs_scrub * sc,struct xfs_buf * bp,unsigned int ioffset)169 xrep_dinode_buf_core(
170 struct xfs_scrub *sc,
171 struct xfs_buf *bp,
172 unsigned int ioffset)
173 {
174 struct xfs_dinode *dip = xfs_buf_offset(bp, ioffset);
175 struct xfs_trans *tp = sc->tp;
176 struct xfs_mount *mp = sc->mp;
177 xfs_agino_t agino;
178 bool crc_ok = false;
179 bool magic_ok = false;
180 bool unlinked_ok = false;
181
182 agino = be32_to_cpu(dip->di_next_unlinked);
183
184 if (xfs_verify_agino_or_null(bp->b_pag, agino))
185 unlinked_ok = true;
186
187 if (dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
188 xfs_dinode_good_version(mp, dip->di_version))
189 magic_ok = true;
190
191 if (xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
192 XFS_DINODE_CRC_OFF))
193 crc_ok = true;
194
195 if (magic_ok && unlinked_ok && crc_ok)
196 return;
197
198 if (!magic_ok) {
199 dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
200 dip->di_version = 3;
201 }
202 if (!unlinked_ok)
203 dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
204 xfs_dinode_calc_crc(mp, dip);
205 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
206 xfs_trans_log_buf(tp, bp, ioffset,
207 ioffset + sizeof(struct xfs_dinode) - 1);
208 }
209
210 /* Make sure this inode cluster buffer can pass the inode buffer verifier. */
211 STATIC void
xrep_dinode_buf(struct xfs_scrub * sc,struct xfs_buf * bp)212 xrep_dinode_buf(
213 struct xfs_scrub *sc,
214 struct xfs_buf *bp)
215 {
216 struct xfs_mount *mp = sc->mp;
217 int i;
218 int ni;
219
220 ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
221 for (i = 0; i < ni; i++)
222 xrep_dinode_buf_core(sc, bp, i << mp->m_sb.sb_inodelog);
223 }
224
225 /* Reinitialize things that never change in an inode. */
226 STATIC void
xrep_dinode_header(struct xfs_scrub * sc,struct xfs_dinode * dip)227 xrep_dinode_header(
228 struct xfs_scrub *sc,
229 struct xfs_dinode *dip)
230 {
231 trace_xrep_dinode_header(sc, dip);
232
233 dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
234 if (!xfs_dinode_good_version(sc->mp, dip->di_version))
235 dip->di_version = 3;
236 dip->di_ino = cpu_to_be64(sc->sm->sm_ino);
237 uuid_copy(&dip->di_uuid, &sc->mp->m_sb.sb_meta_uuid);
238 dip->di_gen = cpu_to_be32(sc->sm->sm_gen);
239 }
240
241 /*
242 * If this directory entry points to the scrub target inode, then the directory
243 * we're scanning is the parent of the scrub target inode.
244 */
245 STATIC int
xrep_dinode_findmode_dirent(struct xfs_scrub * sc,struct xfs_inode * dp,xfs_dir2_dataptr_t dapos,const struct xfs_name * name,xfs_ino_t ino,void * priv)246 xrep_dinode_findmode_dirent(
247 struct xfs_scrub *sc,
248 struct xfs_inode *dp,
249 xfs_dir2_dataptr_t dapos,
250 const struct xfs_name *name,
251 xfs_ino_t ino,
252 void *priv)
253 {
254 struct xrep_inode *ri = priv;
255 int error = 0;
256
257 if (xchk_should_terminate(ri->sc, &error))
258 return error;
259
260 if (ino != sc->sm->sm_ino)
261 return 0;
262
263 /* Ignore garbage directory entry names. */
264 if (name->len == 0 || !xfs_dir2_namecheck(name->name, name->len))
265 return -EFSCORRUPTED;
266
267 /* Don't pick up dot or dotdot entries; we only want child dirents. */
268 if (xfs_dir2_samename(name, &xfs_name_dotdot) ||
269 xfs_dir2_samename(name, &xfs_name_dot))
270 return 0;
271
272 /*
273 * Uhoh, more than one parent for this inode and they don't agree on
274 * the file type?
275 */
276 if (ri->alleged_ftype != XFS_DIR3_FT_UNKNOWN &&
277 ri->alleged_ftype != name->type) {
278 trace_xrep_dinode_findmode_dirent_inval(ri->sc, dp, name->type,
279 ri->alleged_ftype);
280 return -EFSCORRUPTED;
281 }
282
283 /* We found a potential parent; remember the ftype. */
284 trace_xrep_dinode_findmode_dirent(ri->sc, dp, name->type);
285 ri->alleged_ftype = name->type;
286 return 0;
287 }
288
289 /* Try to lock a directory, or wait a jiffy. */
290 static inline int
xrep_dinode_ilock_nowait(struct xfs_inode * dp,unsigned int lock_mode)291 xrep_dinode_ilock_nowait(
292 struct xfs_inode *dp,
293 unsigned int lock_mode)
294 {
295 if (xfs_ilock_nowait(dp, lock_mode))
296 return true;
297
298 schedule_timeout_killable(1);
299 return false;
300 }
301
302 /*
303 * Try to lock a directory to look for ftype hints. Since we already hold the
304 * AGI buffer, we cannot block waiting for the ILOCK because rename can take
305 * the ILOCK and then try to lock AGIs.
306 */
307 STATIC int
xrep_dinode_trylock_directory(struct xrep_inode * ri,struct xfs_inode * dp,unsigned int * lock_modep)308 xrep_dinode_trylock_directory(
309 struct xrep_inode *ri,
310 struct xfs_inode *dp,
311 unsigned int *lock_modep)
312 {
313 unsigned long deadline = jiffies + msecs_to_jiffies(30000);
314 unsigned int lock_mode;
315 int error = 0;
316
317 do {
318 if (xchk_should_terminate(ri->sc, &error))
319 return error;
320
321 if (xfs_need_iread_extents(&dp->i_df))
322 lock_mode = XFS_ILOCK_EXCL;
323 else
324 lock_mode = XFS_ILOCK_SHARED;
325
326 if (xrep_dinode_ilock_nowait(dp, lock_mode)) {
327 *lock_modep = lock_mode;
328 return 0;
329 }
330 } while (!time_is_before_jiffies(deadline));
331 return -EBUSY;
332 }
333
334 /*
335 * If this is a directory, walk the dirents looking for any that point to the
336 * scrub target inode.
337 */
338 STATIC int
xrep_dinode_findmode_walk_directory(struct xrep_inode * ri,struct xfs_inode * dp)339 xrep_dinode_findmode_walk_directory(
340 struct xrep_inode *ri,
341 struct xfs_inode *dp)
342 {
343 struct xfs_scrub *sc = ri->sc;
344 unsigned int lock_mode;
345 int error = 0;
346
347 /* Ignore temporary repair directories. */
348 if (xrep_is_tempfile(dp))
349 return 0;
350
351 /*
352 * Scan the directory to see if there it contains an entry pointing to
353 * the directory that we are repairing.
354 */
355 error = xrep_dinode_trylock_directory(ri, dp, &lock_mode);
356 if (error)
357 return error;
358
359 /*
360 * If this directory is known to be sick, we cannot scan it reliably
361 * and must abort.
362 */
363 if (xfs_inode_has_sickness(dp, XFS_SICK_INO_CORE |
364 XFS_SICK_INO_BMBTD |
365 XFS_SICK_INO_DIR)) {
366 error = -EFSCORRUPTED;
367 goto out_unlock;
368 }
369
370 /*
371 * We cannot complete our parent pointer scan if a directory looks as
372 * though it has been zapped by the inode record repair code.
373 */
374 if (xchk_dir_looks_zapped(dp)) {
375 error = -EBUSY;
376 goto out_unlock;
377 }
378
379 error = xchk_dir_walk(sc, dp, xrep_dinode_findmode_dirent, ri);
380 if (error)
381 goto out_unlock;
382
383 out_unlock:
384 xfs_iunlock(dp, lock_mode);
385 return error;
386 }
387
388 /*
389 * Try to find the mode of the inode being repaired by looking for directories
390 * that point down to this file.
391 */
392 STATIC int
xrep_dinode_find_mode(struct xrep_inode * ri,uint16_t * mode)393 xrep_dinode_find_mode(
394 struct xrep_inode *ri,
395 uint16_t *mode)
396 {
397 struct xfs_scrub *sc = ri->sc;
398 struct xfs_inode *dp;
399 int error;
400
401 /* No ftype means we have no other metadata to consult. */
402 if (!xfs_has_ftype(sc->mp)) {
403 *mode = S_IFREG;
404 return 0;
405 }
406
407 /*
408 * Scan all directories for parents that might point down to this
409 * inode. Skip the inode being repaired during the scan since it
410 * cannot be its own parent. Note that we still hold the AGI locked
411 * so there's a real possibility that _iscan_iter can return EBUSY.
412 */
413 xchk_iscan_start(sc, 5000, 100, &ri->ftype_iscan);
414 xchk_iscan_set_agi_trylock(&ri->ftype_iscan);
415 ri->ftype_iscan.skip_ino = sc->sm->sm_ino;
416 ri->alleged_ftype = XFS_DIR3_FT_UNKNOWN;
417 while ((error = xchk_iscan_iter(&ri->ftype_iscan, &dp)) == 1) {
418 if (S_ISDIR(VFS_I(dp)->i_mode))
419 error = xrep_dinode_findmode_walk_directory(ri, dp);
420 xchk_iscan_mark_visited(&ri->ftype_iscan, dp);
421 xchk_irele(sc, dp);
422 if (error < 0)
423 break;
424 if (xchk_should_terminate(sc, &error))
425 break;
426 }
427 xchk_iscan_iter_finish(&ri->ftype_iscan);
428 xchk_iscan_teardown(&ri->ftype_iscan);
429
430 if (error == -EBUSY) {
431 if (ri->alleged_ftype != XFS_DIR3_FT_UNKNOWN) {
432 /*
433 * If we got an EBUSY after finding at least one
434 * dirent, that means the scan found an inode on the
435 * inactivation list and could not open it. Accept the
436 * alleged ftype and install a new mode below.
437 */
438 error = 0;
439 } else if (!(sc->flags & XCHK_TRY_HARDER)) {
440 /*
441 * Otherwise, retry the operation one time to see if
442 * the reason for the delay is an inode from the same
443 * cluster buffer waiting on the inactivation list.
444 */
445 error = -EDEADLOCK;
446 }
447 }
448 if (error)
449 return error;
450
451 /*
452 * Convert the discovered ftype into the file mode. If all else fails,
453 * return S_IFREG.
454 */
455 switch (ri->alleged_ftype) {
456 case XFS_DIR3_FT_DIR:
457 *mode = S_IFDIR;
458 break;
459 case XFS_DIR3_FT_WHT:
460 case XFS_DIR3_FT_CHRDEV:
461 *mode = S_IFCHR;
462 break;
463 case XFS_DIR3_FT_BLKDEV:
464 *mode = S_IFBLK;
465 break;
466 case XFS_DIR3_FT_FIFO:
467 *mode = S_IFIFO;
468 break;
469 case XFS_DIR3_FT_SOCK:
470 *mode = S_IFSOCK;
471 break;
472 case XFS_DIR3_FT_SYMLINK:
473 *mode = S_IFLNK;
474 break;
475 default:
476 *mode = S_IFREG;
477 break;
478 }
479 return 0;
480 }
481
482 /* Turn di_mode into /something/ recognizable. Returns true if we succeed. */
483 STATIC int
xrep_dinode_mode(struct xrep_inode * ri,struct xfs_dinode * dip)484 xrep_dinode_mode(
485 struct xrep_inode *ri,
486 struct xfs_dinode *dip)
487 {
488 struct xfs_scrub *sc = ri->sc;
489 uint16_t mode = be16_to_cpu(dip->di_mode);
490 int error;
491
492 trace_xrep_dinode_mode(sc, dip);
493
494 if (mode == 0 || xfs_mode_to_ftype(mode) != XFS_DIR3_FT_UNKNOWN)
495 return 0;
496
497 /* Try to fix the mode. If we cannot, then leave everything alone. */
498 error = xrep_dinode_find_mode(ri, &mode);
499 switch (error) {
500 case -EINTR:
501 case -EBUSY:
502 case -EDEADLOCK:
503 /* temporary failure or fatal signal */
504 return error;
505 case 0:
506 /* found mode */
507 break;
508 default:
509 /* some other error, assume S_IFREG */
510 mode = S_IFREG;
511 break;
512 }
513
514 /* bad mode, so we set it to a file that only root can read */
515 dip->di_mode = cpu_to_be16(mode);
516 dip->di_uid = 0;
517 dip->di_gid = 0;
518 ri->zap_acls = true;
519 return 0;
520 }
521
522 /* Fix unused link count fields having nonzero values. */
523 STATIC void
xrep_dinode_nlinks(struct xfs_dinode * dip)524 xrep_dinode_nlinks(
525 struct xfs_dinode *dip)
526 {
527 if (dip->di_version < 2) {
528 dip->di_nlink = 0;
529 return;
530 }
531
532 if (xfs_dinode_is_metadir(dip)) {
533 if (be16_to_cpu(dip->di_metatype) >= XFS_METAFILE_MAX)
534 dip->di_metatype = cpu_to_be16(XFS_METAFILE_UNKNOWN);
535 } else {
536 dip->di_metatype = 0;
537 }
538 }
539
540 /* Fix any conflicting flags that the verifiers complain about. */
541 STATIC void
xrep_dinode_flags(struct xfs_scrub * sc,struct xfs_dinode * dip,bool isrt)542 xrep_dinode_flags(
543 struct xfs_scrub *sc,
544 struct xfs_dinode *dip,
545 bool isrt)
546 {
547 struct xfs_mount *mp = sc->mp;
548 uint64_t flags2 = be64_to_cpu(dip->di_flags2);
549 uint16_t flags = be16_to_cpu(dip->di_flags);
550 uint16_t mode = be16_to_cpu(dip->di_mode);
551
552 trace_xrep_dinode_flags(sc, dip);
553
554 if (isrt)
555 flags |= XFS_DIFLAG_REALTIME;
556 else
557 flags &= ~XFS_DIFLAG_REALTIME;
558
559 /*
560 * For regular files on a reflink filesystem, set the REFLINK flag to
561 * protect shared extents. A later stage will actually check those
562 * extents and clear the flag if possible.
563 */
564 if (xfs_has_reflink(mp) && S_ISREG(mode))
565 flags2 |= XFS_DIFLAG2_REFLINK;
566 else
567 flags2 &= ~(XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE);
568 if (!xfs_has_bigtime(mp))
569 flags2 &= ~XFS_DIFLAG2_BIGTIME;
570 if (!xfs_has_large_extent_counts(mp))
571 flags2 &= ~XFS_DIFLAG2_NREXT64;
572 if (flags2 & XFS_DIFLAG2_NREXT64)
573 dip->di_nrext64_pad = 0;
574 else if (dip->di_version >= 3)
575 dip->di_v3_pad = 0;
576
577 if (flags2 & XFS_DIFLAG2_METADATA) {
578 xfs_failaddr_t fa;
579
580 fa = xfs_dinode_verify_metadir(sc->mp, dip, mode, flags,
581 flags2);
582 if (fa)
583 flags2 &= ~XFS_DIFLAG2_METADATA;
584 }
585
586 dip->di_flags = cpu_to_be16(flags);
587 dip->di_flags2 = cpu_to_be64(flags2);
588 }
589
590 /*
591 * Blow out symlink; now it points nowhere. We don't have to worry about
592 * incore state because this inode is failing the verifiers.
593 */
594 STATIC void
xrep_dinode_zap_symlink(struct xrep_inode * ri,struct xfs_dinode * dip)595 xrep_dinode_zap_symlink(
596 struct xrep_inode *ri,
597 struct xfs_dinode *dip)
598 {
599 struct xfs_scrub *sc = ri->sc;
600 char *p;
601
602 trace_xrep_dinode_zap_symlink(sc, dip);
603
604 dip->di_format = XFS_DINODE_FMT_LOCAL;
605 dip->di_size = cpu_to_be64(1);
606 p = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
607 *p = '?';
608 ri->ino_sick_mask |= XFS_SICK_INO_SYMLINK_ZAPPED;
609 }
610
611 /*
612 * Blow out dir, make the parent point to the root. In the future repair will
613 * reconstruct this directory for us. Note that there's no in-core directory
614 * inode because the sf verifier tripped, so we don't have to worry about the
615 * dentry cache.
616 */
617 STATIC void
xrep_dinode_zap_dir(struct xrep_inode * ri,struct xfs_dinode * dip)618 xrep_dinode_zap_dir(
619 struct xrep_inode *ri,
620 struct xfs_dinode *dip)
621 {
622 struct xfs_scrub *sc = ri->sc;
623 struct xfs_mount *mp = sc->mp;
624 struct xfs_dir2_sf_hdr *sfp;
625 int i8count;
626
627 trace_xrep_dinode_zap_dir(sc, dip);
628
629 dip->di_format = XFS_DINODE_FMT_LOCAL;
630 i8count = mp->m_sb.sb_rootino > XFS_DIR2_MAX_SHORT_INUM;
631 sfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
632 sfp->count = 0;
633 sfp->i8count = i8count;
634 xfs_dir2_sf_put_parent_ino(sfp, mp->m_sb.sb_rootino);
635 dip->di_size = cpu_to_be64(xfs_dir2_sf_hdr_size(i8count));
636 ri->ino_sick_mask |= XFS_SICK_INO_DIR_ZAPPED;
637 }
638
639 /* Make sure we don't have a garbage file size. */
640 STATIC void
xrep_dinode_size(struct xrep_inode * ri,struct xfs_dinode * dip)641 xrep_dinode_size(
642 struct xrep_inode *ri,
643 struct xfs_dinode *dip)
644 {
645 struct xfs_scrub *sc = ri->sc;
646 uint64_t size = be64_to_cpu(dip->di_size);
647 uint16_t mode = be16_to_cpu(dip->di_mode);
648
649 trace_xrep_dinode_size(sc, dip);
650
651 switch (mode & S_IFMT) {
652 case S_IFIFO:
653 case S_IFCHR:
654 case S_IFBLK:
655 case S_IFSOCK:
656 /* di_size can't be nonzero for special files */
657 dip->di_size = 0;
658 break;
659 case S_IFREG:
660 /* Regular files can't be larger than 2^63-1 bytes. */
661 dip->di_size = cpu_to_be64(size & ~(1ULL << 63));
662 break;
663 case S_IFLNK:
664 /*
665 * Truncate ridiculously oversized symlinks. If the size is
666 * zero, reset it to point to the current directory. Both of
667 * these conditions trigger dinode verifier errors, so there
668 * is no in-core state to reset.
669 */
670 if (size > XFS_SYMLINK_MAXLEN)
671 dip->di_size = cpu_to_be64(XFS_SYMLINK_MAXLEN);
672 else if (size == 0)
673 xrep_dinode_zap_symlink(ri, dip);
674 break;
675 case S_IFDIR:
676 /*
677 * Directories can't have a size larger than 32G. If the size
678 * is zero, reset it to an empty directory. Both of these
679 * conditions trigger dinode verifier errors, so there is no
680 * in-core state to reset.
681 */
682 if (size > XFS_DIR2_SPACE_SIZE)
683 dip->di_size = cpu_to_be64(XFS_DIR2_SPACE_SIZE);
684 else if (size == 0)
685 xrep_dinode_zap_dir(ri, dip);
686 break;
687 }
688 }
689
690 /* Fix extent size hints. */
691 STATIC void
xrep_dinode_extsize_hints(struct xfs_scrub * sc,struct xfs_dinode * dip)692 xrep_dinode_extsize_hints(
693 struct xfs_scrub *sc,
694 struct xfs_dinode *dip)
695 {
696 struct xfs_mount *mp = sc->mp;
697 uint64_t flags2 = be64_to_cpu(dip->di_flags2);
698 uint16_t flags = be16_to_cpu(dip->di_flags);
699 uint16_t mode = be16_to_cpu(dip->di_mode);
700
701 xfs_failaddr_t fa;
702
703 trace_xrep_dinode_extsize_hints(sc, dip);
704
705 fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize),
706 mode, flags);
707 if (fa) {
708 dip->di_extsize = 0;
709 dip->di_flags &= ~cpu_to_be16(XFS_DIFLAG_EXTSIZE |
710 XFS_DIFLAG_EXTSZINHERIT);
711 }
712
713 if (dip->di_version < 3 ||
714 (xfs_has_zoned(sc->mp) &&
715 dip->di_metatype == cpu_to_be16(XFS_METAFILE_RTRMAP)))
716 return;
717
718 fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
719 mode, flags, flags2);
720 if (fa) {
721 dip->di_cowextsize = 0;
722 dip->di_flags2 &= ~cpu_to_be64(XFS_DIFLAG2_COWEXTSIZE);
723 }
724 }
725
726 /* Count extents and blocks for an inode given an rmap. */
727 STATIC int
xrep_dinode_walk_rmap(struct xfs_btree_cur * cur,const struct xfs_rmap_irec * rec,void * priv)728 xrep_dinode_walk_rmap(
729 struct xfs_btree_cur *cur,
730 const struct xfs_rmap_irec *rec,
731 void *priv)
732 {
733 struct xrep_inode *ri = priv;
734 int error = 0;
735
736 if (xchk_should_terminate(ri->sc, &error))
737 return error;
738
739 /* We only care about this inode. */
740 if (rec->rm_owner != ri->sc->sm->sm_ino)
741 return 0;
742
743 if (rec->rm_flags & XFS_RMAP_ATTR_FORK) {
744 ri->attr_blocks += rec->rm_blockcount;
745 if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
746 ri->attr_extents++;
747
748 return 0;
749 }
750
751 ri->data_blocks += rec->rm_blockcount;
752 if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
753 ri->data_extents++;
754
755 return 0;
756 }
757
758 /* Count extents and blocks for an inode from all AG rmap data. */
759 STATIC int
xrep_dinode_count_ag_rmaps(struct xrep_inode * ri,struct xfs_perag * pag)760 xrep_dinode_count_ag_rmaps(
761 struct xrep_inode *ri,
762 struct xfs_perag *pag)
763 {
764 struct xfs_btree_cur *cur;
765 struct xfs_buf *agf;
766 int error;
767
768 error = xfs_alloc_read_agf(pag, ri->sc->tp, 0, &agf);
769 if (error)
770 return error;
771
772 cur = xfs_rmapbt_init_cursor(ri->sc->mp, ri->sc->tp, agf, pag);
773 error = xfs_rmap_query_all(cur, xrep_dinode_walk_rmap, ri);
774 xfs_btree_del_cursor(cur, error);
775 xfs_trans_brelse(ri->sc->tp, agf);
776 return error;
777 }
778
779 /* Count extents and blocks for an inode given an rt rmap. */
780 STATIC int
xrep_dinode_walk_rtrmap(struct xfs_btree_cur * cur,const struct xfs_rmap_irec * rec,void * priv)781 xrep_dinode_walk_rtrmap(
782 struct xfs_btree_cur *cur,
783 const struct xfs_rmap_irec *rec,
784 void *priv)
785 {
786 struct xrep_inode *ri = priv;
787 int error = 0;
788
789 if (xchk_should_terminate(ri->sc, &error))
790 return error;
791
792 /* We only care about this inode. */
793 if (rec->rm_owner != ri->sc->sm->sm_ino)
794 return 0;
795
796 if (rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK))
797 return -EFSCORRUPTED;
798
799 ri->rt_blocks += rec->rm_blockcount;
800 ri->rt_extents++;
801 return 0;
802 }
803
804 /* Count extents and blocks for an inode from all realtime rmap data. */
805 STATIC int
xrep_dinode_count_rtgroup_rmaps(struct xrep_inode * ri,struct xfs_rtgroup * rtg)806 xrep_dinode_count_rtgroup_rmaps(
807 struct xrep_inode *ri,
808 struct xfs_rtgroup *rtg)
809 {
810 struct xfs_scrub *sc = ri->sc;
811 int error;
812
813 error = xrep_rtgroup_init(sc, rtg, &sc->sr, XFS_RTGLOCK_RMAP);
814 if (error)
815 return error;
816
817 error = xfs_rmap_query_all(sc->sr.rmap_cur, xrep_dinode_walk_rtrmap,
818 ri);
819 xchk_rtgroup_btcur_free(&sc->sr);
820 xchk_rtgroup_free(sc, &sc->sr);
821 return error;
822 }
823
824 /* Count extents and blocks for a given inode from all rmap data. */
825 STATIC int
xrep_dinode_count_rmaps(struct xrep_inode * ri)826 xrep_dinode_count_rmaps(
827 struct xrep_inode *ri)
828 {
829 struct xfs_perag *pag = NULL;
830 struct xfs_rtgroup *rtg = NULL;
831 int error;
832
833 if (!xfs_has_rmapbt(ri->sc->mp))
834 return -EOPNOTSUPP;
835
836 while ((rtg = xfs_rtgroup_next(ri->sc->mp, rtg))) {
837 error = xrep_dinode_count_rtgroup_rmaps(ri, rtg);
838 if (error) {
839 xfs_rtgroup_rele(rtg);
840 return error;
841 }
842 }
843
844 while ((pag = xfs_perag_next(ri->sc->mp, pag))) {
845 error = xrep_dinode_count_ag_rmaps(ri, pag);
846 if (error) {
847 xfs_perag_rele(pag);
848 return error;
849 }
850 }
851
852 /* Can't have extents on both the rt and the data device. */
853 if (ri->data_extents && ri->rt_extents)
854 return -EFSCORRUPTED;
855
856 trace_xrep_dinode_count_rmaps(ri->sc,
857 ri->data_blocks, ri->rt_blocks, ri->attr_blocks,
858 ri->data_extents, ri->rt_extents, ri->attr_extents);
859 return 0;
860 }
861
862 /* Return true if this extents-format ifork looks like garbage. */
863 STATIC bool
xrep_dinode_bad_extents_fork(struct xfs_scrub * sc,struct xfs_dinode * dip,unsigned int dfork_size,int whichfork)864 xrep_dinode_bad_extents_fork(
865 struct xfs_scrub *sc,
866 struct xfs_dinode *dip,
867 unsigned int dfork_size,
868 int whichfork)
869 {
870 struct xfs_bmbt_irec new;
871 struct xfs_bmbt_rec *dp;
872 xfs_extnum_t nex;
873 bool isrt;
874 unsigned int i;
875
876 nex = xfs_dfork_nextents(dip, whichfork);
877 if (nex > dfork_size / sizeof(struct xfs_bmbt_rec))
878 return true;
879
880 dp = XFS_DFORK_PTR(dip, whichfork);
881
882 isrt = dip->di_flags & cpu_to_be16(XFS_DIFLAG_REALTIME);
883 for (i = 0; i < nex; i++, dp++) {
884 xfs_failaddr_t fa;
885
886 xfs_bmbt_disk_get_all(dp, &new);
887 fa = xfs_bmap_validate_extent_raw(sc->mp, isrt, whichfork,
888 &new);
889 if (fa)
890 return true;
891 }
892
893 return false;
894 }
895
896 /* Return true if this btree-format ifork looks like garbage. */
897 STATIC bool
xrep_dinode_bad_bmbt_fork(struct xfs_scrub * sc,struct xfs_dinode * dip,unsigned int dfork_size,int whichfork)898 xrep_dinode_bad_bmbt_fork(
899 struct xfs_scrub *sc,
900 struct xfs_dinode *dip,
901 unsigned int dfork_size,
902 int whichfork)
903 {
904 struct xfs_bmdr_block *dfp;
905 xfs_extnum_t nex;
906 unsigned int i;
907 unsigned int dmxr;
908 unsigned int nrecs;
909 unsigned int level;
910
911 nex = xfs_dfork_nextents(dip, whichfork);
912 if (nex <= dfork_size / sizeof(struct xfs_bmbt_rec))
913 return true;
914
915 if (dfork_size < sizeof(struct xfs_bmdr_block))
916 return true;
917
918 dfp = XFS_DFORK_PTR(dip, whichfork);
919 nrecs = be16_to_cpu(dfp->bb_numrecs);
920 level = be16_to_cpu(dfp->bb_level);
921
922 if (nrecs == 0 || xfs_bmdr_space_calc(nrecs) > dfork_size)
923 return true;
924 if (level == 0 || level >= XFS_BM_MAXLEVELS(sc->mp, whichfork))
925 return true;
926
927 dmxr = xfs_bmdr_maxrecs(dfork_size, 0);
928 for (i = 1; i <= nrecs; i++) {
929 struct xfs_bmbt_key *fkp;
930 xfs_bmbt_ptr_t *fpp;
931 xfs_fileoff_t fileoff;
932 xfs_fsblock_t fsbno;
933
934 fkp = xfs_bmdr_key_addr(dfp, i);
935 fileoff = be64_to_cpu(fkp->br_startoff);
936 if (!xfs_verify_fileoff(sc->mp, fileoff))
937 return true;
938
939 fpp = xfs_bmdr_ptr_addr(dfp, i, dmxr);
940 fsbno = be64_to_cpu(*fpp);
941 if (!xfs_verify_fsbno(sc->mp, fsbno))
942 return true;
943 }
944
945 return false;
946 }
947
948 /* Return true if this rmap-format ifork looks like garbage. */
949 STATIC bool
xrep_dinode_bad_rtrmapbt_fork(struct xfs_scrub * sc,struct xfs_dinode * dip,unsigned int dfork_size)950 xrep_dinode_bad_rtrmapbt_fork(
951 struct xfs_scrub *sc,
952 struct xfs_dinode *dip,
953 unsigned int dfork_size)
954 {
955 struct xfs_rtrmap_root *dfp;
956 unsigned int nrecs;
957 unsigned int level;
958
959 if (dfork_size < sizeof(struct xfs_rtrmap_root))
960 return true;
961
962 dfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
963 nrecs = be16_to_cpu(dfp->bb_numrecs);
964 level = be16_to_cpu(dfp->bb_level);
965
966 if (level > sc->mp->m_rtrmap_maxlevels)
967 return true;
968 if (xfs_rtrmap_droot_space_calc(level, nrecs) > dfork_size)
969 return true;
970 if (level > 0 && nrecs == 0)
971 return true;
972
973 return false;
974 }
975
976 /* Return true if this refcount-format ifork looks like garbage. */
977 STATIC bool
xrep_dinode_bad_rtrefcountbt_fork(struct xfs_scrub * sc,struct xfs_dinode * dip,unsigned int dfork_size)978 xrep_dinode_bad_rtrefcountbt_fork(
979 struct xfs_scrub *sc,
980 struct xfs_dinode *dip,
981 unsigned int dfork_size)
982 {
983 struct xfs_rtrefcount_root *dfp;
984 unsigned int nrecs;
985 unsigned int level;
986
987 if (dfork_size < sizeof(struct xfs_rtrefcount_root))
988 return true;
989
990 dfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
991 nrecs = be16_to_cpu(dfp->bb_numrecs);
992 level = be16_to_cpu(dfp->bb_level);
993
994 if (level > sc->mp->m_rtrefc_maxlevels)
995 return true;
996 if (xfs_rtrefcount_droot_space_calc(level, nrecs) > dfork_size)
997 return true;
998 if (level > 0 && nrecs == 0)
999 return true;
1000
1001 return false;
1002 }
1003
1004 /* Check a metadata-btree fork. */
1005 STATIC bool
xrep_dinode_bad_metabt_fork(struct xfs_scrub * sc,struct xfs_dinode * dip,unsigned int dfork_size,int whichfork)1006 xrep_dinode_bad_metabt_fork(
1007 struct xfs_scrub *sc,
1008 struct xfs_dinode *dip,
1009 unsigned int dfork_size,
1010 int whichfork)
1011 {
1012 if (whichfork != XFS_DATA_FORK)
1013 return true;
1014
1015 switch (be16_to_cpu(dip->di_metatype)) {
1016 case XFS_METAFILE_RTRMAP:
1017 return xrep_dinode_bad_rtrmapbt_fork(sc, dip, dfork_size);
1018 case XFS_METAFILE_RTREFCOUNT:
1019 return xrep_dinode_bad_rtrefcountbt_fork(sc, dip, dfork_size);
1020 default:
1021 return true;
1022 }
1023
1024 return false;
1025 }
1026
1027 /*
1028 * Check the data fork for things that will fail the ifork verifiers or the
1029 * ifork formatters.
1030 */
1031 STATIC bool
xrep_dinode_check_dfork(struct xfs_scrub * sc,struct xfs_dinode * dip,uint16_t mode)1032 xrep_dinode_check_dfork(
1033 struct xfs_scrub *sc,
1034 struct xfs_dinode *dip,
1035 uint16_t mode)
1036 {
1037 void *dfork_ptr;
1038 int64_t data_size;
1039 unsigned int fmt;
1040 unsigned int dfork_size;
1041
1042 /*
1043 * Verifier functions take signed int64_t, so check for bogus negative
1044 * values first.
1045 */
1046 data_size = be64_to_cpu(dip->di_size);
1047 if (data_size < 0)
1048 return true;
1049
1050 fmt = XFS_DFORK_FORMAT(dip, XFS_DATA_FORK);
1051 switch (mode & S_IFMT) {
1052 case S_IFIFO:
1053 case S_IFCHR:
1054 case S_IFBLK:
1055 case S_IFSOCK:
1056 if (fmt != XFS_DINODE_FMT_DEV)
1057 return true;
1058 break;
1059 case S_IFREG:
1060 switch (fmt) {
1061 case XFS_DINODE_FMT_LOCAL:
1062 return true;
1063 case XFS_DINODE_FMT_EXTENTS:
1064 case XFS_DINODE_FMT_BTREE:
1065 case XFS_DINODE_FMT_META_BTREE:
1066 break;
1067 default:
1068 return true;
1069 }
1070 break;
1071 case S_IFLNK:
1072 case S_IFDIR:
1073 switch (fmt) {
1074 case XFS_DINODE_FMT_LOCAL:
1075 case XFS_DINODE_FMT_EXTENTS:
1076 case XFS_DINODE_FMT_BTREE:
1077 break;
1078 default:
1079 return true;
1080 }
1081 break;
1082 default:
1083 return true;
1084 }
1085
1086 dfork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_DATA_FORK);
1087 dfork_ptr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
1088
1089 switch (fmt) {
1090 case XFS_DINODE_FMT_DEV:
1091 break;
1092 case XFS_DINODE_FMT_LOCAL:
1093 /* dir/symlink structure cannot be larger than the fork */
1094 if (data_size > dfork_size)
1095 return true;
1096 /* directory structure must pass verification. */
1097 if (S_ISDIR(mode) &&
1098 xfs_dir2_sf_verify(sc->mp, dfork_ptr, data_size) != NULL)
1099 return true;
1100 /* symlink structure must pass verification. */
1101 if (S_ISLNK(mode) &&
1102 xfs_symlink_shortform_verify(dfork_ptr, data_size) != NULL)
1103 return true;
1104 break;
1105 case XFS_DINODE_FMT_EXTENTS:
1106 if (xrep_dinode_bad_extents_fork(sc, dip, dfork_size,
1107 XFS_DATA_FORK))
1108 return true;
1109 break;
1110 case XFS_DINODE_FMT_BTREE:
1111 if (xrep_dinode_bad_bmbt_fork(sc, dip, dfork_size,
1112 XFS_DATA_FORK))
1113 return true;
1114 break;
1115 case XFS_DINODE_FMT_META_BTREE:
1116 if (xrep_dinode_bad_metabt_fork(sc, dip, dfork_size,
1117 XFS_DATA_FORK))
1118 return true;
1119 break;
1120 default:
1121 return true;
1122 }
1123
1124 return false;
1125 }
1126
1127 static void
xrep_dinode_set_data_nextents(struct xfs_dinode * dip,xfs_extnum_t nextents)1128 xrep_dinode_set_data_nextents(
1129 struct xfs_dinode *dip,
1130 xfs_extnum_t nextents)
1131 {
1132 if (xfs_dinode_has_large_extent_counts(dip))
1133 dip->di_big_nextents = cpu_to_be64(nextents);
1134 else
1135 dip->di_nextents = cpu_to_be32(nextents);
1136 }
1137
1138 static void
xrep_dinode_set_attr_nextents(struct xfs_dinode * dip,xfs_extnum_t nextents)1139 xrep_dinode_set_attr_nextents(
1140 struct xfs_dinode *dip,
1141 xfs_extnum_t nextents)
1142 {
1143 if (xfs_dinode_has_large_extent_counts(dip))
1144 dip->di_big_anextents = cpu_to_be32(nextents);
1145 else
1146 dip->di_anextents = cpu_to_be16(nextents);
1147 }
1148
1149 /* Reset the data fork to something sane. */
1150 STATIC void
xrep_dinode_zap_dfork(struct xrep_inode * ri,struct xfs_dinode * dip,uint16_t mode)1151 xrep_dinode_zap_dfork(
1152 struct xrep_inode *ri,
1153 struct xfs_dinode *dip,
1154 uint16_t mode)
1155 {
1156 struct xfs_scrub *sc = ri->sc;
1157
1158 trace_xrep_dinode_zap_dfork(sc, dip);
1159
1160 ri->ino_sick_mask |= XFS_SICK_INO_BMBTD_ZAPPED;
1161
1162 xrep_dinode_set_data_nextents(dip, 0);
1163 ri->data_blocks = 0;
1164 ri->rt_blocks = 0;
1165
1166 /* Special files always get reset to DEV */
1167 switch (mode & S_IFMT) {
1168 case S_IFIFO:
1169 case S_IFCHR:
1170 case S_IFBLK:
1171 case S_IFSOCK:
1172 dip->di_format = XFS_DINODE_FMT_DEV;
1173 dip->di_size = 0;
1174 return;
1175 }
1176
1177 /*
1178 * If we have data extents, reset to an empty map and hope the user
1179 * will run the bmapbtd checker next.
1180 */
1181 if (ri->data_extents || ri->rt_extents || S_ISREG(mode)) {
1182 dip->di_format = XFS_DINODE_FMT_EXTENTS;
1183 return;
1184 }
1185
1186 /* Otherwise, reset the local format to the minimum. */
1187 switch (mode & S_IFMT) {
1188 case S_IFLNK:
1189 xrep_dinode_zap_symlink(ri, dip);
1190 break;
1191 case S_IFDIR:
1192 xrep_dinode_zap_dir(ri, dip);
1193 break;
1194 }
1195 }
1196
1197 /*
1198 * Check the attr fork for things that will fail the ifork verifiers or the
1199 * ifork formatters.
1200 */
1201 STATIC bool
xrep_dinode_check_afork(struct xfs_scrub * sc,struct xfs_dinode * dip)1202 xrep_dinode_check_afork(
1203 struct xfs_scrub *sc,
1204 struct xfs_dinode *dip)
1205 {
1206 struct xfs_attr_sf_hdr *afork_ptr;
1207 size_t attr_size;
1208 unsigned int afork_size;
1209
1210 if (XFS_DFORK_BOFF(dip) == 0)
1211 return dip->di_aformat != XFS_DINODE_FMT_EXTENTS ||
1212 xfs_dfork_attr_extents(dip) != 0;
1213
1214 afork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK);
1215 afork_ptr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK);
1216
1217 switch (XFS_DFORK_FORMAT(dip, XFS_ATTR_FORK)) {
1218 case XFS_DINODE_FMT_LOCAL:
1219 /* Fork has to be large enough to extract the xattr size. */
1220 if (afork_size < sizeof(struct xfs_attr_sf_hdr))
1221 return true;
1222
1223 /* xattr structure cannot be larger than the fork */
1224 attr_size = be16_to_cpu(afork_ptr->totsize);
1225 if (attr_size > afork_size)
1226 return true;
1227
1228 /* xattr structure must pass verification. */
1229 return xfs_attr_shortform_verify(afork_ptr, attr_size) != NULL;
1230 case XFS_DINODE_FMT_EXTENTS:
1231 if (xrep_dinode_bad_extents_fork(sc, dip, afork_size,
1232 XFS_ATTR_FORK))
1233 return true;
1234 break;
1235 case XFS_DINODE_FMT_BTREE:
1236 if (xrep_dinode_bad_bmbt_fork(sc, dip, afork_size,
1237 XFS_ATTR_FORK))
1238 return true;
1239 break;
1240 case XFS_DINODE_FMT_META_BTREE:
1241 if (xrep_dinode_bad_metabt_fork(sc, dip, afork_size,
1242 XFS_ATTR_FORK))
1243 return true;
1244 break;
1245 default:
1246 return true;
1247 }
1248
1249 return false;
1250 }
1251
1252 /*
1253 * Reset the attr fork to empty. Since the attr fork could have contained
1254 * ACLs, make the file readable only by root.
1255 */
1256 STATIC void
xrep_dinode_zap_afork(struct xrep_inode * ri,struct xfs_dinode * dip,uint16_t mode)1257 xrep_dinode_zap_afork(
1258 struct xrep_inode *ri,
1259 struct xfs_dinode *dip,
1260 uint16_t mode)
1261 {
1262 struct xfs_scrub *sc = ri->sc;
1263
1264 trace_xrep_dinode_zap_afork(sc, dip);
1265
1266 ri->ino_sick_mask |= XFS_SICK_INO_BMBTA_ZAPPED;
1267
1268 dip->di_aformat = XFS_DINODE_FMT_EXTENTS;
1269 xrep_dinode_set_attr_nextents(dip, 0);
1270 ri->attr_blocks = 0;
1271
1272 /*
1273 * If the data fork is in btree format, removing the attr fork entirely
1274 * might cause verifier failures if the next level down in the bmbt
1275 * could now fit in the data fork area.
1276 */
1277 if (dip->di_format != XFS_DINODE_FMT_BTREE)
1278 dip->di_forkoff = 0;
1279 dip->di_mode = cpu_to_be16(mode & ~0777);
1280 dip->di_uid = 0;
1281 dip->di_gid = 0;
1282 }
1283
/*
 * Make sure the fork offset is a sensible value.
 *
 * Compute the minimum number of bytes that each fork needs in the inode
 * literal area, then adjust di_forkoff (or zap the attr fork) so that a fork
 * that has zero ondisk extents, but for which extents were counted in @ri,
 * has at least enough room for a minimal bmap btree root (bmdr_minsz).
 *
 * @ri: inode repair context; supplies the data/rt/attr extent counts
 * @dip: ondisk inode being repaired; di_forkoff may be rewritten
 * @mode: file mode, passed through to xrep_dinode_zap_afork
 */
STATIC void
xrep_dinode_ensure_forkoff(
	struct xrep_inode	*ri,
	struct xfs_dinode	*dip,
	uint16_t		mode)
{
	struct xfs_bmdr_block	*bmdr;
	struct xfs_rtrmap_root	*rmdr;
	struct xfs_rtrefcount_root *rcdr;
	struct xfs_scrub	*sc = ri->sc;
	xfs_extnum_t		attr_extents, data_extents;
	size_t			bmdr_minsz = xfs_bmdr_space_calc(1);
	unsigned int		lit_sz = XFS_LITINO(sc->mp);
	unsigned int		afork_min, dfork_min;

	trace_xrep_dinode_ensure_forkoff(sc, dip);

	/*
	 * Before calling this function, xrep_dinode_core ensured that both
	 * forks actually fit inside their respective literal areas.  If this
	 * was not the case, the fork was reset to FMT_EXTENTS with zero
	 * records.  If the rmapbt scan found attr or data fork blocks, this
	 * will be noted in the dinode_stats, and we must leave enough room
	 * for the bmap repair code to reconstruct the mapping structure.
	 *
	 * First, compute the minimum space required for the attr fork.
	 */
	switch (dip->di_aformat) {
	case XFS_DINODE_FMT_LOCAL:
		/*
		 * If we still have a shortform xattr structure at all, that
		 * means the attr fork area was exactly large enough to fit
		 * the sf structure.
		 */
		afork_min = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK);
		break;
	case XFS_DINODE_FMT_EXTENTS:
		attr_extents = xfs_dfork_attr_extents(dip);
		if (attr_extents) {
			/*
			 * We must maintain sufficient space to hold the entire
			 * extent map array in the attr fork.  Note that we
			 * previously zapped the fork if it had no chance of
			 * fitting in the inode.
			 */
			afork_min = sizeof(struct xfs_bmbt_rec) * attr_extents;
		} else if (ri->attr_extents > 0) {
			/*
			 * The attr fork thinks it has zero extents, but we
			 * found some xattr extents.  We need to leave enough
			 * empty space here so that the incore attr fork will
			 * get created (and hence trigger the attr fork bmap
			 * repairer).
			 */
			afork_min = bmdr_minsz;
		} else {
			/* No extents on disk or found in rmapbt. */
			afork_min = 0;
		}
		break;
	case XFS_DINODE_FMT_BTREE:
		/* Must have space for btree header and key/pointers. */
		bmdr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK);
		afork_min = xfs_bmap_broot_space(sc->mp, bmdr);
		break;
	default:
		/* We should never see any other formats. */
		afork_min = 0;
		break;
	}

	/* Compute the minimum space required for the data fork. */
	switch (dip->di_format) {
	case XFS_DINODE_FMT_DEV:
		dfork_min = sizeof(__be32);
		break;
	case XFS_DINODE_FMT_UUID:
		dfork_min = sizeof(uuid_t);
		break;
	case XFS_DINODE_FMT_LOCAL:
		/*
		 * If we still have a shortform data fork at all, that means
		 * the data fork area was large enough to fit whatever was in
		 * there.
		 */
		dfork_min = be64_to_cpu(dip->di_size);
		break;
	case XFS_DINODE_FMT_EXTENTS:
		data_extents = xfs_dfork_data_extents(dip);
		if (data_extents) {
			/*
			 * We must maintain sufficient space to hold the entire
			 * extent map array in the data fork.  Note that we
			 * previously zapped the fork if it had no chance of
			 * fitting in the inode.
			 */
			dfork_min = sizeof(struct xfs_bmbt_rec) * data_extents;
		} else if (ri->data_extents > 0 || ri->rt_extents > 0) {
			/*
			 * The data fork thinks it has zero extents, but we
			 * found some data extents.  We need to leave enough
			 * empty space here so that the data fork bmap repair
			 * will recover the mappings.
			 */
			dfork_min = bmdr_minsz;
		} else {
			/* No extents on disk or found in rmapbt. */
			dfork_min = 0;
		}
		break;
	case XFS_DINODE_FMT_BTREE:
		/* Must have space for btree header and key/pointers. */
		bmdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
		dfork_min = xfs_bmap_broot_space(sc->mp, bmdr);
		break;
	case XFS_DINODE_FMT_META_BTREE:
		/* The space needed depends on the type of metadata btree. */
		switch (be16_to_cpu(dip->di_metatype)) {
		case XFS_METAFILE_RTRMAP:
			rmdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
			dfork_min = xfs_rtrmap_broot_space(sc->mp, rmdr);
			break;
		case XFS_METAFILE_RTREFCOUNT:
			rcdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
			dfork_min = xfs_rtrefcount_broot_space(sc->mp, rcdr);
			break;
		default:
			/* Unrecognized metafile type; reserve nothing. */
			dfork_min = 0;
			break;
		}
		break;
	default:
		/* Any other format reserves nothing. */
		dfork_min = 0;
		break;
	}

	/*
	 * Round all values up to the nearest 8 bytes, because that is the
	 * precision of di_forkoff.
	 */
	afork_min = roundup(afork_min, 8);
	dfork_min = roundup(dfork_min, 8);
	bmdr_minsz = roundup(bmdr_minsz, 8);

	ASSERT(dfork_min <= lit_sz);
	ASSERT(afork_min <= lit_sz);

	/*
	 * If the data fork was zapped and we don't have enough space for the
	 * recovery fork, move the attr fork up.
	 */
	if (dip->di_format == XFS_DINODE_FMT_EXTENTS &&
	    xfs_dfork_data_extents(dip) == 0 &&
	    (ri->data_extents > 0 || ri->rt_extents > 0) &&
	    bmdr_minsz > XFS_DFORK_DSIZE(dip, sc->mp)) {
		if (bmdr_minsz + afork_min > lit_sz) {
			/*
			 * The attr fork and the stub fork we need to recover
			 * the data fork won't both fit.  Zap the attr fork.
			 */
			xrep_dinode_zap_afork(ri, dip, mode);
			afork_min = bmdr_minsz;
		} else {
			void	*before, *after;

			/*
			 * Otherwise, just slide the attr fork up.  The old
			 * and new attr fork areas can overlap, hence the
			 * memmove.  di_forkoff is in units of 8 bytes.
			 */
			before = XFS_DFORK_APTR(dip);
			dip->di_forkoff = bmdr_minsz >> 3;
			after = XFS_DFORK_APTR(dip);
			memmove(after, before, XFS_DFORK_ASIZE(dip, sc->mp));
		}
	}

	/*
	 * If the attr fork was zapped and we don't have enough space for the
	 * recovery fork, move the attr fork down.
	 */
	if (dip->di_aformat == XFS_DINODE_FMT_EXTENTS &&
	    xfs_dfork_attr_extents(dip) == 0 &&
	    ri->attr_extents > 0 &&
	    bmdr_minsz > XFS_DFORK_ASIZE(dip, sc->mp)) {
		if (dip->di_format == XFS_DINODE_FMT_BTREE) {
			/*
			 * If the data fork is in btree format then we can't
			 * adjust forkoff because that runs the risk of
			 * violating the extents/btree format transition rules.
			 */
		} else if (bmdr_minsz + dfork_min > lit_sz) {
			/*
			 * If we can't move the attr fork, too bad, we lose the
			 * attr fork and leak its blocks.
			 */
			xrep_dinode_zap_afork(ri, dip, mode);
		} else {
			/*
			 * Otherwise, just slide the attr fork down.  The attr
			 * fork is empty, so we don't have any old contents to
			 * move here.
			 */
			dip->di_forkoff = (lit_sz - bmdr_minsz) >> 3;
		}
	}
}
1487
1488 /*
1489 * Zap the data/attr forks if we spot anything that isn't going to pass the
1490 * ifork verifiers or the ifork formatters, because we need to get the inode
1491 * into good enough shape that the higher level repair functions can run.
1492 */
1493 STATIC void
xrep_dinode_zap_forks(struct xrep_inode * ri,struct xfs_dinode * dip)1494 xrep_dinode_zap_forks(
1495 struct xrep_inode *ri,
1496 struct xfs_dinode *dip)
1497 {
1498 struct xfs_scrub *sc = ri->sc;
1499 xfs_extnum_t data_extents;
1500 xfs_extnum_t attr_extents;
1501 xfs_filblks_t nblocks;
1502 uint16_t mode;
1503 bool zap_datafork = false;
1504 bool zap_attrfork = ri->zap_acls;
1505
1506 trace_xrep_dinode_zap_forks(sc, dip);
1507
1508 mode = be16_to_cpu(dip->di_mode);
1509
1510 data_extents = xfs_dfork_data_extents(dip);
1511 attr_extents = xfs_dfork_attr_extents(dip);
1512 nblocks = be64_to_cpu(dip->di_nblocks);
1513
1514 /* Inode counters don't make sense? */
1515 if (data_extents > nblocks)
1516 zap_datafork = true;
1517 if (attr_extents > nblocks)
1518 zap_attrfork = true;
1519 if (data_extents + attr_extents > nblocks)
1520 zap_datafork = zap_attrfork = true;
1521
1522 if (!zap_datafork)
1523 zap_datafork = xrep_dinode_check_dfork(sc, dip, mode);
1524 if (!zap_attrfork)
1525 zap_attrfork = xrep_dinode_check_afork(sc, dip);
1526
1527 /* Zap whatever's bad. */
1528 if (zap_attrfork)
1529 xrep_dinode_zap_afork(ri, dip, mode);
1530 if (zap_datafork)
1531 xrep_dinode_zap_dfork(ri, dip, mode);
1532 xrep_dinode_ensure_forkoff(ri, dip, mode);
1533
1534 /*
1535 * Zero di_nblocks if we don't have any extents at all to satisfy the
1536 * buffer verifier.
1537 */
1538 data_extents = xfs_dfork_data_extents(dip);
1539 attr_extents = xfs_dfork_attr_extents(dip);
1540 if (data_extents + attr_extents == 0)
1541 dip->di_nblocks = 0;
1542 }
1543
/*
 * Inode didn't pass dinode verifiers, so fix the raw buffer and retry iget.
 *
 * The ondisk inode is patched up in the inode cluster buffer and logged,
 * then the inode is loaded into the cache before the transaction commits.
 * If recovering the file mode fails, the remaining dinode fixes and the iget
 * are skipped, but whatever was changed is still logged and committed.
 *
 * Returns 0 with sc->ip set and the IOLOCK and ILOCK taken through
 * xchk_ilock; otherwise a negative errno from the rmap count, the buffer
 * read, the commit, the mode repair or iget, the new transaction, or the
 * dquot attach.
 */
STATIC int
xrep_dinode_core(
	struct xrep_inode	*ri)
{
	struct xfs_scrub	*sc = ri->sc;
	struct xfs_buf		*bp;
	struct xfs_dinode	*dip;
	xfs_ino_t		ino = sc->sm->sm_ino;
	int			error;
	int			iget_error;

	/* Figure out what this inode had mapped in both forks. */
	error = xrep_dinode_count_rmaps(ri);
	if (error)
		return error;

	/* Read the inode cluster buffer. */
	error = xfs_trans_read_buf(sc->mp, sc->tp, sc->mp->m_ddev_targp,
			ri->imap.im_blkno, ri->imap.im_len, 0, &bp, NULL);
	if (error)
		return error;

	/* Make sure we can pass the inode buffer verifier. */
	xrep_dinode_buf(sc, bp);
	bp->b_ops = &xfs_inode_buf_ops;

	/* Fix everything the verifier will complain about. */
	dip = xfs_buf_offset(bp, ri->imap.im_boffset);
	xrep_dinode_header(sc, dip);
	iget_error = xrep_dinode_mode(ri, dip);
	if (iget_error)
		goto write;
	xrep_dinode_nlinks(dip);
	xrep_dinode_flags(sc, dip, ri->rt_extents > 0);
	xrep_dinode_size(ri, dip);
	xrep_dinode_extsize_hints(sc, dip);
	xrep_dinode_zap_forks(ri, dip);

write:
	/* Write out the inode: recompute the CRC and log just this inode. */
	trace_xrep_dinode_fixed(sc, dip);
	xfs_dinode_calc_crc(sc->mp, dip);
	xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_DINO_BUF);
	xfs_trans_log_buf(sc->tp, bp, ri->imap.im_boffset,
			ri->imap.im_boffset + sc->mp->m_sb.sb_inodesize - 1);

	/*
	 * In theory, we've fixed the ondisk inode record enough that we should
	 * be able to load the inode into the cache.  Try to iget that inode
	 * now while we hold the AGI and the inode cluster buffer and take the
	 * IOLOCK so that we can continue with repairs without anyone else
	 * accessing the inode.  If iget fails, we still need to commit the
	 * changes.
	 */
	if (!iget_error)
		iget_error = xchk_iget(sc, ino, &sc->ip);
	if (!iget_error)
		xchk_ilock(sc, XFS_IOLOCK_EXCL);

	/*
	 * Commit the inode cluster buffer updates and drop the AGI buffer that
	 * we've been holding since scrub setup.  From here on out, repairs
	 * deal only with the cached inode.
	 */
	error = xrep_trans_commit(sc);
	if (error)
		return error;

	/* A commit failure takes precedence over a mode or iget failure. */
	if (iget_error)
		return iget_error;

	/* Set up a new transaction for the incore inode repairs. */
	error = xchk_trans_alloc(sc, 0);
	if (error)
		return error;

	error = xrep_ino_dqattach(sc);
	if (error)
		return error;

	xchk_ilock(sc, XFS_ILOCK_EXCL);

	/* Pass along the sick flags recorded in the repair context, if any. */
	if (ri->ino_sick_mask)
		xfs_inode_mark_sick(sc->ip, ri->ino_sick_mask);
	return 0;
}
1629
1630 /* Fix everything xfs_dinode_verify cares about. */
1631 STATIC int
xrep_dinode_problems(struct xrep_inode * ri)1632 xrep_dinode_problems(
1633 struct xrep_inode *ri)
1634 {
1635 struct xfs_scrub *sc = ri->sc;
1636 int error;
1637
1638 error = xrep_dinode_core(ri);
1639 if (error)
1640 return error;
1641
1642 /* We had to fix a totally busted inode, schedule quotacheck. */
1643 if (XFS_IS_UQUOTA_ON(sc->mp))
1644 xrep_force_quotacheck(sc, XFS_DQTYPE_USER);
1645 if (XFS_IS_GQUOTA_ON(sc->mp))
1646 xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP);
1647 if (XFS_IS_PQUOTA_ON(sc->mp))
1648 xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ);
1649
1650 return 0;
1651 }
1652
1653 /*
1654 * Fix problems that the verifiers don't care about. In general these are
1655 * errors that don't cause problems elsewhere in the kernel that we can easily
1656 * detect, so we don't check them all that rigorously.
1657 */
1658
1659 /* Make sure block and extent counts are ok. */
1660 STATIC int
xrep_inode_blockcounts(struct xfs_scrub * sc)1661 xrep_inode_blockcounts(
1662 struct xfs_scrub *sc)
1663 {
1664 struct xfs_ifork *ifp;
1665 xfs_filblks_t count;
1666 xfs_filblks_t acount;
1667 xfs_extnum_t nextents;
1668 int error;
1669
1670 trace_xrep_inode_blockcounts(sc);
1671
1672 /* Set data fork counters from the data fork mappings. */
1673 error = xchk_inode_count_blocks(sc, XFS_DATA_FORK, &nextents, &count);
1674 if (error)
1675 return error;
1676 if (xfs_is_reflink_inode(sc->ip)) {
1677 /*
1678 * data fork blockcount can exceed physical storage if a user
1679 * reflinks the same block over and over again.
1680 */
1681 ;
1682 } else if (XFS_IS_REALTIME_INODE(sc->ip)) {
1683 if (count >= sc->mp->m_sb.sb_rblocks)
1684 return -EFSCORRUPTED;
1685 } else {
1686 if (count >= sc->mp->m_sb.sb_dblocks)
1687 return -EFSCORRUPTED;
1688 }
1689 error = xrep_ino_ensure_extent_count(sc, XFS_DATA_FORK, nextents);
1690 if (error)
1691 return error;
1692 sc->ip->i_df.if_nextents = nextents;
1693
1694 /* Set attr fork counters from the attr fork mappings. */
1695 ifp = xfs_ifork_ptr(sc->ip, XFS_ATTR_FORK);
1696 if (ifp) {
1697 error = xchk_inode_count_blocks(sc, XFS_ATTR_FORK, &nextents,
1698 &acount);
1699 if (error)
1700 return error;
1701 if (count >= sc->mp->m_sb.sb_dblocks)
1702 return -EFSCORRUPTED;
1703 error = xrep_ino_ensure_extent_count(sc, XFS_ATTR_FORK,
1704 nextents);
1705 if (error)
1706 return error;
1707 ifp->if_nextents = nextents;
1708 } else {
1709 acount = 0;
1710 }
1711
1712 sc->ip->i_nblocks = count + acount;
1713 return 0;
1714 }
1715
1716 /* Check for invalid uid/gid/prid. */
1717 STATIC void
xrep_inode_ids(struct xfs_scrub * sc)1718 xrep_inode_ids(
1719 struct xfs_scrub *sc)
1720 {
1721 bool dirty = false;
1722
1723 trace_xrep_inode_ids(sc);
1724
1725 if (!uid_valid(VFS_I(sc->ip)->i_uid)) {
1726 i_uid_write(VFS_I(sc->ip), 0);
1727 dirty = true;
1728 if (XFS_IS_UQUOTA_ON(sc->mp))
1729 xrep_force_quotacheck(sc, XFS_DQTYPE_USER);
1730 }
1731
1732 if (!gid_valid(VFS_I(sc->ip)->i_gid)) {
1733 i_gid_write(VFS_I(sc->ip), 0);
1734 dirty = true;
1735 if (XFS_IS_GQUOTA_ON(sc->mp))
1736 xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP);
1737 }
1738
1739 if (sc->ip->i_projid == -1U) {
1740 sc->ip->i_projid = 0;
1741 dirty = true;
1742 if (XFS_IS_PQUOTA_ON(sc->mp))
1743 xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ);
1744 }
1745
1746 /* strip setuid/setgid if we touched any of the ids */
1747 if (dirty)
1748 VFS_I(sc->ip)->i_mode &= ~(S_ISUID | S_ISGID);
1749 }
1750
1751 static inline void
xrep_clamp_timestamp(struct xfs_inode * ip,struct timespec64 * ts)1752 xrep_clamp_timestamp(
1753 struct xfs_inode *ip,
1754 struct timespec64 *ts)
1755 {
1756 ts->tv_nsec = clamp_t(long, ts->tv_nsec, 0, NSEC_PER_SEC);
1757 *ts = timestamp_truncate(*ts, VFS_I(ip));
1758 }
1759
1760 /* Nanosecond counters can't have more than 1 billion. */
1761 STATIC void
xrep_inode_timestamps(struct xfs_inode * ip)1762 xrep_inode_timestamps(
1763 struct xfs_inode *ip)
1764 {
1765 struct timespec64 tstamp;
1766 struct inode *inode = VFS_I(ip);
1767
1768 tstamp = inode_get_atime(inode);
1769 xrep_clamp_timestamp(ip, &tstamp);
1770 inode_set_atime_to_ts(inode, tstamp);
1771
1772 tstamp = inode_get_mtime(inode);
1773 xrep_clamp_timestamp(ip, &tstamp);
1774 inode_set_mtime_to_ts(inode, tstamp);
1775
1776 tstamp = inode_get_ctime(inode);
1777 xrep_clamp_timestamp(ip, &tstamp);
1778 inode_set_ctime_to_ts(inode, tstamp);
1779
1780 xrep_clamp_timestamp(ip, &ip->i_crtime);
1781 }
1782
1783 /* Fix inode flags that don't make sense together. */
1784 STATIC void
xrep_inode_flags(struct xfs_scrub * sc)1785 xrep_inode_flags(
1786 struct xfs_scrub *sc)
1787 {
1788 uint16_t mode;
1789
1790 trace_xrep_inode_flags(sc);
1791
1792 mode = VFS_I(sc->ip)->i_mode;
1793
1794 /* Clear junk flags */
1795 if (sc->ip->i_diflags & ~XFS_DIFLAG_ANY)
1796 sc->ip->i_diflags &= ~XFS_DIFLAG_ANY;
1797
1798 /* NEWRTBM only applies to realtime bitmaps */
1799 if (sc->ip->i_ino == sc->mp->m_sb.sb_rbmino)
1800 sc->ip->i_diflags |= XFS_DIFLAG_NEWRTBM;
1801 else
1802 sc->ip->i_diflags &= ~XFS_DIFLAG_NEWRTBM;
1803
1804 /* These only make sense for directories. */
1805 if (!S_ISDIR(mode))
1806 sc->ip->i_diflags &= ~(XFS_DIFLAG_RTINHERIT |
1807 XFS_DIFLAG_EXTSZINHERIT |
1808 XFS_DIFLAG_PROJINHERIT |
1809 XFS_DIFLAG_NOSYMLINKS);
1810
1811 /* These only make sense for files. */
1812 if (!S_ISREG(mode))
1813 sc->ip->i_diflags &= ~(XFS_DIFLAG_REALTIME |
1814 XFS_DIFLAG_EXTSIZE);
1815
1816 /* These only make sense for non-rt files. */
1817 if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME)
1818 sc->ip->i_diflags &= ~XFS_DIFLAG_FILESTREAM;
1819
1820 /* Immutable and append only? Drop the append. */
1821 if ((sc->ip->i_diflags & XFS_DIFLAG_IMMUTABLE) &&
1822 (sc->ip->i_diflags & XFS_DIFLAG_APPEND))
1823 sc->ip->i_diflags &= ~XFS_DIFLAG_APPEND;
1824
1825 /* Clear junk flags. */
1826 if (sc->ip->i_diflags2 & ~XFS_DIFLAG2_ANY)
1827 sc->ip->i_diflags2 &= ~XFS_DIFLAG2_ANY;
1828
1829 /* No reflink flag unless we support it and it's a file. */
1830 if (!xfs_has_reflink(sc->mp) || !S_ISREG(mode))
1831 sc->ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
1832
1833 /* DAX only applies to files and dirs. */
1834 if (!(S_ISREG(mode) || S_ISDIR(mode)))
1835 sc->ip->i_diflags2 &= ~XFS_DIFLAG2_DAX;
1836 }
1837
1838 /*
1839 * Fix size problems with block/node format directories. If we fail to find
1840 * the extent list, just bail out and let the bmapbtd repair functions clean
1841 * up that mess.
1842 */
1843 STATIC void
xrep_inode_blockdir_size(struct xfs_scrub * sc)1844 xrep_inode_blockdir_size(
1845 struct xfs_scrub *sc)
1846 {
1847 struct xfs_iext_cursor icur;
1848 struct xfs_bmbt_irec got;
1849 struct xfs_ifork *ifp;
1850 xfs_fileoff_t off;
1851 int error;
1852
1853 trace_xrep_inode_blockdir_size(sc);
1854
1855 error = xfs_iread_extents(sc->tp, sc->ip, XFS_DATA_FORK);
1856 if (error)
1857 return;
1858
1859 /* Find the last block before 32G; this is the dir size. */
1860 ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
1861 off = XFS_B_TO_FSB(sc->mp, XFS_DIR2_SPACE_SIZE);
1862 if (!xfs_iext_lookup_extent_before(sc->ip, ifp, &off, &icur, &got)) {
1863 /* zero-extents directory? */
1864 return;
1865 }
1866
1867 off = got.br_startoff + got.br_blockcount;
1868 sc->ip->i_disk_size = min_t(loff_t, XFS_DIR2_SPACE_SIZE,
1869 XFS_FSB_TO_B(sc->mp, off));
1870 }
1871
1872 /* Fix size problems with short format directories. */
1873 STATIC void
xrep_inode_sfdir_size(struct xfs_scrub * sc)1874 xrep_inode_sfdir_size(
1875 struct xfs_scrub *sc)
1876 {
1877 struct xfs_ifork *ifp;
1878
1879 trace_xrep_inode_sfdir_size(sc);
1880
1881 ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
1882 sc->ip->i_disk_size = ifp->if_bytes;
1883 }
1884
1885 /*
1886 * Fix any irregularities in a directory inode's size now that we can iterate
1887 * extent maps and access other regular inode data.
1888 */
1889 STATIC void
xrep_inode_dir_size(struct xfs_scrub * sc)1890 xrep_inode_dir_size(
1891 struct xfs_scrub *sc)
1892 {
1893 trace_xrep_inode_dir_size(sc);
1894
1895 switch (sc->ip->i_df.if_format) {
1896 case XFS_DINODE_FMT_EXTENTS:
1897 case XFS_DINODE_FMT_BTREE:
1898 xrep_inode_blockdir_size(sc);
1899 break;
1900 case XFS_DINODE_FMT_LOCAL:
1901 xrep_inode_sfdir_size(sc);
1902 break;
1903 }
1904 }
1905
1906 /* Fix extent size hint problems. */
1907 STATIC void
xrep_inode_extsize(struct xfs_scrub * sc)1908 xrep_inode_extsize(
1909 struct xfs_scrub *sc)
1910 {
1911 /* Fix misaligned extent size hints on a directory. */
1912 if ((sc->ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
1913 (sc->ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) &&
1914 xfs_extlen_to_rtxmod(sc->mp, sc->ip->i_extsize) > 0) {
1915 sc->ip->i_extsize = 0;
1916 sc->ip->i_diflags &= ~XFS_DIFLAG_EXTSZINHERIT;
1917 }
1918 }
1919
1920 /* Ensure this file has an attr fork if it needs to hold a parent pointer. */
1921 STATIC int
xrep_inode_pptr(struct xfs_scrub * sc)1922 xrep_inode_pptr(
1923 struct xfs_scrub *sc)
1924 {
1925 struct xfs_mount *mp = sc->mp;
1926 struct xfs_inode *ip = sc->ip;
1927 struct inode *inode = VFS_I(ip);
1928
1929 if (!xfs_has_parent(mp))
1930 return 0;
1931
1932 /*
1933 * Unlinked inodes that cannot be added to the directory tree will not
1934 * have a parent pointer.
1935 */
1936 if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE))
1937 return 0;
1938
1939 /* Children of the superblock do not have parent pointers. */
1940 if (xchk_inode_is_sb_rooted(ip))
1941 return 0;
1942
1943 /* Inode already has an attr fork; no further work possible here. */
1944 if (xfs_inode_has_attr_fork(ip))
1945 return 0;
1946
1947 return xfs_bmap_add_attrfork(sc->tp, ip,
1948 sizeof(struct xfs_attr_sf_hdr), true);
1949 }
1950
1951 /* Fix COW extent size hint problems. */
1952 STATIC void
xrep_inode_cowextsize(struct xfs_scrub * sc)1953 xrep_inode_cowextsize(
1954 struct xfs_scrub *sc)
1955 {
1956 /* Fix misaligned CoW extent size hints on a directory. */
1957 if ((sc->ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
1958 (sc->ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) &&
1959 sc->ip->i_extsize % sc->mp->m_sb.sb_rextsize > 0) {
1960 sc->ip->i_cowextsize = 0;
1961 sc->ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE;
1962 }
1963 }
1964
1965 /* Fix any irregularities in an inode that the verifiers don't catch. */
1966 STATIC int
xrep_inode_problems(struct xfs_scrub * sc)1967 xrep_inode_problems(
1968 struct xfs_scrub *sc)
1969 {
1970 int error;
1971
1972 error = xrep_inode_blockcounts(sc);
1973 if (error)
1974 return error;
1975 error = xrep_inode_pptr(sc);
1976 if (error)
1977 return error;
1978 xrep_inode_timestamps(sc->ip);
1979 xrep_inode_flags(sc);
1980 xrep_inode_ids(sc);
1981 /*
1982 * We can now do a better job fixing the size of a directory now that
1983 * we can scan the data fork extents than we could in xrep_dinode_size.
1984 */
1985 if (S_ISDIR(VFS_I(sc->ip)->i_mode))
1986 xrep_inode_dir_size(sc);
1987 xrep_inode_extsize(sc);
1988 xrep_inode_cowextsize(sc);
1989
1990 trace_xrep_inode_fixed(sc);
1991 xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
1992 return xrep_roll_trans(sc);
1993 }
1994
1995 /*
1996 * Make sure this inode's unlinked list pointers are consistent with its
1997 * link count.
1998 */
1999 STATIC int
xrep_inode_unlinked(struct xfs_scrub * sc)2000 xrep_inode_unlinked(
2001 struct xfs_scrub *sc)
2002 {
2003 unsigned int nlink = VFS_I(sc->ip)->i_nlink;
2004 int error;
2005
2006 /*
2007 * If this inode is linked from the directory tree and on the unlinked
2008 * list, remove it from the unlinked list.
2009 */
2010 if (nlink > 0 && xfs_inode_on_unlinked_list(sc->ip)) {
2011 struct xfs_perag *pag;
2012 int error;
2013
2014 pag = xfs_perag_get(sc->mp,
2015 XFS_INO_TO_AGNO(sc->mp, sc->ip->i_ino));
2016 error = xfs_iunlink_remove(sc->tp, pag, sc->ip);
2017 xfs_perag_put(pag);
2018 if (error)
2019 return error;
2020 }
2021
2022 /*
2023 * If this inode is not linked from the directory tree yet not on the
2024 * unlinked list, put it on the unlinked list.
2025 */
2026 if (nlink == 0 && !xfs_inode_on_unlinked_list(sc->ip)) {
2027 error = xfs_iunlink(sc->tp, sc->ip);
2028 if (error)
2029 return error;
2030 }
2031
2032 return 0;
2033 }
2034
2035 /* Repair an inode's fields. */
2036 int
xrep_inode(struct xfs_scrub * sc)2037 xrep_inode(
2038 struct xfs_scrub *sc)
2039 {
2040 int error = 0;
2041
2042 /*
2043 * No inode? That means we failed the _iget verifiers. Repair all
2044 * the things that the inode verifiers care about, then retry _iget.
2045 */
2046 if (!sc->ip) {
2047 struct xrep_inode *ri = sc->buf;
2048
2049 ASSERT(ri != NULL);
2050
2051 error = xrep_dinode_problems(ri);
2052 if (error == -EBUSY) {
2053 /*
2054 * Directory scan to recover inode mode encountered a
2055 * busy inode, so we did not continue repairing things.
2056 */
2057 return 0;
2058 }
2059 if (error)
2060 return error;
2061
2062 /* By this point we had better have a working incore inode. */
2063 if (!sc->ip)
2064 return -EFSCORRUPTED;
2065 }
2066
2067 xfs_trans_ijoin(sc->tp, sc->ip, 0);
2068
2069 /* If we found corruption of any kind, try to fix it. */
2070 if ((sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) ||
2071 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_XCORRUPT)) {
2072 error = xrep_inode_problems(sc);
2073 if (error)
2074 return error;
2075 }
2076
2077 /* See if we can clear the reflink flag. */
2078 if (xfs_is_reflink_inode(sc->ip)) {
2079 error = xfs_reflink_clear_inode_flag(sc->ip, &sc->tp);
2080 if (error)
2081 return error;
2082 }
2083
2084 /* Reconnect incore unlinked list */
2085 error = xrep_inode_unlinked(sc);
2086 if (error)
2087 return error;
2088
2089 return xrep_defer_finish(sc);
2090 }
2091