1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (C) 2018-2023 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_defer.h"
13 #include "xfs_btree.h"
14 #include "xfs_bit.h"
15 #include "xfs_log_format.h"
16 #include "xfs_trans.h"
17 #include "xfs_sb.h"
18 #include "xfs_inode.h"
19 #include "xfs_icache.h"
20 #include "xfs_inode_buf.h"
21 #include "xfs_inode_fork.h"
22 #include "xfs_ialloc.h"
23 #include "xfs_da_format.h"
24 #include "xfs_reflink.h"
25 #include "xfs_alloc.h"
26 #include "xfs_rmap.h"
27 #include "xfs_rmap_btree.h"
28 #include "xfs_bmap.h"
29 #include "xfs_bmap_btree.h"
30 #include "xfs_bmap_util.h"
31 #include "xfs_dir2.h"
32 #include "xfs_dir2_priv.h"
33 #include "xfs_quota_defs.h"
34 #include "xfs_quota.h"
35 #include "xfs_ag.h"
36 #include "xfs_rtbitmap.h"
37 #include "xfs_attr_leaf.h"
38 #include "xfs_log_priv.h"
39 #include "xfs_health.h"
40 #include "xfs_symlink_remote.h"
41 #include "xfs_rtgroup.h"
42 #include "xfs_rtrmap_btree.h"
43 #include "xfs_rtrefcount_btree.h"
44 #include "scrub/xfs_scrub.h"
45 #include "scrub/scrub.h"
46 #include "scrub/common.h"
47 #include "scrub/btree.h"
48 #include "scrub/trace.h"
49 #include "scrub/repair.h"
50 #include "scrub/iscan.h"
51 #include "scrub/readdir.h"
52 #include "scrub/tempfile.h"
53
54 /*
55 * Inode Record Repair
56 * ===================
57 *
58 * Roughly speaking, inode problems can be classified based on whether or not
59 * they trip the dinode verifiers. If those trip, then we won't be able to
60 * xfs_iget ourselves the inode.
61 *
62 * Therefore, the xrep_dinode_* functions fix anything that would trip the
63 * inode buffer verifier or the dinode verifier. The xrep_inode_* functions
64 * fix things on live incore inodes. The inode repair functions make decisions
65 * with security and usability implications when reviving a file:
66 *
67 * - Files with zero di_mode or a garbage di_mode are converted to a regular
68 * file that only root can read. This file may not actually contain user data,
69 * if the file was not previously a regular file. Setuid and setgid bits
70 * are cleared.
71 *
72 * - Zero-size directories can be truncated to look empty. It is necessary to
73 * run the bmapbtd and directory repair functions to fully rebuild the
74 * directory.
75 *
76 * - Zero-size symbolic link targets can be truncated to '?'. It is necessary
77 * to run the bmapbtd and symlink repair functions to salvage the symlink.
78 *
79 * - Invalid extent size hints will be removed.
80 *
81 * - Quotacheck will be scheduled if we repaired an inode that was so badly
82 * damaged that the ondisk inode had to be rebuilt.
83 *
84 * - Invalid user, group, or project IDs (aka -1U) will be reset to zero.
85 * Setuid and setgid bits are cleared.
86 *
87 * - Data and attr forks are reset to extents format with zero extents if the
88 * fork data is inconsistent. It is necessary to run the bmapbtd or bmapbta
89 * repair functions to recover the space mapping.
90 *
91 * - ACLs will not be recovered if the attr fork is zapped or the extended
92 * attribute structure itself requires salvaging.
93 *
94 * - If the attr fork is zapped, the user and group ids are reset to root and
95 * the setuid and setgid bits are removed.
96 */
97
98 /*
99 * All the information we need to repair the ondisk inode if we can't iget the
100 * incore inode. We don't allocate this buffer unless we're going to perform
101 * a repair to the ondisk inode cluster buffer.
102 */
103 struct xrep_inode {
104 /* Inode mapping that we saved from the initial lookup attempt. */
105 struct xfs_imap imap;
106
107 struct xfs_scrub *sc;
108
109 /* Blocks in use on the data device by data extents or bmbt blocks. */
110 xfs_rfsblock_t data_blocks;
111
112 /* Blocks in use on the rt device. */
113 xfs_rfsblock_t rt_blocks;
114
115 /* Blocks in use by the attr fork. */
116 xfs_rfsblock_t attr_blocks;
117
118 /* Number of data device extents for the data fork. */
119 xfs_extnum_t data_extents;
120
121 /*
122 * Number of realtime device extents for the data fork. If
123 * data_extents and rt_extents indicate that the data fork has extents
124 * on both devices, we'll just back away slowly.
125 */
126 xfs_extnum_t rt_extents;
127
128 /* Number of (data device) extents for the attr fork. */
129 xfs_aextnum_t attr_extents;
130
131 /* Sick state to set after zapping parts of the inode. */
132 unsigned int ino_sick_mask;
133
134 /* Must we remove all access from this file? */
135 bool zap_acls;
136
137 /* Inode scanner to see if we can find the ftype from dirents */
138 struct xchk_iscan ftype_iscan;
139 uint8_t alleged_ftype;
140 };
141
142 /*
143 * Setup function for inode repair. @imap contains the ondisk inode mapping
144 * information so that we can correct the ondisk inode cluster buffer if
145 * necessary to make iget work.
146 */
147 int
xrep_setup_inode(struct xfs_scrub * sc,const struct xfs_imap * imap)148 xrep_setup_inode(
149 struct xfs_scrub *sc,
150 const struct xfs_imap *imap)
151 {
152 struct xrep_inode *ri;
153
154 sc->buf = kzalloc(sizeof(struct xrep_inode), XCHK_GFP_FLAGS);
155 if (!sc->buf)
156 return -ENOMEM;
157
158 ri = sc->buf;
159 memcpy(&ri->imap, imap, sizeof(struct xfs_imap));
160 ri->sc = sc;
161 return 0;
162 }
163
164 /*
165 * Make sure this ondisk inode can pass the inode buffer verifier. This is
166 * not the same as the dinode verifier.
167 */
168 STATIC void
xrep_dinode_buf_core(struct xfs_scrub * sc,struct xfs_buf * bp,unsigned int ioffset)169 xrep_dinode_buf_core(
170 struct xfs_scrub *sc,
171 struct xfs_buf *bp,
172 unsigned int ioffset)
173 {
174 struct xfs_dinode *dip = xfs_buf_offset(bp, ioffset);
175 struct xfs_trans *tp = sc->tp;
176 struct xfs_mount *mp = sc->mp;
177 xfs_agino_t agino;
178 bool crc_ok = false;
179 bool magic_ok = false;
180 bool unlinked_ok = false;
181
182 agino = be32_to_cpu(dip->di_next_unlinked);
183
184 if (xfs_verify_agino_or_null(bp->b_pag, agino))
185 unlinked_ok = true;
186
187 if (dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
188 xfs_dinode_good_version(mp, dip->di_version))
189 magic_ok = true;
190
191 if (xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
192 XFS_DINODE_CRC_OFF))
193 crc_ok = true;
194
195 if (magic_ok && unlinked_ok && crc_ok)
196 return;
197
198 if (!magic_ok) {
199 dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
200 dip->di_version = 3;
201 }
202 if (!unlinked_ok)
203 dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
204 xfs_dinode_calc_crc(mp, dip);
205 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
206 xfs_trans_log_buf(tp, bp, ioffset,
207 ioffset + sizeof(struct xfs_dinode) - 1);
208 }
209
210 /* Make sure this inode cluster buffer can pass the inode buffer verifier. */
211 STATIC void
xrep_dinode_buf(struct xfs_scrub * sc,struct xfs_buf * bp)212 xrep_dinode_buf(
213 struct xfs_scrub *sc,
214 struct xfs_buf *bp)
215 {
216 struct xfs_mount *mp = sc->mp;
217 int i;
218 int ni;
219
220 ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
221 for (i = 0; i < ni; i++)
222 xrep_dinode_buf_core(sc, bp, i << mp->m_sb.sb_inodelog);
223 }
224
225 /* Reinitialize things that never change in an inode. */
226 STATIC void
xrep_dinode_header(struct xfs_scrub * sc,struct xfs_dinode * dip)227 xrep_dinode_header(
228 struct xfs_scrub *sc,
229 struct xfs_dinode *dip)
230 {
231 trace_xrep_dinode_header(sc, dip);
232
233 dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
234 if (!xfs_dinode_good_version(sc->mp, dip->di_version))
235 dip->di_version = 3;
236 dip->di_ino = cpu_to_be64(sc->sm->sm_ino);
237 uuid_copy(&dip->di_uuid, &sc->mp->m_sb.sb_meta_uuid);
238 dip->di_gen = cpu_to_be32(sc->sm->sm_gen);
239 }
240
241 /*
242 * If this directory entry points to the scrub target inode, then the directory
243 * we're scanning is the parent of the scrub target inode.
244 */
245 STATIC int
xrep_dinode_findmode_dirent(struct xfs_scrub * sc,struct xfs_inode * dp,xfs_dir2_dataptr_t dapos,const struct xfs_name * name,xfs_ino_t ino,void * priv)246 xrep_dinode_findmode_dirent(
247 struct xfs_scrub *sc,
248 struct xfs_inode *dp,
249 xfs_dir2_dataptr_t dapos,
250 const struct xfs_name *name,
251 xfs_ino_t ino,
252 void *priv)
253 {
254 struct xrep_inode *ri = priv;
255 int error = 0;
256
257 if (xchk_should_terminate(ri->sc, &error))
258 return error;
259
260 if (ino != sc->sm->sm_ino)
261 return 0;
262
263 /* Ignore garbage directory entry names. */
264 if (name->len == 0 || !xfs_dir2_namecheck(name->name, name->len))
265 return -EFSCORRUPTED;
266
267 /* Don't pick up dot or dotdot entries; we only want child dirents. */
268 if (xfs_dir2_samename(name, &xfs_name_dotdot) ||
269 xfs_dir2_samename(name, &xfs_name_dot))
270 return 0;
271
272 /*
273 * Uhoh, more than one parent for this inode and they don't agree on
274 * the file type?
275 */
276 if (ri->alleged_ftype != XFS_DIR3_FT_UNKNOWN &&
277 ri->alleged_ftype != name->type) {
278 trace_xrep_dinode_findmode_dirent_inval(ri->sc, dp, name->type,
279 ri->alleged_ftype);
280 return -EFSCORRUPTED;
281 }
282
283 /* We found a potential parent; remember the ftype. */
284 trace_xrep_dinode_findmode_dirent(ri->sc, dp, name->type);
285 ri->alleged_ftype = name->type;
286 return 0;
287 }
288
289 /* Try to lock a directory, or wait a jiffy. */
290 static inline int
xrep_dinode_ilock_nowait(struct xfs_inode * dp,unsigned int lock_mode)291 xrep_dinode_ilock_nowait(
292 struct xfs_inode *dp,
293 unsigned int lock_mode)
294 {
295 if (xfs_ilock_nowait(dp, lock_mode))
296 return true;
297
298 schedule_timeout_killable(1);
299 return false;
300 }
301
302 /*
303 * Try to lock a directory to look for ftype hints. Since we already hold the
304 * AGI buffer, we cannot block waiting for the ILOCK because rename can take
305 * the ILOCK and then try to lock AGIs.
306 */
307 STATIC int
xrep_dinode_trylock_directory(struct xrep_inode * ri,struct xfs_inode * dp,unsigned int * lock_modep)308 xrep_dinode_trylock_directory(
309 struct xrep_inode *ri,
310 struct xfs_inode *dp,
311 unsigned int *lock_modep)
312 {
313 unsigned long deadline = jiffies + msecs_to_jiffies(30000);
314 unsigned int lock_mode;
315 int error = 0;
316
317 do {
318 if (xchk_should_terminate(ri->sc, &error))
319 return error;
320
321 if (xfs_need_iread_extents(&dp->i_df))
322 lock_mode = XFS_ILOCK_EXCL;
323 else
324 lock_mode = XFS_ILOCK_SHARED;
325
326 if (xrep_dinode_ilock_nowait(dp, lock_mode)) {
327 *lock_modep = lock_mode;
328 return 0;
329 }
330 } while (!time_is_before_jiffies(deadline));
331 return -EBUSY;
332 }
333
334 /*
335 * If this is a directory, walk the dirents looking for any that point to the
336 * scrub target inode.
337 */
338 STATIC int
xrep_dinode_findmode_walk_directory(struct xrep_inode * ri,struct xfs_inode * dp)339 xrep_dinode_findmode_walk_directory(
340 struct xrep_inode *ri,
341 struct xfs_inode *dp)
342 {
343 struct xfs_scrub *sc = ri->sc;
344 unsigned int lock_mode;
345 int error = 0;
346
347 /* Ignore temporary repair directories. */
348 if (xrep_is_tempfile(dp))
349 return 0;
350
351 /*
352 * Scan the directory to see if there it contains an entry pointing to
353 * the directory that we are repairing.
354 */
355 error = xrep_dinode_trylock_directory(ri, dp, &lock_mode);
356 if (error)
357 return error;
358
359 /*
360 * If this directory is known to be sick, we cannot scan it reliably
361 * and must abort.
362 */
363 if (xfs_inode_has_sickness(dp, XFS_SICK_INO_CORE |
364 XFS_SICK_INO_BMBTD |
365 XFS_SICK_INO_DIR)) {
366 error = -EFSCORRUPTED;
367 goto out_unlock;
368 }
369
370 /*
371 * We cannot complete our parent pointer scan if a directory looks as
372 * though it has been zapped by the inode record repair code.
373 */
374 if (xchk_dir_looks_zapped(dp)) {
375 error = -EBUSY;
376 goto out_unlock;
377 }
378
379 error = xchk_dir_walk(sc, dp, xrep_dinode_findmode_dirent, ri);
380 if (error)
381 goto out_unlock;
382
383 out_unlock:
384 xfs_iunlock(dp, lock_mode);
385 return error;
386 }
387
388 /*
389 * Try to find the mode of the inode being repaired by looking for directories
390 * that point down to this file.
391 */
392 STATIC int
xrep_dinode_find_mode(struct xrep_inode * ri,uint16_t * mode)393 xrep_dinode_find_mode(
394 struct xrep_inode *ri,
395 uint16_t *mode)
396 {
397 struct xfs_scrub *sc = ri->sc;
398 struct xfs_inode *dp;
399 int error;
400
401 /* No ftype means we have no other metadata to consult. */
402 if (!xfs_has_ftype(sc->mp)) {
403 *mode = S_IFREG;
404 return 0;
405 }
406
407 /*
408 * Scan all directories for parents that might point down to this
409 * inode. Skip the inode being repaired during the scan since it
410 * cannot be its own parent. Note that we still hold the AGI locked
411 * so there's a real possibility that _iscan_iter can return EBUSY.
412 */
413 xchk_iscan_start(sc, 5000, 100, &ri->ftype_iscan);
414 xchk_iscan_set_agi_trylock(&ri->ftype_iscan);
415 ri->ftype_iscan.skip_ino = sc->sm->sm_ino;
416 ri->alleged_ftype = XFS_DIR3_FT_UNKNOWN;
417 while ((error = xchk_iscan_iter(&ri->ftype_iscan, &dp)) == 1) {
418 if (S_ISDIR(VFS_I(dp)->i_mode))
419 error = xrep_dinode_findmode_walk_directory(ri, dp);
420 xchk_iscan_mark_visited(&ri->ftype_iscan, dp);
421 xchk_irele(sc, dp);
422 if (error < 0)
423 break;
424 if (xchk_should_terminate(sc, &error))
425 break;
426 }
427 xchk_iscan_iter_finish(&ri->ftype_iscan);
428 xchk_iscan_teardown(&ri->ftype_iscan);
429
430 if (error == -EBUSY) {
431 if (ri->alleged_ftype != XFS_DIR3_FT_UNKNOWN) {
432 /*
433 * If we got an EBUSY after finding at least one
434 * dirent, that means the scan found an inode on the
435 * inactivation list and could not open it. Accept the
436 * alleged ftype and install a new mode below.
437 */
438 error = 0;
439 } else if (!(sc->flags & XCHK_TRY_HARDER)) {
440 /*
441 * Otherwise, retry the operation one time to see if
442 * the reason for the delay is an inode from the same
443 * cluster buffer waiting on the inactivation list.
444 */
445 error = -EDEADLOCK;
446 }
447 }
448 if (error)
449 return error;
450
451 /*
452 * Convert the discovered ftype into the file mode. If all else fails,
453 * return S_IFREG.
454 */
455 switch (ri->alleged_ftype) {
456 case XFS_DIR3_FT_DIR:
457 *mode = S_IFDIR;
458 break;
459 case XFS_DIR3_FT_WHT:
460 case XFS_DIR3_FT_CHRDEV:
461 *mode = S_IFCHR;
462 break;
463 case XFS_DIR3_FT_BLKDEV:
464 *mode = S_IFBLK;
465 break;
466 case XFS_DIR3_FT_FIFO:
467 *mode = S_IFIFO;
468 break;
469 case XFS_DIR3_FT_SOCK:
470 *mode = S_IFSOCK;
471 break;
472 case XFS_DIR3_FT_SYMLINK:
473 *mode = S_IFLNK;
474 break;
475 default:
476 *mode = S_IFREG;
477 break;
478 }
479 return 0;
480 }
481
482 /* Turn di_mode into /something/ recognizable. Returns true if we succeed. */
483 STATIC int
xrep_dinode_mode(struct xrep_inode * ri,struct xfs_dinode * dip)484 xrep_dinode_mode(
485 struct xrep_inode *ri,
486 struct xfs_dinode *dip)
487 {
488 struct xfs_scrub *sc = ri->sc;
489 uint16_t mode = be16_to_cpu(dip->di_mode);
490 int error;
491
492 trace_xrep_dinode_mode(sc, dip);
493
494 if (mode == 0 || xfs_mode_to_ftype(mode) != XFS_DIR3_FT_UNKNOWN)
495 return 0;
496
497 /* Try to fix the mode. If we cannot, then leave everything alone. */
498 error = xrep_dinode_find_mode(ri, &mode);
499 switch (error) {
500 case -EINTR:
501 case -EBUSY:
502 case -EDEADLOCK:
503 /* temporary failure or fatal signal */
504 return error;
505 case 0:
506 /* found mode */
507 break;
508 default:
509 /* some other error, assume S_IFREG */
510 mode = S_IFREG;
511 break;
512 }
513
514 /* bad mode, so we set it to a file that only root can read */
515 dip->di_mode = cpu_to_be16(mode);
516 dip->di_uid = 0;
517 dip->di_gid = 0;
518 ri->zap_acls = true;
519 return 0;
520 }
521
522 /* Fix unused link count fields having nonzero values. */
523 STATIC void
xrep_dinode_nlinks(struct xfs_dinode * dip)524 xrep_dinode_nlinks(
525 struct xfs_dinode *dip)
526 {
527 if (dip->di_version < 2) {
528 dip->di_nlink = 0;
529 return;
530 }
531
532 if (xfs_dinode_is_metadir(dip)) {
533 if (be16_to_cpu(dip->di_metatype) >= XFS_METAFILE_MAX)
534 dip->di_metatype = cpu_to_be16(XFS_METAFILE_UNKNOWN);
535 } else {
536 dip->di_metatype = 0;
537 }
538 }
539
540 /* Fix any conflicting flags that the verifiers complain about. */
541 STATIC void
xrep_dinode_flags(struct xfs_scrub * sc,struct xfs_dinode * dip,bool isrt)542 xrep_dinode_flags(
543 struct xfs_scrub *sc,
544 struct xfs_dinode *dip,
545 bool isrt)
546 {
547 struct xfs_mount *mp = sc->mp;
548 uint64_t flags2 = be64_to_cpu(dip->di_flags2);
549 uint16_t flags = be16_to_cpu(dip->di_flags);
550 uint16_t mode = be16_to_cpu(dip->di_mode);
551
552 trace_xrep_dinode_flags(sc, dip);
553
554 if (isrt)
555 flags |= XFS_DIFLAG_REALTIME;
556 else
557 flags &= ~XFS_DIFLAG_REALTIME;
558
559 /*
560 * For regular files on a reflink filesystem, set the REFLINK flag to
561 * protect shared extents. A later stage will actually check those
562 * extents and clear the flag if possible.
563 */
564 if (xfs_has_reflink(mp) && S_ISREG(mode))
565 flags2 |= XFS_DIFLAG2_REFLINK;
566 else
567 flags2 &= ~(XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE);
568 if (!xfs_has_bigtime(mp))
569 flags2 &= ~XFS_DIFLAG2_BIGTIME;
570 if (!xfs_has_large_extent_counts(mp))
571 flags2 &= ~XFS_DIFLAG2_NREXT64;
572 if (flags2 & XFS_DIFLAG2_NREXT64)
573 dip->di_nrext64_pad = 0;
574 else if (dip->di_version >= 3)
575 dip->di_v3_pad = 0;
576
577 if (flags2 & XFS_DIFLAG2_METADATA) {
578 xfs_failaddr_t fa;
579
580 fa = xfs_dinode_verify_metadir(sc->mp, dip, mode, flags,
581 flags2);
582 if (fa)
583 flags2 &= ~XFS_DIFLAG2_METADATA;
584 }
585
586 dip->di_flags = cpu_to_be16(flags);
587 dip->di_flags2 = cpu_to_be64(flags2);
588 }
589
590 /*
591 * Blow out symlink; now it points nowhere. We don't have to worry about
592 * incore state because this inode is failing the verifiers.
593 */
594 STATIC void
xrep_dinode_zap_symlink(struct xrep_inode * ri,struct xfs_dinode * dip)595 xrep_dinode_zap_symlink(
596 struct xrep_inode *ri,
597 struct xfs_dinode *dip)
598 {
599 struct xfs_scrub *sc = ri->sc;
600 char *p;
601
602 trace_xrep_dinode_zap_symlink(sc, dip);
603
604 dip->di_format = XFS_DINODE_FMT_LOCAL;
605 dip->di_size = cpu_to_be64(1);
606 p = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
607 *p = '?';
608 ri->ino_sick_mask |= XFS_SICK_INO_SYMLINK_ZAPPED;
609 }
610
611 /*
612 * Blow out dir, make the parent point to the root. In the future repair will
613 * reconstruct this directory for us. Note that there's no in-core directory
614 * inode because the sf verifier tripped, so we don't have to worry about the
615 * dentry cache.
616 */
617 STATIC void
xrep_dinode_zap_dir(struct xrep_inode * ri,struct xfs_dinode * dip)618 xrep_dinode_zap_dir(
619 struct xrep_inode *ri,
620 struct xfs_dinode *dip)
621 {
622 struct xfs_scrub *sc = ri->sc;
623 struct xfs_mount *mp = sc->mp;
624 struct xfs_dir2_sf_hdr *sfp;
625 int i8count;
626
627 trace_xrep_dinode_zap_dir(sc, dip);
628
629 dip->di_format = XFS_DINODE_FMT_LOCAL;
630 i8count = mp->m_sb.sb_rootino > XFS_DIR2_MAX_SHORT_INUM;
631 sfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
632 sfp->count = 0;
633 sfp->i8count = i8count;
634 xfs_dir2_sf_put_parent_ino(sfp, mp->m_sb.sb_rootino);
635 dip->di_size = cpu_to_be64(xfs_dir2_sf_hdr_size(i8count));
636 ri->ino_sick_mask |= XFS_SICK_INO_DIR_ZAPPED;
637 }
638
639 /* Make sure we don't have a garbage file size. */
640 STATIC void
xrep_dinode_size(struct xrep_inode * ri,struct xfs_dinode * dip)641 xrep_dinode_size(
642 struct xrep_inode *ri,
643 struct xfs_dinode *dip)
644 {
645 struct xfs_scrub *sc = ri->sc;
646 uint64_t size = be64_to_cpu(dip->di_size);
647 uint16_t mode = be16_to_cpu(dip->di_mode);
648
649 trace_xrep_dinode_size(sc, dip);
650
651 switch (mode & S_IFMT) {
652 case S_IFIFO:
653 case S_IFCHR:
654 case S_IFBLK:
655 case S_IFSOCK:
656 /* di_size can't be nonzero for special files */
657 dip->di_size = 0;
658 break;
659 case S_IFREG:
660 /* Regular files can't be larger than 2^63-1 bytes. */
661 dip->di_size = cpu_to_be64(size & ~(1ULL << 63));
662 break;
663 case S_IFLNK:
664 /*
665 * Truncate ridiculously oversized symlinks. If the size is
666 * zero, reset it to point to the current directory. Both of
667 * these conditions trigger dinode verifier errors, so there
668 * is no in-core state to reset.
669 */
670 if (size > XFS_SYMLINK_MAXLEN)
671 dip->di_size = cpu_to_be64(XFS_SYMLINK_MAXLEN);
672 else if (size == 0)
673 xrep_dinode_zap_symlink(ri, dip);
674 break;
675 case S_IFDIR:
676 /*
677 * Directories can't have a size larger than 32G. If the size
678 * is zero, reset it to an empty directory. Both of these
679 * conditions trigger dinode verifier errors, so there is no
680 * in-core state to reset.
681 */
682 if (size > XFS_DIR2_SPACE_SIZE)
683 dip->di_size = cpu_to_be64(XFS_DIR2_SPACE_SIZE);
684 else if (size == 0)
685 xrep_dinode_zap_dir(ri, dip);
686 break;
687 }
688 }
689
690 /* Fix extent size hints. */
691 STATIC void
xrep_dinode_extsize_hints(struct xfs_scrub * sc,struct xfs_dinode * dip)692 xrep_dinode_extsize_hints(
693 struct xfs_scrub *sc,
694 struct xfs_dinode *dip)
695 {
696 struct xfs_mount *mp = sc->mp;
697 uint64_t flags2 = be64_to_cpu(dip->di_flags2);
698 uint16_t flags = be16_to_cpu(dip->di_flags);
699 uint16_t mode = be16_to_cpu(dip->di_mode);
700
701 xfs_failaddr_t fa;
702
703 trace_xrep_dinode_extsize_hints(sc, dip);
704
705 fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize),
706 mode, flags);
707 if (fa) {
708 dip->di_extsize = 0;
709 dip->di_flags &= ~cpu_to_be16(XFS_DIFLAG_EXTSIZE |
710 XFS_DIFLAG_EXTSZINHERIT);
711 }
712
713 if (dip->di_version < 3)
714 return;
715
716 fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
717 mode, flags, flags2);
718 if (fa) {
719 dip->di_cowextsize = 0;
720 dip->di_flags2 &= ~cpu_to_be64(XFS_DIFLAG2_COWEXTSIZE);
721 }
722 }
723
724 /* Count extents and blocks for an inode given an rmap. */
725 STATIC int
xrep_dinode_walk_rmap(struct xfs_btree_cur * cur,const struct xfs_rmap_irec * rec,void * priv)726 xrep_dinode_walk_rmap(
727 struct xfs_btree_cur *cur,
728 const struct xfs_rmap_irec *rec,
729 void *priv)
730 {
731 struct xrep_inode *ri = priv;
732 int error = 0;
733
734 if (xchk_should_terminate(ri->sc, &error))
735 return error;
736
737 /* We only care about this inode. */
738 if (rec->rm_owner != ri->sc->sm->sm_ino)
739 return 0;
740
741 if (rec->rm_flags & XFS_RMAP_ATTR_FORK) {
742 ri->attr_blocks += rec->rm_blockcount;
743 if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
744 ri->attr_extents++;
745
746 return 0;
747 }
748
749 ri->data_blocks += rec->rm_blockcount;
750 if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
751 ri->data_extents++;
752
753 return 0;
754 }
755
756 /* Count extents and blocks for an inode from all AG rmap data. */
757 STATIC int
xrep_dinode_count_ag_rmaps(struct xrep_inode * ri,struct xfs_perag * pag)758 xrep_dinode_count_ag_rmaps(
759 struct xrep_inode *ri,
760 struct xfs_perag *pag)
761 {
762 struct xfs_btree_cur *cur;
763 struct xfs_buf *agf;
764 int error;
765
766 error = xfs_alloc_read_agf(pag, ri->sc->tp, 0, &agf);
767 if (error)
768 return error;
769
770 cur = xfs_rmapbt_init_cursor(ri->sc->mp, ri->sc->tp, agf, pag);
771 error = xfs_rmap_query_all(cur, xrep_dinode_walk_rmap, ri);
772 xfs_btree_del_cursor(cur, error);
773 xfs_trans_brelse(ri->sc->tp, agf);
774 return error;
775 }
776
777 /* Count extents and blocks for an inode given an rt rmap. */
778 STATIC int
xrep_dinode_walk_rtrmap(struct xfs_btree_cur * cur,const struct xfs_rmap_irec * rec,void * priv)779 xrep_dinode_walk_rtrmap(
780 struct xfs_btree_cur *cur,
781 const struct xfs_rmap_irec *rec,
782 void *priv)
783 {
784 struct xrep_inode *ri = priv;
785 int error = 0;
786
787 if (xchk_should_terminate(ri->sc, &error))
788 return error;
789
790 /* We only care about this inode. */
791 if (rec->rm_owner != ri->sc->sm->sm_ino)
792 return 0;
793
794 if (rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK))
795 return -EFSCORRUPTED;
796
797 ri->rt_blocks += rec->rm_blockcount;
798 ri->rt_extents++;
799 return 0;
800 }
801
802 /* Count extents and blocks for an inode from all realtime rmap data. */
803 STATIC int
xrep_dinode_count_rtgroup_rmaps(struct xrep_inode * ri,struct xfs_rtgroup * rtg)804 xrep_dinode_count_rtgroup_rmaps(
805 struct xrep_inode *ri,
806 struct xfs_rtgroup *rtg)
807 {
808 struct xfs_scrub *sc = ri->sc;
809 int error;
810
811 error = xrep_rtgroup_init(sc, rtg, &sc->sr, XFS_RTGLOCK_RMAP);
812 if (error)
813 return error;
814
815 error = xfs_rmap_query_all(sc->sr.rmap_cur, xrep_dinode_walk_rtrmap,
816 ri);
817 xchk_rtgroup_btcur_free(&sc->sr);
818 xchk_rtgroup_free(sc, &sc->sr);
819 return error;
820 }
821
822 /* Count extents and blocks for a given inode from all rmap data. */
823 STATIC int
xrep_dinode_count_rmaps(struct xrep_inode * ri)824 xrep_dinode_count_rmaps(
825 struct xrep_inode *ri)
826 {
827 struct xfs_perag *pag = NULL;
828 struct xfs_rtgroup *rtg = NULL;
829 int error;
830
831 if (!xfs_has_rmapbt(ri->sc->mp))
832 return -EOPNOTSUPP;
833
834 while ((rtg = xfs_rtgroup_next(ri->sc->mp, rtg))) {
835 error = xrep_dinode_count_rtgroup_rmaps(ri, rtg);
836 if (error) {
837 xfs_rtgroup_rele(rtg);
838 return error;
839 }
840 }
841
842 while ((pag = xfs_perag_next(ri->sc->mp, pag))) {
843 error = xrep_dinode_count_ag_rmaps(ri, pag);
844 if (error) {
845 xfs_perag_rele(pag);
846 return error;
847 }
848 }
849
850 /* Can't have extents on both the rt and the data device. */
851 if (ri->data_extents && ri->rt_extents)
852 return -EFSCORRUPTED;
853
854 trace_xrep_dinode_count_rmaps(ri->sc,
855 ri->data_blocks, ri->rt_blocks, ri->attr_blocks,
856 ri->data_extents, ri->rt_extents, ri->attr_extents);
857 return 0;
858 }
859
860 /* Return true if this extents-format ifork looks like garbage. */
861 STATIC bool
xrep_dinode_bad_extents_fork(struct xfs_scrub * sc,struct xfs_dinode * dip,unsigned int dfork_size,int whichfork)862 xrep_dinode_bad_extents_fork(
863 struct xfs_scrub *sc,
864 struct xfs_dinode *dip,
865 unsigned int dfork_size,
866 int whichfork)
867 {
868 struct xfs_bmbt_irec new;
869 struct xfs_bmbt_rec *dp;
870 xfs_extnum_t nex;
871 bool isrt;
872 unsigned int i;
873
874 nex = xfs_dfork_nextents(dip, whichfork);
875 if (nex > dfork_size / sizeof(struct xfs_bmbt_rec))
876 return true;
877
878 dp = XFS_DFORK_PTR(dip, whichfork);
879
880 isrt = dip->di_flags & cpu_to_be16(XFS_DIFLAG_REALTIME);
881 for (i = 0; i < nex; i++, dp++) {
882 xfs_failaddr_t fa;
883
884 xfs_bmbt_disk_get_all(dp, &new);
885 fa = xfs_bmap_validate_extent_raw(sc->mp, isrt, whichfork,
886 &new);
887 if (fa)
888 return true;
889 }
890
891 return false;
892 }
893
894 /* Return true if this btree-format ifork looks like garbage. */
895 STATIC bool
xrep_dinode_bad_bmbt_fork(struct xfs_scrub * sc,struct xfs_dinode * dip,unsigned int dfork_size,int whichfork)896 xrep_dinode_bad_bmbt_fork(
897 struct xfs_scrub *sc,
898 struct xfs_dinode *dip,
899 unsigned int dfork_size,
900 int whichfork)
901 {
902 struct xfs_bmdr_block *dfp;
903 xfs_extnum_t nex;
904 unsigned int i;
905 unsigned int dmxr;
906 unsigned int nrecs;
907 unsigned int level;
908
909 nex = xfs_dfork_nextents(dip, whichfork);
910 if (nex <= dfork_size / sizeof(struct xfs_bmbt_rec))
911 return true;
912
913 if (dfork_size < sizeof(struct xfs_bmdr_block))
914 return true;
915
916 dfp = XFS_DFORK_PTR(dip, whichfork);
917 nrecs = be16_to_cpu(dfp->bb_numrecs);
918 level = be16_to_cpu(dfp->bb_level);
919
920 if (nrecs == 0 || xfs_bmdr_space_calc(nrecs) > dfork_size)
921 return true;
922 if (level == 0 || level >= XFS_BM_MAXLEVELS(sc->mp, whichfork))
923 return true;
924
925 dmxr = xfs_bmdr_maxrecs(dfork_size, 0);
926 for (i = 1; i <= nrecs; i++) {
927 struct xfs_bmbt_key *fkp;
928 xfs_bmbt_ptr_t *fpp;
929 xfs_fileoff_t fileoff;
930 xfs_fsblock_t fsbno;
931
932 fkp = xfs_bmdr_key_addr(dfp, i);
933 fileoff = be64_to_cpu(fkp->br_startoff);
934 if (!xfs_verify_fileoff(sc->mp, fileoff))
935 return true;
936
937 fpp = xfs_bmdr_ptr_addr(dfp, i, dmxr);
938 fsbno = be64_to_cpu(*fpp);
939 if (!xfs_verify_fsbno(sc->mp, fsbno))
940 return true;
941 }
942
943 return false;
944 }
945
946 /* Return true if this rmap-format ifork looks like garbage. */
947 STATIC bool
xrep_dinode_bad_rtrmapbt_fork(struct xfs_scrub * sc,struct xfs_dinode * dip,unsigned int dfork_size)948 xrep_dinode_bad_rtrmapbt_fork(
949 struct xfs_scrub *sc,
950 struct xfs_dinode *dip,
951 unsigned int dfork_size)
952 {
953 struct xfs_rtrmap_root *dfp;
954 unsigned int nrecs;
955 unsigned int level;
956
957 if (dfork_size < sizeof(struct xfs_rtrmap_root))
958 return true;
959
960 dfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
961 nrecs = be16_to_cpu(dfp->bb_numrecs);
962 level = be16_to_cpu(dfp->bb_level);
963
964 if (level > sc->mp->m_rtrmap_maxlevels)
965 return true;
966 if (xfs_rtrmap_droot_space_calc(level, nrecs) > dfork_size)
967 return true;
968 if (level > 0 && nrecs == 0)
969 return true;
970
971 return false;
972 }
973
974 /* Return true if this refcount-format ifork looks like garbage. */
975 STATIC bool
xrep_dinode_bad_rtrefcountbt_fork(struct xfs_scrub * sc,struct xfs_dinode * dip,unsigned int dfork_size)976 xrep_dinode_bad_rtrefcountbt_fork(
977 struct xfs_scrub *sc,
978 struct xfs_dinode *dip,
979 unsigned int dfork_size)
980 {
981 struct xfs_rtrefcount_root *dfp;
982 unsigned int nrecs;
983 unsigned int level;
984
985 if (dfork_size < sizeof(struct xfs_rtrefcount_root))
986 return true;
987
988 dfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
989 nrecs = be16_to_cpu(dfp->bb_numrecs);
990 level = be16_to_cpu(dfp->bb_level);
991
992 if (level > sc->mp->m_rtrefc_maxlevels)
993 return true;
994 if (xfs_rtrefcount_droot_space_calc(level, nrecs) > dfork_size)
995 return true;
996 if (level > 0 && nrecs == 0)
997 return true;
998
999 return false;
1000 }
1001
1002 /* Check a metadata-btree fork. */
1003 STATIC bool
xrep_dinode_bad_metabt_fork(struct xfs_scrub * sc,struct xfs_dinode * dip,unsigned int dfork_size,int whichfork)1004 xrep_dinode_bad_metabt_fork(
1005 struct xfs_scrub *sc,
1006 struct xfs_dinode *dip,
1007 unsigned int dfork_size,
1008 int whichfork)
1009 {
1010 if (whichfork != XFS_DATA_FORK)
1011 return true;
1012
1013 switch (be16_to_cpu(dip->di_metatype)) {
1014 case XFS_METAFILE_RTRMAP:
1015 return xrep_dinode_bad_rtrmapbt_fork(sc, dip, dfork_size);
1016 case XFS_METAFILE_RTREFCOUNT:
1017 return xrep_dinode_bad_rtrefcountbt_fork(sc, dip, dfork_size);
1018 default:
1019 return true;
1020 }
1021
1022 return false;
1023 }
1024
1025 /*
1026 * Check the data fork for things that will fail the ifork verifiers or the
1027 * ifork formatters.
1028 */
1029 STATIC bool
xrep_dinode_check_dfork(struct xfs_scrub * sc,struct xfs_dinode * dip,uint16_t mode)1030 xrep_dinode_check_dfork(
1031 struct xfs_scrub *sc,
1032 struct xfs_dinode *dip,
1033 uint16_t mode)
1034 {
1035 void *dfork_ptr;
1036 int64_t data_size;
1037 unsigned int fmt;
1038 unsigned int dfork_size;
1039
1040 /*
1041 * Verifier functions take signed int64_t, so check for bogus negative
1042 * values first.
1043 */
1044 data_size = be64_to_cpu(dip->di_size);
1045 if (data_size < 0)
1046 return true;
1047
1048 fmt = XFS_DFORK_FORMAT(dip, XFS_DATA_FORK);
1049 switch (mode & S_IFMT) {
1050 case S_IFIFO:
1051 case S_IFCHR:
1052 case S_IFBLK:
1053 case S_IFSOCK:
1054 if (fmt != XFS_DINODE_FMT_DEV)
1055 return true;
1056 break;
1057 case S_IFREG:
1058 switch (fmt) {
1059 case XFS_DINODE_FMT_LOCAL:
1060 return true;
1061 case XFS_DINODE_FMT_EXTENTS:
1062 case XFS_DINODE_FMT_BTREE:
1063 case XFS_DINODE_FMT_META_BTREE:
1064 break;
1065 default:
1066 return true;
1067 }
1068 break;
1069 case S_IFLNK:
1070 case S_IFDIR:
1071 switch (fmt) {
1072 case XFS_DINODE_FMT_LOCAL:
1073 case XFS_DINODE_FMT_EXTENTS:
1074 case XFS_DINODE_FMT_BTREE:
1075 break;
1076 default:
1077 return true;
1078 }
1079 break;
1080 default:
1081 return true;
1082 }
1083
1084 dfork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_DATA_FORK);
1085 dfork_ptr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
1086
1087 switch (fmt) {
1088 case XFS_DINODE_FMT_DEV:
1089 break;
1090 case XFS_DINODE_FMT_LOCAL:
1091 /* dir/symlink structure cannot be larger than the fork */
1092 if (data_size > dfork_size)
1093 return true;
1094 /* directory structure must pass verification. */
1095 if (S_ISDIR(mode) &&
1096 xfs_dir2_sf_verify(sc->mp, dfork_ptr, data_size) != NULL)
1097 return true;
1098 /* symlink structure must pass verification. */
1099 if (S_ISLNK(mode) &&
1100 xfs_symlink_shortform_verify(dfork_ptr, data_size) != NULL)
1101 return true;
1102 break;
1103 case XFS_DINODE_FMT_EXTENTS:
1104 if (xrep_dinode_bad_extents_fork(sc, dip, dfork_size,
1105 XFS_DATA_FORK))
1106 return true;
1107 break;
1108 case XFS_DINODE_FMT_BTREE:
1109 if (xrep_dinode_bad_bmbt_fork(sc, dip, dfork_size,
1110 XFS_DATA_FORK))
1111 return true;
1112 break;
1113 case XFS_DINODE_FMT_META_BTREE:
1114 if (xrep_dinode_bad_metabt_fork(sc, dip, dfork_size,
1115 XFS_DATA_FORK))
1116 return true;
1117 break;
1118 default:
1119 return true;
1120 }
1121
1122 return false;
1123 }
1124
1125 static void
xrep_dinode_set_data_nextents(struct xfs_dinode * dip,xfs_extnum_t nextents)1126 xrep_dinode_set_data_nextents(
1127 struct xfs_dinode *dip,
1128 xfs_extnum_t nextents)
1129 {
1130 if (xfs_dinode_has_large_extent_counts(dip))
1131 dip->di_big_nextents = cpu_to_be64(nextents);
1132 else
1133 dip->di_nextents = cpu_to_be32(nextents);
1134 }
1135
1136 static void
xrep_dinode_set_attr_nextents(struct xfs_dinode * dip,xfs_extnum_t nextents)1137 xrep_dinode_set_attr_nextents(
1138 struct xfs_dinode *dip,
1139 xfs_extnum_t nextents)
1140 {
1141 if (xfs_dinode_has_large_extent_counts(dip))
1142 dip->di_big_anextents = cpu_to_be32(nextents);
1143 else
1144 dip->di_anextents = cpu_to_be16(nextents);
1145 }
1146
1147 /* Reset the data fork to something sane. */
1148 STATIC void
xrep_dinode_zap_dfork(struct xrep_inode * ri,struct xfs_dinode * dip,uint16_t mode)1149 xrep_dinode_zap_dfork(
1150 struct xrep_inode *ri,
1151 struct xfs_dinode *dip,
1152 uint16_t mode)
1153 {
1154 struct xfs_scrub *sc = ri->sc;
1155
1156 trace_xrep_dinode_zap_dfork(sc, dip);
1157
1158 ri->ino_sick_mask |= XFS_SICK_INO_BMBTD_ZAPPED;
1159
1160 xrep_dinode_set_data_nextents(dip, 0);
1161 ri->data_blocks = 0;
1162 ri->rt_blocks = 0;
1163
1164 /* Special files always get reset to DEV */
1165 switch (mode & S_IFMT) {
1166 case S_IFIFO:
1167 case S_IFCHR:
1168 case S_IFBLK:
1169 case S_IFSOCK:
1170 dip->di_format = XFS_DINODE_FMT_DEV;
1171 dip->di_size = 0;
1172 return;
1173 }
1174
1175 /*
1176 * If we have data extents, reset to an empty map and hope the user
1177 * will run the bmapbtd checker next.
1178 */
1179 if (ri->data_extents || ri->rt_extents || S_ISREG(mode)) {
1180 dip->di_format = XFS_DINODE_FMT_EXTENTS;
1181 return;
1182 }
1183
1184 /* Otherwise, reset the local format to the minimum. */
1185 switch (mode & S_IFMT) {
1186 case S_IFLNK:
1187 xrep_dinode_zap_symlink(ri, dip);
1188 break;
1189 case S_IFDIR:
1190 xrep_dinode_zap_dir(ri, dip);
1191 break;
1192 }
1193 }
1194
1195 /*
1196 * Check the attr fork for things that will fail the ifork verifiers or the
1197 * ifork formatters.
1198 */
1199 STATIC bool
xrep_dinode_check_afork(struct xfs_scrub * sc,struct xfs_dinode * dip)1200 xrep_dinode_check_afork(
1201 struct xfs_scrub *sc,
1202 struct xfs_dinode *dip)
1203 {
1204 struct xfs_attr_sf_hdr *afork_ptr;
1205 size_t attr_size;
1206 unsigned int afork_size;
1207
1208 if (XFS_DFORK_BOFF(dip) == 0)
1209 return dip->di_aformat != XFS_DINODE_FMT_EXTENTS ||
1210 xfs_dfork_attr_extents(dip) != 0;
1211
1212 afork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK);
1213 afork_ptr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK);
1214
1215 switch (XFS_DFORK_FORMAT(dip, XFS_ATTR_FORK)) {
1216 case XFS_DINODE_FMT_LOCAL:
1217 /* Fork has to be large enough to extract the xattr size. */
1218 if (afork_size < sizeof(struct xfs_attr_sf_hdr))
1219 return true;
1220
1221 /* xattr structure cannot be larger than the fork */
1222 attr_size = be16_to_cpu(afork_ptr->totsize);
1223 if (attr_size > afork_size)
1224 return true;
1225
1226 /* xattr structure must pass verification. */
1227 return xfs_attr_shortform_verify(afork_ptr, attr_size) != NULL;
1228 case XFS_DINODE_FMT_EXTENTS:
1229 if (xrep_dinode_bad_extents_fork(sc, dip, afork_size,
1230 XFS_ATTR_FORK))
1231 return true;
1232 break;
1233 case XFS_DINODE_FMT_BTREE:
1234 if (xrep_dinode_bad_bmbt_fork(sc, dip, afork_size,
1235 XFS_ATTR_FORK))
1236 return true;
1237 break;
1238 case XFS_DINODE_FMT_META_BTREE:
1239 if (xrep_dinode_bad_metabt_fork(sc, dip, afork_size,
1240 XFS_ATTR_FORK))
1241 return true;
1242 break;
1243 default:
1244 return true;
1245 }
1246
1247 return false;
1248 }
1249
1250 /*
1251 * Reset the attr fork to empty. Since the attr fork could have contained
1252 * ACLs, make the file readable only by root.
1253 */
1254 STATIC void
xrep_dinode_zap_afork(struct xrep_inode * ri,struct xfs_dinode * dip,uint16_t mode)1255 xrep_dinode_zap_afork(
1256 struct xrep_inode *ri,
1257 struct xfs_dinode *dip,
1258 uint16_t mode)
1259 {
1260 struct xfs_scrub *sc = ri->sc;
1261
1262 trace_xrep_dinode_zap_afork(sc, dip);
1263
1264 ri->ino_sick_mask |= XFS_SICK_INO_BMBTA_ZAPPED;
1265
1266 dip->di_aformat = XFS_DINODE_FMT_EXTENTS;
1267 xrep_dinode_set_attr_nextents(dip, 0);
1268 ri->attr_blocks = 0;
1269
1270 /*
1271 * If the data fork is in btree format, removing the attr fork entirely
1272 * might cause verifier failures if the next level down in the bmbt
1273 * could now fit in the data fork area.
1274 */
1275 if (dip->di_format != XFS_DINODE_FMT_BTREE)
1276 dip->di_forkoff = 0;
1277 dip->di_mode = cpu_to_be16(mode & ~0777);
1278 dip->di_uid = 0;
1279 dip->di_gid = 0;
1280 }
1281
1282 /* Make sure the fork offset is a sensible value. */
1283 STATIC void
xrep_dinode_ensure_forkoff(struct xrep_inode * ri,struct xfs_dinode * dip,uint16_t mode)1284 xrep_dinode_ensure_forkoff(
1285 struct xrep_inode *ri,
1286 struct xfs_dinode *dip,
1287 uint16_t mode)
1288 {
1289 struct xfs_bmdr_block *bmdr;
1290 struct xfs_rtrmap_root *rmdr;
1291 struct xfs_rtrefcount_root *rcdr;
1292 struct xfs_scrub *sc = ri->sc;
1293 xfs_extnum_t attr_extents, data_extents;
1294 size_t bmdr_minsz = xfs_bmdr_space_calc(1);
1295 unsigned int lit_sz = XFS_LITINO(sc->mp);
1296 unsigned int afork_min, dfork_min;
1297
1298 trace_xrep_dinode_ensure_forkoff(sc, dip);
1299
1300 /*
1301 * Before calling this function, xrep_dinode_core ensured that both
1302 * forks actually fit inside their respective literal areas. If this
1303 * was not the case, the fork was reset to FMT_EXTENTS with zero
1304 * records. If the rmapbt scan found attr or data fork blocks, this
1305 * will be noted in the dinode_stats, and we must leave enough room
1306 * for the bmap repair code to reconstruct the mapping structure.
1307 *
1308 * First, compute the minimum space required for the attr fork.
1309 */
1310 switch (dip->di_aformat) {
1311 case XFS_DINODE_FMT_LOCAL:
1312 /*
1313 * If we still have a shortform xattr structure at all, that
1314 * means the attr fork area was exactly large enough to fit
1315 * the sf structure.
1316 */
1317 afork_min = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK);
1318 break;
1319 case XFS_DINODE_FMT_EXTENTS:
1320 attr_extents = xfs_dfork_attr_extents(dip);
1321 if (attr_extents) {
1322 /*
1323 * We must maintain sufficient space to hold the entire
1324 * extent map array in the data fork. Note that we
1325 * previously zapped the fork if it had no chance of
1326 * fitting in the inode.
1327 */
1328 afork_min = sizeof(struct xfs_bmbt_rec) * attr_extents;
1329 } else if (ri->attr_extents > 0) {
1330 /*
1331 * The attr fork thinks it has zero extents, but we
1332 * found some xattr extents. We need to leave enough
1333 * empty space here so that the incore attr fork will
1334 * get created (and hence trigger the attr fork bmap
1335 * repairer).
1336 */
1337 afork_min = bmdr_minsz;
1338 } else {
1339 /* No extents on disk or found in rmapbt. */
1340 afork_min = 0;
1341 }
1342 break;
1343 case XFS_DINODE_FMT_BTREE:
1344 /* Must have space for btree header and key/pointers. */
1345 bmdr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK);
1346 afork_min = xfs_bmap_broot_space(sc->mp, bmdr);
1347 break;
1348 default:
1349 /* We should never see any other formats. */
1350 afork_min = 0;
1351 break;
1352 }
1353
1354 /* Compute the minimum space required for the data fork. */
1355 switch (dip->di_format) {
1356 case XFS_DINODE_FMT_DEV:
1357 dfork_min = sizeof(__be32);
1358 break;
1359 case XFS_DINODE_FMT_UUID:
1360 dfork_min = sizeof(uuid_t);
1361 break;
1362 case XFS_DINODE_FMT_LOCAL:
1363 /*
1364 * If we still have a shortform data fork at all, that means
1365 * the data fork area was large enough to fit whatever was in
1366 * there.
1367 */
1368 dfork_min = be64_to_cpu(dip->di_size);
1369 break;
1370 case XFS_DINODE_FMT_EXTENTS:
1371 data_extents = xfs_dfork_data_extents(dip);
1372 if (data_extents) {
1373 /*
1374 * We must maintain sufficient space to hold the entire
1375 * extent map array in the data fork. Note that we
1376 * previously zapped the fork if it had no chance of
1377 * fitting in the inode.
1378 */
1379 dfork_min = sizeof(struct xfs_bmbt_rec) * data_extents;
1380 } else if (ri->data_extents > 0 || ri->rt_extents > 0) {
1381 /*
1382 * The data fork thinks it has zero extents, but we
1383 * found some data extents. We need to leave enough
1384 * empty space here so that the data fork bmap repair
1385 * will recover the mappings.
1386 */
1387 dfork_min = bmdr_minsz;
1388 } else {
1389 /* No extents on disk or found in rmapbt. */
1390 dfork_min = 0;
1391 }
1392 break;
1393 case XFS_DINODE_FMT_BTREE:
1394 /* Must have space for btree header and key/pointers. */
1395 bmdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
1396 dfork_min = xfs_bmap_broot_space(sc->mp, bmdr);
1397 break;
1398 case XFS_DINODE_FMT_META_BTREE:
1399 switch (be16_to_cpu(dip->di_metatype)) {
1400 case XFS_METAFILE_RTRMAP:
1401 rmdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
1402 dfork_min = xfs_rtrmap_broot_space(sc->mp, rmdr);
1403 break;
1404 case XFS_METAFILE_RTREFCOUNT:
1405 rcdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
1406 dfork_min = xfs_rtrefcount_broot_space(sc->mp, rcdr);
1407 break;
1408 default:
1409 dfork_min = 0;
1410 break;
1411 }
1412 break;
1413 default:
1414 dfork_min = 0;
1415 break;
1416 }
1417
1418 /*
1419 * Round all values up to the nearest 8 bytes, because that is the
1420 * precision of di_forkoff.
1421 */
1422 afork_min = roundup(afork_min, 8);
1423 dfork_min = roundup(dfork_min, 8);
1424 bmdr_minsz = roundup(bmdr_minsz, 8);
1425
1426 ASSERT(dfork_min <= lit_sz);
1427 ASSERT(afork_min <= lit_sz);
1428
1429 /*
1430 * If the data fork was zapped and we don't have enough space for the
1431 * recovery fork, move the attr fork up.
1432 */
1433 if (dip->di_format == XFS_DINODE_FMT_EXTENTS &&
1434 xfs_dfork_data_extents(dip) == 0 &&
1435 (ri->data_extents > 0 || ri->rt_extents > 0) &&
1436 bmdr_minsz > XFS_DFORK_DSIZE(dip, sc->mp)) {
1437 if (bmdr_minsz + afork_min > lit_sz) {
1438 /*
1439 * The attr for and the stub fork we need to recover
1440 * the data fork won't both fit. Zap the attr fork.
1441 */
1442 xrep_dinode_zap_afork(ri, dip, mode);
1443 afork_min = bmdr_minsz;
1444 } else {
1445 void *before, *after;
1446
1447 /* Otherwise, just slide the attr fork up. */
1448 before = XFS_DFORK_APTR(dip);
1449 dip->di_forkoff = bmdr_minsz >> 3;
1450 after = XFS_DFORK_APTR(dip);
1451 memmove(after, before, XFS_DFORK_ASIZE(dip, sc->mp));
1452 }
1453 }
1454
1455 /*
1456 * If the attr fork was zapped and we don't have enough space for the
1457 * recovery fork, move the attr fork down.
1458 */
1459 if (dip->di_aformat == XFS_DINODE_FMT_EXTENTS &&
1460 xfs_dfork_attr_extents(dip) == 0 &&
1461 ri->attr_extents > 0 &&
1462 bmdr_minsz > XFS_DFORK_ASIZE(dip, sc->mp)) {
1463 if (dip->di_format == XFS_DINODE_FMT_BTREE) {
1464 /*
1465 * If the data fork is in btree format then we can't
1466 * adjust forkoff because that runs the risk of
1467 * violating the extents/btree format transition rules.
1468 */
1469 } else if (bmdr_minsz + dfork_min > lit_sz) {
1470 /*
1471 * If we can't move the attr fork, too bad, we lose the
1472 * attr fork and leak its blocks.
1473 */
1474 xrep_dinode_zap_afork(ri, dip, mode);
1475 } else {
1476 /*
1477 * Otherwise, just slide the attr fork down. The attr
1478 * fork is empty, so we don't have any old contents to
1479 * move here.
1480 */
1481 dip->di_forkoff = (lit_sz - bmdr_minsz) >> 3;
1482 }
1483 }
1484 }
1485
1486 /*
1487 * Zap the data/attr forks if we spot anything that isn't going to pass the
1488 * ifork verifiers or the ifork formatters, because we need to get the inode
1489 * into good enough shape that the higher level repair functions can run.
1490 */
1491 STATIC void
xrep_dinode_zap_forks(struct xrep_inode * ri,struct xfs_dinode * dip)1492 xrep_dinode_zap_forks(
1493 struct xrep_inode *ri,
1494 struct xfs_dinode *dip)
1495 {
1496 struct xfs_scrub *sc = ri->sc;
1497 xfs_extnum_t data_extents;
1498 xfs_extnum_t attr_extents;
1499 xfs_filblks_t nblocks;
1500 uint16_t mode;
1501 bool zap_datafork = false;
1502 bool zap_attrfork = ri->zap_acls;
1503
1504 trace_xrep_dinode_zap_forks(sc, dip);
1505
1506 mode = be16_to_cpu(dip->di_mode);
1507
1508 data_extents = xfs_dfork_data_extents(dip);
1509 attr_extents = xfs_dfork_attr_extents(dip);
1510 nblocks = be64_to_cpu(dip->di_nblocks);
1511
1512 /* Inode counters don't make sense? */
1513 if (data_extents > nblocks)
1514 zap_datafork = true;
1515 if (attr_extents > nblocks)
1516 zap_attrfork = true;
1517 if (data_extents + attr_extents > nblocks)
1518 zap_datafork = zap_attrfork = true;
1519
1520 if (!zap_datafork)
1521 zap_datafork = xrep_dinode_check_dfork(sc, dip, mode);
1522 if (!zap_attrfork)
1523 zap_attrfork = xrep_dinode_check_afork(sc, dip);
1524
1525 /* Zap whatever's bad. */
1526 if (zap_attrfork)
1527 xrep_dinode_zap_afork(ri, dip, mode);
1528 if (zap_datafork)
1529 xrep_dinode_zap_dfork(ri, dip, mode);
1530 xrep_dinode_ensure_forkoff(ri, dip, mode);
1531
1532 /*
1533 * Zero di_nblocks if we don't have any extents at all to satisfy the
1534 * buffer verifier.
1535 */
1536 data_extents = xfs_dfork_data_extents(dip);
1537 attr_extents = xfs_dfork_attr_extents(dip);
1538 if (data_extents + attr_extents == 0)
1539 dip->di_nblocks = 0;
1540 }
1541
1542 /* Inode didn't pass dinode verifiers, so fix the raw buffer and retry iget. */
1543 STATIC int
xrep_dinode_core(struct xrep_inode * ri)1544 xrep_dinode_core(
1545 struct xrep_inode *ri)
1546 {
1547 struct xfs_scrub *sc = ri->sc;
1548 struct xfs_buf *bp;
1549 struct xfs_dinode *dip;
1550 xfs_ino_t ino = sc->sm->sm_ino;
1551 int error;
1552 int iget_error;
1553
1554 /* Figure out what this inode had mapped in both forks. */
1555 error = xrep_dinode_count_rmaps(ri);
1556 if (error)
1557 return error;
1558
1559 /* Read the inode cluster buffer. */
1560 error = xfs_trans_read_buf(sc->mp, sc->tp, sc->mp->m_ddev_targp,
1561 ri->imap.im_blkno, ri->imap.im_len, XBF_UNMAPPED, &bp,
1562 NULL);
1563 if (error)
1564 return error;
1565
1566 /* Make sure we can pass the inode buffer verifier. */
1567 xrep_dinode_buf(sc, bp);
1568 bp->b_ops = &xfs_inode_buf_ops;
1569
1570 /* Fix everything the verifier will complain about. */
1571 dip = xfs_buf_offset(bp, ri->imap.im_boffset);
1572 xrep_dinode_header(sc, dip);
1573 iget_error = xrep_dinode_mode(ri, dip);
1574 if (iget_error)
1575 goto write;
1576 xrep_dinode_nlinks(dip);
1577 xrep_dinode_flags(sc, dip, ri->rt_extents > 0);
1578 xrep_dinode_size(ri, dip);
1579 xrep_dinode_extsize_hints(sc, dip);
1580 xrep_dinode_zap_forks(ri, dip);
1581
1582 write:
1583 /* Write out the inode. */
1584 trace_xrep_dinode_fixed(sc, dip);
1585 xfs_dinode_calc_crc(sc->mp, dip);
1586 xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_DINO_BUF);
1587 xfs_trans_log_buf(sc->tp, bp, ri->imap.im_boffset,
1588 ri->imap.im_boffset + sc->mp->m_sb.sb_inodesize - 1);
1589
1590 /*
1591 * In theory, we've fixed the ondisk inode record enough that we should
1592 * be able to load the inode into the cache. Try to iget that inode
1593 * now while we hold the AGI and the inode cluster buffer and take the
1594 * IOLOCK so that we can continue with repairs without anyone else
1595 * accessing the inode. If iget fails, we still need to commit the
1596 * changes.
1597 */
1598 if (!iget_error)
1599 iget_error = xchk_iget(sc, ino, &sc->ip);
1600 if (!iget_error)
1601 xchk_ilock(sc, XFS_IOLOCK_EXCL);
1602
1603 /*
1604 * Commit the inode cluster buffer updates and drop the AGI buffer that
1605 * we've been holding since scrub setup. From here on out, repairs
1606 * deal only with the cached inode.
1607 */
1608 error = xrep_trans_commit(sc);
1609 if (error)
1610 return error;
1611
1612 if (iget_error)
1613 return iget_error;
1614
1615 error = xchk_trans_alloc(sc, 0);
1616 if (error)
1617 return error;
1618
1619 error = xrep_ino_dqattach(sc);
1620 if (error)
1621 return error;
1622
1623 xchk_ilock(sc, XFS_ILOCK_EXCL);
1624 if (ri->ino_sick_mask)
1625 xfs_inode_mark_sick(sc->ip, ri->ino_sick_mask);
1626 return 0;
1627 }
1628
1629 /* Fix everything xfs_dinode_verify cares about. */
1630 STATIC int
xrep_dinode_problems(struct xrep_inode * ri)1631 xrep_dinode_problems(
1632 struct xrep_inode *ri)
1633 {
1634 struct xfs_scrub *sc = ri->sc;
1635 int error;
1636
1637 error = xrep_dinode_core(ri);
1638 if (error)
1639 return error;
1640
1641 /* We had to fix a totally busted inode, schedule quotacheck. */
1642 if (XFS_IS_UQUOTA_ON(sc->mp))
1643 xrep_force_quotacheck(sc, XFS_DQTYPE_USER);
1644 if (XFS_IS_GQUOTA_ON(sc->mp))
1645 xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP);
1646 if (XFS_IS_PQUOTA_ON(sc->mp))
1647 xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ);
1648
1649 return 0;
1650 }
1651
/*
 * Fix problems that the verifiers don't care about.  In general these are
 * errors that don't cause problems elsewhere in the kernel that we can easily
 * detect, so we don't check them all that rigorously.
 */
1657
1658 /* Make sure block and extent counts are ok. */
1659 STATIC int
xrep_inode_blockcounts(struct xfs_scrub * sc)1660 xrep_inode_blockcounts(
1661 struct xfs_scrub *sc)
1662 {
1663 struct xfs_ifork *ifp;
1664 xfs_filblks_t count;
1665 xfs_filblks_t acount;
1666 xfs_extnum_t nextents;
1667 int error;
1668
1669 trace_xrep_inode_blockcounts(sc);
1670
1671 /* Set data fork counters from the data fork mappings. */
1672 error = xchk_inode_count_blocks(sc, XFS_DATA_FORK, &nextents, &count);
1673 if (error)
1674 return error;
1675 if (xfs_is_reflink_inode(sc->ip)) {
1676 /*
1677 * data fork blockcount can exceed physical storage if a user
1678 * reflinks the same block over and over again.
1679 */
1680 ;
1681 } else if (XFS_IS_REALTIME_INODE(sc->ip)) {
1682 if (count >= sc->mp->m_sb.sb_rblocks)
1683 return -EFSCORRUPTED;
1684 } else {
1685 if (count >= sc->mp->m_sb.sb_dblocks)
1686 return -EFSCORRUPTED;
1687 }
1688 error = xrep_ino_ensure_extent_count(sc, XFS_DATA_FORK, nextents);
1689 if (error)
1690 return error;
1691 sc->ip->i_df.if_nextents = nextents;
1692
1693 /* Set attr fork counters from the attr fork mappings. */
1694 ifp = xfs_ifork_ptr(sc->ip, XFS_ATTR_FORK);
1695 if (ifp) {
1696 error = xchk_inode_count_blocks(sc, XFS_ATTR_FORK, &nextents,
1697 &acount);
1698 if (error)
1699 return error;
1700 if (count >= sc->mp->m_sb.sb_dblocks)
1701 return -EFSCORRUPTED;
1702 error = xrep_ino_ensure_extent_count(sc, XFS_ATTR_FORK,
1703 nextents);
1704 if (error)
1705 return error;
1706 ifp->if_nextents = nextents;
1707 } else {
1708 acount = 0;
1709 }
1710
1711 sc->ip->i_nblocks = count + acount;
1712 return 0;
1713 }
1714
1715 /* Check for invalid uid/gid/prid. */
1716 STATIC void
xrep_inode_ids(struct xfs_scrub * sc)1717 xrep_inode_ids(
1718 struct xfs_scrub *sc)
1719 {
1720 bool dirty = false;
1721
1722 trace_xrep_inode_ids(sc);
1723
1724 if (!uid_valid(VFS_I(sc->ip)->i_uid)) {
1725 i_uid_write(VFS_I(sc->ip), 0);
1726 dirty = true;
1727 if (XFS_IS_UQUOTA_ON(sc->mp))
1728 xrep_force_quotacheck(sc, XFS_DQTYPE_USER);
1729 }
1730
1731 if (!gid_valid(VFS_I(sc->ip)->i_gid)) {
1732 i_gid_write(VFS_I(sc->ip), 0);
1733 dirty = true;
1734 if (XFS_IS_GQUOTA_ON(sc->mp))
1735 xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP);
1736 }
1737
1738 if (sc->ip->i_projid == -1U) {
1739 sc->ip->i_projid = 0;
1740 dirty = true;
1741 if (XFS_IS_PQUOTA_ON(sc->mp))
1742 xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ);
1743 }
1744
1745 /* strip setuid/setgid if we touched any of the ids */
1746 if (dirty)
1747 VFS_I(sc->ip)->i_mode &= ~(S_ISUID | S_ISGID);
1748 }
1749
1750 static inline void
xrep_clamp_timestamp(struct xfs_inode * ip,struct timespec64 * ts)1751 xrep_clamp_timestamp(
1752 struct xfs_inode *ip,
1753 struct timespec64 *ts)
1754 {
1755 ts->tv_nsec = clamp_t(long, ts->tv_nsec, 0, NSEC_PER_SEC);
1756 *ts = timestamp_truncate(*ts, VFS_I(ip));
1757 }
1758
1759 /* Nanosecond counters can't have more than 1 billion. */
1760 STATIC void
xrep_inode_timestamps(struct xfs_inode * ip)1761 xrep_inode_timestamps(
1762 struct xfs_inode *ip)
1763 {
1764 struct timespec64 tstamp;
1765 struct inode *inode = VFS_I(ip);
1766
1767 tstamp = inode_get_atime(inode);
1768 xrep_clamp_timestamp(ip, &tstamp);
1769 inode_set_atime_to_ts(inode, tstamp);
1770
1771 tstamp = inode_get_mtime(inode);
1772 xrep_clamp_timestamp(ip, &tstamp);
1773 inode_set_mtime_to_ts(inode, tstamp);
1774
1775 tstamp = inode_get_ctime(inode);
1776 xrep_clamp_timestamp(ip, &tstamp);
1777 inode_set_ctime_to_ts(inode, tstamp);
1778
1779 xrep_clamp_timestamp(ip, &ip->i_crtime);
1780 }
1781
1782 /* Fix inode flags that don't make sense together. */
1783 STATIC void
xrep_inode_flags(struct xfs_scrub * sc)1784 xrep_inode_flags(
1785 struct xfs_scrub *sc)
1786 {
1787 uint16_t mode;
1788
1789 trace_xrep_inode_flags(sc);
1790
1791 mode = VFS_I(sc->ip)->i_mode;
1792
1793 /* Clear junk flags */
1794 if (sc->ip->i_diflags & ~XFS_DIFLAG_ANY)
1795 sc->ip->i_diflags &= ~XFS_DIFLAG_ANY;
1796
1797 /* NEWRTBM only applies to realtime bitmaps */
1798 if (sc->ip->i_ino == sc->mp->m_sb.sb_rbmino)
1799 sc->ip->i_diflags |= XFS_DIFLAG_NEWRTBM;
1800 else
1801 sc->ip->i_diflags &= ~XFS_DIFLAG_NEWRTBM;
1802
1803 /* These only make sense for directories. */
1804 if (!S_ISDIR(mode))
1805 sc->ip->i_diflags &= ~(XFS_DIFLAG_RTINHERIT |
1806 XFS_DIFLAG_EXTSZINHERIT |
1807 XFS_DIFLAG_PROJINHERIT |
1808 XFS_DIFLAG_NOSYMLINKS);
1809
1810 /* These only make sense for files. */
1811 if (!S_ISREG(mode))
1812 sc->ip->i_diflags &= ~(XFS_DIFLAG_REALTIME |
1813 XFS_DIFLAG_EXTSIZE);
1814
1815 /* These only make sense for non-rt files. */
1816 if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME)
1817 sc->ip->i_diflags &= ~XFS_DIFLAG_FILESTREAM;
1818
1819 /* Immutable and append only? Drop the append. */
1820 if ((sc->ip->i_diflags & XFS_DIFLAG_IMMUTABLE) &&
1821 (sc->ip->i_diflags & XFS_DIFLAG_APPEND))
1822 sc->ip->i_diflags &= ~XFS_DIFLAG_APPEND;
1823
1824 /* Clear junk flags. */
1825 if (sc->ip->i_diflags2 & ~XFS_DIFLAG2_ANY)
1826 sc->ip->i_diflags2 &= ~XFS_DIFLAG2_ANY;
1827
1828 /* No reflink flag unless we support it and it's a file. */
1829 if (!xfs_has_reflink(sc->mp) || !S_ISREG(mode))
1830 sc->ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
1831
1832 /* DAX only applies to files and dirs. */
1833 if (!(S_ISREG(mode) || S_ISDIR(mode)))
1834 sc->ip->i_diflags2 &= ~XFS_DIFLAG2_DAX;
1835 }
1836
1837 /*
1838 * Fix size problems with block/node format directories. If we fail to find
1839 * the extent list, just bail out and let the bmapbtd repair functions clean
1840 * up that mess.
1841 */
1842 STATIC void
xrep_inode_blockdir_size(struct xfs_scrub * sc)1843 xrep_inode_blockdir_size(
1844 struct xfs_scrub *sc)
1845 {
1846 struct xfs_iext_cursor icur;
1847 struct xfs_bmbt_irec got;
1848 struct xfs_ifork *ifp;
1849 xfs_fileoff_t off;
1850 int error;
1851
1852 trace_xrep_inode_blockdir_size(sc);
1853
1854 error = xfs_iread_extents(sc->tp, sc->ip, XFS_DATA_FORK);
1855 if (error)
1856 return;
1857
1858 /* Find the last block before 32G; this is the dir size. */
1859 ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
1860 off = XFS_B_TO_FSB(sc->mp, XFS_DIR2_SPACE_SIZE);
1861 if (!xfs_iext_lookup_extent_before(sc->ip, ifp, &off, &icur, &got)) {
1862 /* zero-extents directory? */
1863 return;
1864 }
1865
1866 off = got.br_startoff + got.br_blockcount;
1867 sc->ip->i_disk_size = min_t(loff_t, XFS_DIR2_SPACE_SIZE,
1868 XFS_FSB_TO_B(sc->mp, off));
1869 }
1870
1871 /* Fix size problems with short format directories. */
1872 STATIC void
xrep_inode_sfdir_size(struct xfs_scrub * sc)1873 xrep_inode_sfdir_size(
1874 struct xfs_scrub *sc)
1875 {
1876 struct xfs_ifork *ifp;
1877
1878 trace_xrep_inode_sfdir_size(sc);
1879
1880 ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
1881 sc->ip->i_disk_size = ifp->if_bytes;
1882 }
1883
1884 /*
1885 * Fix any irregularities in a directory inode's size now that we can iterate
1886 * extent maps and access other regular inode data.
1887 */
1888 STATIC void
xrep_inode_dir_size(struct xfs_scrub * sc)1889 xrep_inode_dir_size(
1890 struct xfs_scrub *sc)
1891 {
1892 trace_xrep_inode_dir_size(sc);
1893
1894 switch (sc->ip->i_df.if_format) {
1895 case XFS_DINODE_FMT_EXTENTS:
1896 case XFS_DINODE_FMT_BTREE:
1897 xrep_inode_blockdir_size(sc);
1898 break;
1899 case XFS_DINODE_FMT_LOCAL:
1900 xrep_inode_sfdir_size(sc);
1901 break;
1902 }
1903 }
1904
1905 /* Fix extent size hint problems. */
1906 STATIC void
xrep_inode_extsize(struct xfs_scrub * sc)1907 xrep_inode_extsize(
1908 struct xfs_scrub *sc)
1909 {
1910 /* Fix misaligned extent size hints on a directory. */
1911 if ((sc->ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
1912 (sc->ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) &&
1913 xfs_extlen_to_rtxmod(sc->mp, sc->ip->i_extsize) > 0) {
1914 sc->ip->i_extsize = 0;
1915 sc->ip->i_diflags &= ~XFS_DIFLAG_EXTSZINHERIT;
1916 }
1917 }
1918
1919 /* Ensure this file has an attr fork if it needs to hold a parent pointer. */
1920 STATIC int
xrep_inode_pptr(struct xfs_scrub * sc)1921 xrep_inode_pptr(
1922 struct xfs_scrub *sc)
1923 {
1924 struct xfs_mount *mp = sc->mp;
1925 struct xfs_inode *ip = sc->ip;
1926 struct inode *inode = VFS_I(ip);
1927
1928 if (!xfs_has_parent(mp))
1929 return 0;
1930
1931 /*
1932 * Unlinked inodes that cannot be added to the directory tree will not
1933 * have a parent pointer.
1934 */
1935 if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE))
1936 return 0;
1937
1938 /* Children of the superblock do not have parent pointers. */
1939 if (xchk_inode_is_sb_rooted(ip))
1940 return 0;
1941
1942 /* Inode already has an attr fork; no further work possible here. */
1943 if (xfs_inode_has_attr_fork(ip))
1944 return 0;
1945
1946 return xfs_bmap_add_attrfork(sc->tp, ip,
1947 sizeof(struct xfs_attr_sf_hdr), true);
1948 }
1949
1950 /* Fix COW extent size hint problems. */
1951 STATIC void
xrep_inode_cowextsize(struct xfs_scrub * sc)1952 xrep_inode_cowextsize(
1953 struct xfs_scrub *sc)
1954 {
1955 /* Fix misaligned CoW extent size hints on a directory. */
1956 if ((sc->ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
1957 (sc->ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) &&
1958 sc->ip->i_extsize % sc->mp->m_sb.sb_rextsize > 0) {
1959 sc->ip->i_cowextsize = 0;
1960 sc->ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE;
1961 }
1962 }
1963
1964 /* Fix any irregularities in an inode that the verifiers don't catch. */
1965 STATIC int
xrep_inode_problems(struct xfs_scrub * sc)1966 xrep_inode_problems(
1967 struct xfs_scrub *sc)
1968 {
1969 int error;
1970
1971 error = xrep_inode_blockcounts(sc);
1972 if (error)
1973 return error;
1974 error = xrep_inode_pptr(sc);
1975 if (error)
1976 return error;
1977 xrep_inode_timestamps(sc->ip);
1978 xrep_inode_flags(sc);
1979 xrep_inode_ids(sc);
1980 /*
1981 * We can now do a better job fixing the size of a directory now that
1982 * we can scan the data fork extents than we could in xrep_dinode_size.
1983 */
1984 if (S_ISDIR(VFS_I(sc->ip)->i_mode))
1985 xrep_inode_dir_size(sc);
1986 xrep_inode_extsize(sc);
1987 xrep_inode_cowextsize(sc);
1988
1989 trace_xrep_inode_fixed(sc);
1990 xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
1991 return xrep_roll_trans(sc);
1992 }
1993
1994 /*
1995 * Make sure this inode's unlinked list pointers are consistent with its
1996 * link count.
1997 */
1998 STATIC int
xrep_inode_unlinked(struct xfs_scrub * sc)1999 xrep_inode_unlinked(
2000 struct xfs_scrub *sc)
2001 {
2002 unsigned int nlink = VFS_I(sc->ip)->i_nlink;
2003 int error;
2004
2005 /*
2006 * If this inode is linked from the directory tree and on the unlinked
2007 * list, remove it from the unlinked list.
2008 */
2009 if (nlink > 0 && xfs_inode_on_unlinked_list(sc->ip)) {
2010 struct xfs_perag *pag;
2011 int error;
2012
2013 pag = xfs_perag_get(sc->mp,
2014 XFS_INO_TO_AGNO(sc->mp, sc->ip->i_ino));
2015 error = xfs_iunlink_remove(sc->tp, pag, sc->ip);
2016 xfs_perag_put(pag);
2017 if (error)
2018 return error;
2019 }
2020
2021 /*
2022 * If this inode is not linked from the directory tree yet not on the
2023 * unlinked list, put it on the unlinked list.
2024 */
2025 if (nlink == 0 && !xfs_inode_on_unlinked_list(sc->ip)) {
2026 error = xfs_iunlink(sc->tp, sc->ip);
2027 if (error)
2028 return error;
2029 }
2030
2031 return 0;
2032 }
2033
2034 /* Repair an inode's fields. */
2035 int
xrep_inode(struct xfs_scrub * sc)2036 xrep_inode(
2037 struct xfs_scrub *sc)
2038 {
2039 int error = 0;
2040
2041 /*
2042 * No inode? That means we failed the _iget verifiers. Repair all
2043 * the things that the inode verifiers care about, then retry _iget.
2044 */
2045 if (!sc->ip) {
2046 struct xrep_inode *ri = sc->buf;
2047
2048 ASSERT(ri != NULL);
2049
2050 error = xrep_dinode_problems(ri);
2051 if (error == -EBUSY) {
2052 /*
2053 * Directory scan to recover inode mode encountered a
2054 * busy inode, so we did not continue repairing things.
2055 */
2056 return 0;
2057 }
2058 if (error)
2059 return error;
2060
2061 /* By this point we had better have a working incore inode. */
2062 if (!sc->ip)
2063 return -EFSCORRUPTED;
2064 }
2065
2066 xfs_trans_ijoin(sc->tp, sc->ip, 0);
2067
2068 /* If we found corruption of any kind, try to fix it. */
2069 if ((sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) ||
2070 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_XCORRUPT)) {
2071 error = xrep_inode_problems(sc);
2072 if (error)
2073 return error;
2074 }
2075
2076 /* See if we can clear the reflink flag. */
2077 if (xfs_is_reflink_inode(sc->ip)) {
2078 error = xfs_reflink_clear_inode_flag(sc->ip, &sc->tp);
2079 if (error)
2080 return error;
2081 }
2082
2083 /* Reconnect incore unlinked list */
2084 error = xrep_inode_unlinked(sc);
2085 if (error)
2086 return error;
2087
2088 return xrep_defer_finish(sc);
2089 }
2090