1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (C) 2017-2023 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_btree.h"
13 #include "xfs_bit.h"
14 #include "xfs_log_format.h"
15 #include "xfs_trans.h"
16 #include "xfs_inode.h"
17 #include "xfs_alloc.h"
18 #include "xfs_bmap.h"
19 #include "xfs_bmap_btree.h"
20 #include "xfs_rmap.h"
21 #include "xfs_rmap_btree.h"
22 #include "xfs_rtgroup.h"
23 #include "xfs_health.h"
24 #include "xfs_rtalloc.h"
25 #include "xfs_rtrmap_btree.h"
26 #include "scrub/scrub.h"
27 #include "scrub/common.h"
28 #include "scrub/btree.h"
29 #include "scrub/health.h"
30 #include "xfs_ag.h"
31
32 /* Set us up with an inode's bmap. */
33 int
xchk_setup_inode_bmap(struct xfs_scrub * sc)34 xchk_setup_inode_bmap(
35 struct xfs_scrub *sc)
36 {
37 int error;
38
39 if (xchk_need_intent_drain(sc))
40 xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
41
42 error = xchk_iget_for_scrubbing(sc);
43 if (error)
44 goto out;
45
46 xchk_ilock(sc, XFS_IOLOCK_EXCL);
47
48 /*
49 * We don't want any ephemeral data/cow fork updates sitting around
50 * while we inspect block mappings, so wait for directio to finish
51 * and flush dirty data if we have delalloc reservations.
52 */
53 if (S_ISREG(VFS_I(sc->ip)->i_mode) &&
54 sc->sm->sm_type != XFS_SCRUB_TYPE_BMBTA) {
55 struct address_space *mapping = VFS_I(sc->ip)->i_mapping;
56 bool is_repair = xchk_could_repair(sc);
57
58 xchk_ilock(sc, XFS_MMAPLOCK_EXCL);
59
60 /* Break all our leases, we're going to mess with things. */
61 if (is_repair) {
62 error = xfs_break_layouts(VFS_I(sc->ip),
63 &sc->ilock_flags, BREAK_WRITE);
64 if (error)
65 goto out;
66 }
67
68 inode_dio_wait(VFS_I(sc->ip));
69
70 /*
71 * Try to flush all incore state to disk before we examine the
72 * space mappings for the data fork. Leave accumulated errors
73 * in the mapping for the writer threads to consume.
74 *
75 * On ENOSPC or EIO writeback errors, we continue into the
76 * extent mapping checks because write failures do not
77 * necessarily imply anything about the correctness of the file
78 * metadata. The metadata and the file data could be on
79 * completely separate devices; a media failure might only
80 * affect a subset of the disk, etc. We can handle delalloc
81 * extents in the scrubber, so leaving them in memory is fine.
82 */
83 error = filemap_fdatawrite(mapping);
84 if (!error)
85 error = filemap_fdatawait_keep_errors(mapping);
86 if (error && (error != -ENOSPC && error != -EIO))
87 goto out;
88
89 /* Drop the page cache if we're repairing block mappings. */
90 if (is_repair) {
91 error = invalidate_inode_pages2(
92 VFS_I(sc->ip)->i_mapping);
93 if (error)
94 goto out;
95 }
96
97 }
98
99 /* Got the inode, lock it and we're ready to go. */
100 error = xchk_trans_alloc(sc, 0);
101 if (error)
102 goto out;
103
104 error = xchk_ino_dqattach(sc);
105 if (error)
106 goto out;
107
108 xchk_ilock(sc, XFS_ILOCK_EXCL);
109 out:
110 /* scrub teardown will unlock and release the inode */
111 return error;
112 }
113
114 /*
115 * Inode fork block mapping (BMBT) scrubber.
116 * More complex than the others because we have to scrub
117 * all the extents regardless of whether or not the fork
118 * is in btree format.
119 */
120
121 struct xchk_bmap_info {
122 struct xfs_scrub *sc;
123
124 /* Incore extent tree cursor */
125 struct xfs_iext_cursor icur;
126
127 /* Previous fork mapping that we examined */
128 struct xfs_bmbt_irec prev_rec;
129
130 /* Is this a realtime fork? */
131 bool is_rt;
132
133 /* May mappings point to shared space? */
134 bool is_shared;
135
136 /* Was the incore extent tree loaded? */
137 bool was_loaded;
138
139 /* Which inode fork are we checking? */
140 int whichfork;
141 };
142
143 /* Look for a corresponding rmap for this irec. */
144 static inline bool
xchk_bmap_get_rmap(struct xchk_bmap_info * info,struct xfs_bmbt_irec * irec,xfs_agblock_t bno,uint64_t owner,struct xfs_rmap_irec * rmap)145 xchk_bmap_get_rmap(
146 struct xchk_bmap_info *info,
147 struct xfs_bmbt_irec *irec,
148 xfs_agblock_t bno,
149 uint64_t owner,
150 struct xfs_rmap_irec *rmap)
151 {
152 struct xfs_btree_cur **curp = &info->sc->sa.rmap_cur;
153 xfs_fileoff_t offset;
154 unsigned int rflags = 0;
155 int has_rmap;
156 int error;
157
158 if (xfs_ifork_is_realtime(info->sc->ip, info->whichfork))
159 curp = &info->sc->sr.rmap_cur;
160
161 if (*curp == NULL)
162 return false;
163
164 if (info->whichfork == XFS_ATTR_FORK)
165 rflags |= XFS_RMAP_ATTR_FORK;
166 if (irec->br_state == XFS_EXT_UNWRITTEN)
167 rflags |= XFS_RMAP_UNWRITTEN;
168
169 /*
170 * CoW staging extents are owned (on disk) by the refcountbt, so
171 * their rmaps do not have offsets.
172 */
173 if (info->whichfork == XFS_COW_FORK)
174 offset = 0;
175 else
176 offset = irec->br_startoff;
177
178 /*
179 * If the caller thinks this could be a shared bmbt extent (IOWs,
180 * any data fork extent of a reflink inode) then we have to use the
181 * range rmap lookup to make sure we get the correct owner/offset.
182 */
183 if (info->is_shared) {
184 error = xfs_rmap_lookup_le_range(*curp, bno, owner, offset,
185 rflags, rmap, &has_rmap);
186 } else {
187 error = xfs_rmap_lookup_le(*curp, bno, owner, offset,
188 rflags, rmap, &has_rmap);
189 }
190 if (!xchk_should_check_xref(info->sc, &error, curp))
191 return false;
192
193 if (!has_rmap)
194 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
195 irec->br_startoff);
196 return has_rmap;
197 }
198
199 /* Make sure that we have rmapbt records for this data/attr fork extent. */
200 STATIC void
xchk_bmap_xref_rmap(struct xchk_bmap_info * info,struct xfs_bmbt_irec * irec,xfs_agblock_t bno)201 xchk_bmap_xref_rmap(
202 struct xchk_bmap_info *info,
203 struct xfs_bmbt_irec *irec,
204 xfs_agblock_t bno)
205 {
206 struct xfs_rmap_irec rmap;
207 unsigned long long rmap_end;
208 uint64_t owner = info->sc->ip->i_ino;
209
210 if (xchk_skip_xref(info->sc->sm))
211 return;
212
213 /* Find the rmap record for this irec. */
214 if (!xchk_bmap_get_rmap(info, irec, bno, owner, &rmap))
215 return;
216
217 /*
218 * The rmap must be an exact match for this incore file mapping record,
219 * which may have arisen from multiple ondisk records.
220 */
221 if (rmap.rm_startblock != bno)
222 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
223 irec->br_startoff);
224
225 rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
226 if (rmap_end != bno + irec->br_blockcount)
227 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
228 irec->br_startoff);
229
230 /* Check the logical offsets. */
231 if (rmap.rm_offset != irec->br_startoff)
232 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
233 irec->br_startoff);
234
235 rmap_end = (unsigned long long)rmap.rm_offset + rmap.rm_blockcount;
236 if (rmap_end != irec->br_startoff + irec->br_blockcount)
237 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
238 irec->br_startoff);
239
240 /* Check the owner */
241 if (rmap.rm_owner != owner)
242 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
243 irec->br_startoff);
244
245 /*
246 * Check for discrepancies between the unwritten flag in the irec and
247 * the rmap. Note that the (in-memory) CoW fork distinguishes between
248 * unwritten and written extents, but we don't track that in the rmap
249 * records because the blocks are owned (on-disk) by the refcountbt,
250 * which doesn't track unwritten state.
251 */
252 if (!!(irec->br_state == XFS_EXT_UNWRITTEN) !=
253 !!(rmap.rm_flags & XFS_RMAP_UNWRITTEN))
254 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
255 irec->br_startoff);
256
257 if (!!(info->whichfork == XFS_ATTR_FORK) !=
258 !!(rmap.rm_flags & XFS_RMAP_ATTR_FORK))
259 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
260 irec->br_startoff);
261 if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
262 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
263 irec->br_startoff);
264 }
265
266 /* Make sure that we have rmapbt records for this COW fork extent. */
267 STATIC void
xchk_bmap_xref_rmap_cow(struct xchk_bmap_info * info,struct xfs_bmbt_irec * irec,xfs_agblock_t bno)268 xchk_bmap_xref_rmap_cow(
269 struct xchk_bmap_info *info,
270 struct xfs_bmbt_irec *irec,
271 xfs_agblock_t bno)
272 {
273 struct xfs_rmap_irec rmap;
274 unsigned long long rmap_end;
275 uint64_t owner = XFS_RMAP_OWN_COW;
276
277 if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm))
278 return;
279
280 /* Find the rmap record for this irec. */
281 if (!xchk_bmap_get_rmap(info, irec, bno, owner, &rmap))
282 return;
283
284 /*
285 * CoW staging extents are owned by the refcount btree, so the rmap
286 * can start before and end after the physical space allocated to this
287 * mapping. There are no offsets to check.
288 */
289 if (rmap.rm_startblock > bno)
290 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
291 irec->br_startoff);
292
293 rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
294 if (rmap_end < bno + irec->br_blockcount)
295 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
296 irec->br_startoff);
297
298 /* Check the owner */
299 if (rmap.rm_owner != owner)
300 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
301 irec->br_startoff);
302
303 /*
304 * No flags allowed. Note that the (in-memory) CoW fork distinguishes
305 * between unwritten and written extents, but we don't track that in
306 * the rmap records because the blocks are owned (on-disk) by the
307 * refcountbt, which doesn't track unwritten state.
308 */
309 if (rmap.rm_flags & XFS_RMAP_ATTR_FORK)
310 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
311 irec->br_startoff);
312 if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
313 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
314 irec->br_startoff);
315 if (rmap.rm_flags & XFS_RMAP_UNWRITTEN)
316 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
317 irec->br_startoff);
318 }
319
320 /* Cross-reference a single rtdev extent record. */
321 STATIC void
xchk_bmap_rt_iextent_xref(struct xfs_inode * ip,struct xchk_bmap_info * info,struct xfs_bmbt_irec * irec)322 xchk_bmap_rt_iextent_xref(
323 struct xfs_inode *ip,
324 struct xchk_bmap_info *info,
325 struct xfs_bmbt_irec *irec)
326 {
327 struct xfs_owner_info oinfo;
328 xfs_rgblock_t rgbno;
329 int error;
330
331 error = xchk_rtgroup_init_existing(info->sc,
332 xfs_rtb_to_rgno(ip->i_mount, irec->br_startblock),
333 &info->sc->sr);
334 if (!xchk_fblock_process_error(info->sc, info->whichfork,
335 irec->br_startoff, &error))
336 return;
337
338 error = xchk_rtgroup_lock(info->sc, &info->sc->sr, XCHK_RTGLOCK_ALL);
339 if (!xchk_fblock_process_error(info->sc, info->whichfork,
340 irec->br_startoff, &error))
341 goto out_free;
342
343 xchk_xref_is_used_rt_space(info->sc, irec->br_startblock,
344 irec->br_blockcount);
345
346 if (!xfs_has_rtrmapbt(info->sc->mp))
347 goto out_cur;
348
349 rgbno = xfs_rtb_to_rgbno(info->sc->mp, irec->br_startblock);
350
351 switch (info->whichfork) {
352 case XFS_DATA_FORK:
353 xchk_bmap_xref_rmap(info, irec, rgbno);
354 if (!xfs_is_reflink_inode(info->sc->ip)) {
355 xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino,
356 info->whichfork, irec->br_startoff);
357 xchk_xref_is_only_rt_owned_by(info->sc, rgbno,
358 irec->br_blockcount, &oinfo);
359 xchk_xref_is_not_rt_shared(info->sc, rgbno,
360 irec->br_blockcount);
361 }
362 xchk_xref_is_not_rt_cow_staging(info->sc, rgbno,
363 irec->br_blockcount);
364 break;
365 case XFS_COW_FORK:
366 xchk_bmap_xref_rmap_cow(info, irec, rgbno);
367 xchk_xref_is_only_rt_owned_by(info->sc, rgbno,
368 irec->br_blockcount, &XFS_RMAP_OINFO_COW);
369 xchk_xref_is_rt_cow_staging(info->sc, rgbno,
370 irec->br_blockcount);
371 xchk_xref_is_not_rt_shared(info->sc, rgbno,
372 irec->br_blockcount);
373 break;
374 }
375 out_cur:
376 xchk_rtgroup_btcur_free(&info->sc->sr);
377 out_free:
378 xchk_rtgroup_free(info->sc, &info->sc->sr);
379 }
380
381 /* Cross-reference a single datadev extent record. */
382 STATIC void
xchk_bmap_iextent_xref(struct xfs_inode * ip,struct xchk_bmap_info * info,struct xfs_bmbt_irec * irec)383 xchk_bmap_iextent_xref(
384 struct xfs_inode *ip,
385 struct xchk_bmap_info *info,
386 struct xfs_bmbt_irec *irec)
387 {
388 struct xfs_owner_info oinfo;
389 struct xfs_mount *mp = info->sc->mp;
390 xfs_agnumber_t agno;
391 xfs_agblock_t agbno;
392 xfs_extlen_t len;
393 int error;
394
395 agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
396 agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
397 len = irec->br_blockcount;
398
399 error = xchk_ag_init_existing(info->sc, agno, &info->sc->sa);
400 if (!xchk_fblock_process_error(info->sc, info->whichfork,
401 irec->br_startoff, &error))
402 goto out_free;
403
404 xchk_xref_is_used_space(info->sc, agbno, len);
405 xchk_xref_is_not_inode_chunk(info->sc, agbno, len);
406 switch (info->whichfork) {
407 case XFS_DATA_FORK:
408 xchk_bmap_xref_rmap(info, irec, agbno);
409 if (!xfs_is_reflink_inode(info->sc->ip)) {
410 xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino,
411 info->whichfork, irec->br_startoff);
412 xchk_xref_is_only_owned_by(info->sc, agbno,
413 irec->br_blockcount, &oinfo);
414 xchk_xref_is_not_shared(info->sc, agbno,
415 irec->br_blockcount);
416 }
417 xchk_xref_is_not_cow_staging(info->sc, agbno,
418 irec->br_blockcount);
419 break;
420 case XFS_ATTR_FORK:
421 xchk_bmap_xref_rmap(info, irec, agbno);
422 xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino,
423 info->whichfork, irec->br_startoff);
424 xchk_xref_is_only_owned_by(info->sc, agbno, irec->br_blockcount,
425 &oinfo);
426 xchk_xref_is_not_shared(info->sc, agbno,
427 irec->br_blockcount);
428 xchk_xref_is_not_cow_staging(info->sc, agbno,
429 irec->br_blockcount);
430 break;
431 case XFS_COW_FORK:
432 xchk_bmap_xref_rmap_cow(info, irec, agbno);
433 xchk_xref_is_only_owned_by(info->sc, agbno, irec->br_blockcount,
434 &XFS_RMAP_OINFO_COW);
435 xchk_xref_is_cow_staging(info->sc, agbno,
436 irec->br_blockcount);
437 xchk_xref_is_not_shared(info->sc, agbno,
438 irec->br_blockcount);
439 break;
440 }
441
442 out_free:
443 xchk_ag_free(info->sc, &info->sc->sa);
444 }
445
446 /*
447 * Directories and attr forks should never have blocks that can't be addressed
448 * by a xfs_dablk_t.
449 */
450 STATIC void
xchk_bmap_dirattr_extent(struct xfs_inode * ip,struct xchk_bmap_info * info,struct xfs_bmbt_irec * irec)451 xchk_bmap_dirattr_extent(
452 struct xfs_inode *ip,
453 struct xchk_bmap_info *info,
454 struct xfs_bmbt_irec *irec)
455 {
456 struct xfs_mount *mp = ip->i_mount;
457 xfs_fileoff_t off;
458
459 if (!S_ISDIR(VFS_I(ip)->i_mode) && info->whichfork != XFS_ATTR_FORK)
460 return;
461
462 if (!xfs_verify_dablk(mp, irec->br_startoff))
463 xchk_fblock_set_corrupt(info->sc, info->whichfork,
464 irec->br_startoff);
465
466 off = irec->br_startoff + irec->br_blockcount - 1;
467 if (!xfs_verify_dablk(mp, off))
468 xchk_fblock_set_corrupt(info->sc, info->whichfork, off);
469 }
470
471 /* Scrub a single extent record. */
472 STATIC void
xchk_bmap_iextent(struct xfs_inode * ip,struct xchk_bmap_info * info,struct xfs_bmbt_irec * irec)473 xchk_bmap_iextent(
474 struct xfs_inode *ip,
475 struct xchk_bmap_info *info,
476 struct xfs_bmbt_irec *irec)
477 {
478 struct xfs_mount *mp = info->sc->mp;
479
480 /*
481 * Check for out-of-order extents. This record could have come
482 * from the incore list, for which there is no ordering check.
483 */
484 if (irec->br_startoff < info->prev_rec.br_startoff +
485 info->prev_rec.br_blockcount)
486 xchk_fblock_set_corrupt(info->sc, info->whichfork,
487 irec->br_startoff);
488
489 if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
490 xchk_fblock_set_corrupt(info->sc, info->whichfork,
491 irec->br_startoff);
492
493 xchk_bmap_dirattr_extent(ip, info, irec);
494
495 /* Make sure the extent points to a valid place. */
496 if (info->is_rt &&
497 !xfs_verify_rtbext(mp, irec->br_startblock, irec->br_blockcount))
498 xchk_fblock_set_corrupt(info->sc, info->whichfork,
499 irec->br_startoff);
500 if (!info->is_rt &&
501 !xfs_verify_fsbext(mp, irec->br_startblock, irec->br_blockcount))
502 xchk_fblock_set_corrupt(info->sc, info->whichfork,
503 irec->br_startoff);
504
505 /* We don't allow unwritten extents on attr forks. */
506 if (irec->br_state == XFS_EXT_UNWRITTEN &&
507 info->whichfork == XFS_ATTR_FORK)
508 xchk_fblock_set_corrupt(info->sc, info->whichfork,
509 irec->br_startoff);
510
511 if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
512 return;
513
514 if (info->is_rt)
515 xchk_bmap_rt_iextent_xref(ip, info, irec);
516 else
517 xchk_bmap_iextent_xref(ip, info, irec);
518 }
519
520 /* Scrub a bmbt record. */
521 STATIC int
xchk_bmapbt_rec(struct xchk_btree * bs,const union xfs_btree_rec * rec)522 xchk_bmapbt_rec(
523 struct xchk_btree *bs,
524 const union xfs_btree_rec *rec)
525 {
526 struct xfs_bmbt_irec irec;
527 struct xfs_bmbt_irec iext_irec;
528 struct xfs_iext_cursor icur;
529 struct xchk_bmap_info *info = bs->private;
530 struct xfs_inode *ip = bs->cur->bc_ino.ip;
531 struct xfs_buf *bp = NULL;
532 struct xfs_btree_block *block;
533 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, info->whichfork);
534 uint64_t owner;
535 int i;
536
537 /*
538 * Check the owners of the btree blocks up to the level below
539 * the root since the verifiers don't do that.
540 */
541 if (xfs_has_crc(bs->cur->bc_mp) &&
542 bs->cur->bc_levels[0].ptr == 1) {
543 for (i = 0; i < bs->cur->bc_nlevels - 1; i++) {
544 block = xfs_btree_get_block(bs->cur, i, &bp);
545 owner = be64_to_cpu(block->bb_u.l.bb_owner);
546 if (owner != ip->i_ino)
547 xchk_fblock_set_corrupt(bs->sc,
548 info->whichfork, 0);
549 }
550 }
551
552 /*
553 * Check that the incore extent tree contains an extent that matches
554 * this one exactly. We validate those cached bmaps later, so we don't
555 * need to check them here. If the incore extent tree was just loaded
556 * from disk by the scrubber, we assume that its contents match what's
557 * on disk (we still hold the ILOCK) and skip the equivalence check.
558 */
559 if (!info->was_loaded)
560 return 0;
561
562 xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
563 if (xfs_bmap_validate_extent(ip, info->whichfork, &irec) != NULL) {
564 xchk_fblock_set_corrupt(bs->sc, info->whichfork,
565 irec.br_startoff);
566 return 0;
567 }
568
569 if (!xfs_iext_lookup_extent(ip, ifp, irec.br_startoff, &icur,
570 &iext_irec) ||
571 irec.br_startoff != iext_irec.br_startoff ||
572 irec.br_startblock != iext_irec.br_startblock ||
573 irec.br_blockcount != iext_irec.br_blockcount ||
574 irec.br_state != iext_irec.br_state)
575 xchk_fblock_set_corrupt(bs->sc, info->whichfork,
576 irec.br_startoff);
577 return 0;
578 }
579
580 /* Scan the btree records. */
581 STATIC int
xchk_bmap_btree(struct xfs_scrub * sc,int whichfork,struct xchk_bmap_info * info)582 xchk_bmap_btree(
583 struct xfs_scrub *sc,
584 int whichfork,
585 struct xchk_bmap_info *info)
586 {
587 struct xfs_owner_info oinfo;
588 struct xfs_ifork *ifp = xfs_ifork_ptr(sc->ip, whichfork);
589 struct xfs_mount *mp = sc->mp;
590 struct xfs_inode *ip = sc->ip;
591 struct xfs_btree_cur *cur;
592 int error;
593
594 /* Load the incore bmap cache if it's not loaded. */
595 info->was_loaded = !xfs_need_iread_extents(ifp);
596
597 error = xfs_iread_extents(sc->tp, ip, whichfork);
598 if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
599 goto out;
600
601 /* Check the btree structure. */
602 cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork);
603 xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
604 error = xchk_btree(sc, cur, xchk_bmapbt_rec, &oinfo, info);
605 xfs_btree_del_cursor(cur, error);
606 out:
607 return error;
608 }
609
610 struct xchk_bmap_check_rmap_info {
611 struct xfs_scrub *sc;
612 int whichfork;
613 struct xfs_iext_cursor icur;
614 };
615
616 /* Can we find bmaps that fit this rmap? */
617 STATIC int
xchk_bmap_check_rmap(struct xfs_btree_cur * cur,const struct xfs_rmap_irec * rec,void * priv)618 xchk_bmap_check_rmap(
619 struct xfs_btree_cur *cur,
620 const struct xfs_rmap_irec *rec,
621 void *priv)
622 {
623 struct xfs_bmbt_irec irec;
624 struct xfs_rmap_irec check_rec;
625 struct xchk_bmap_check_rmap_info *sbcri = priv;
626 struct xfs_ifork *ifp;
627 struct xfs_scrub *sc = sbcri->sc;
628 bool have_map;
629
630 /* Is this even the right fork? */
631 if (rec->rm_owner != sc->ip->i_ino)
632 return 0;
633 if ((sbcri->whichfork == XFS_ATTR_FORK) ^
634 !!(rec->rm_flags & XFS_RMAP_ATTR_FORK))
635 return 0;
636 if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK)
637 return 0;
638
639 /* Now look up the bmbt record. */
640 ifp = xfs_ifork_ptr(sc->ip, sbcri->whichfork);
641 if (!ifp) {
642 xchk_fblock_set_corrupt(sc, sbcri->whichfork,
643 rec->rm_offset);
644 goto out;
645 }
646 have_map = xfs_iext_lookup_extent(sc->ip, ifp, rec->rm_offset,
647 &sbcri->icur, &irec);
648 if (!have_map)
649 xchk_fblock_set_corrupt(sc, sbcri->whichfork,
650 rec->rm_offset);
651 /*
652 * bmap extent record lengths are constrained to 2^21 blocks in length
653 * because of space constraints in the on-disk metadata structure.
654 * However, rmap extent record lengths are constrained only by AG
655 * length, so we have to loop through the bmbt to make sure that the
656 * entire rmap is covered by bmbt records.
657 */
658 check_rec = *rec;
659 while (have_map) {
660 if (irec.br_startoff != check_rec.rm_offset)
661 xchk_fblock_set_corrupt(sc, sbcri->whichfork,
662 check_rec.rm_offset);
663 if (irec.br_startblock !=
664 xfs_gbno_to_fsb(cur->bc_group, check_rec.rm_startblock))
665 xchk_fblock_set_corrupt(sc, sbcri->whichfork,
666 check_rec.rm_offset);
667 if (irec.br_blockcount > check_rec.rm_blockcount)
668 xchk_fblock_set_corrupt(sc, sbcri->whichfork,
669 check_rec.rm_offset);
670 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
671 break;
672 check_rec.rm_startblock += irec.br_blockcount;
673 check_rec.rm_offset += irec.br_blockcount;
674 check_rec.rm_blockcount -= irec.br_blockcount;
675 if (check_rec.rm_blockcount == 0)
676 break;
677 have_map = xfs_iext_next_extent(ifp, &sbcri->icur, &irec);
678 if (!have_map)
679 xchk_fblock_set_corrupt(sc, sbcri->whichfork,
680 check_rec.rm_offset);
681 }
682
683 out:
684 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
685 return -ECANCELED;
686 return 0;
687 }
688
689 /* Make sure each rmap has a corresponding bmbt entry. */
690 STATIC int
xchk_bmap_check_ag_rmaps(struct xfs_scrub * sc,int whichfork,struct xfs_perag * pag)691 xchk_bmap_check_ag_rmaps(
692 struct xfs_scrub *sc,
693 int whichfork,
694 struct xfs_perag *pag)
695 {
696 struct xchk_bmap_check_rmap_info sbcri;
697 struct xfs_btree_cur *cur;
698 struct xfs_buf *agf;
699 int error;
700
701 error = xfs_alloc_read_agf(pag, sc->tp, 0, &agf);
702 if (error)
703 return error;
704
705 cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf, pag);
706
707 sbcri.sc = sc;
708 sbcri.whichfork = whichfork;
709 error = xfs_rmap_query_all(cur, xchk_bmap_check_rmap, &sbcri);
710 if (error == -ECANCELED)
711 error = 0;
712
713 xfs_btree_del_cursor(cur, error);
714 xfs_trans_brelse(sc->tp, agf);
715 return error;
716 }
717
718 /* Make sure each rt rmap has a corresponding bmbt entry. */
719 STATIC int
xchk_bmap_check_rt_rmaps(struct xfs_scrub * sc,struct xfs_rtgroup * rtg)720 xchk_bmap_check_rt_rmaps(
721 struct xfs_scrub *sc,
722 struct xfs_rtgroup *rtg)
723 {
724 struct xchk_bmap_check_rmap_info sbcri;
725 struct xfs_btree_cur *cur;
726 int error;
727
728 xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
729 cur = xfs_rtrmapbt_init_cursor(sc->tp, rtg);
730
731 sbcri.sc = sc;
732 sbcri.whichfork = XFS_DATA_FORK;
733 error = xfs_rmap_query_all(cur, xchk_bmap_check_rmap, &sbcri);
734 if (error == -ECANCELED)
735 error = 0;
736
737 xfs_btree_del_cursor(cur, error);
738 xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP);
739 return error;
740 }
741
742 /*
743 * Decide if we want to scan the reverse mappings to determine if the attr
744 * fork /really/ has zero space mappings.
745 */
746 STATIC bool
xchk_bmap_check_empty_attrfork(struct xfs_inode * ip)747 xchk_bmap_check_empty_attrfork(
748 struct xfs_inode *ip)
749 {
750 struct xfs_ifork *ifp = &ip->i_af;
751
752 /*
753 * If the dinode repair found a bad attr fork, it will reset the fork
754 * to extents format with zero records and wait for the this scrubber
755 * to reconstruct the block mappings. If the fork is not in this
756 * state, then the fork cannot have been zapped.
757 */
758 if (ifp->if_format != XFS_DINODE_FMT_EXTENTS || ifp->if_nextents != 0)
759 return false;
760
761 /*
762 * Files can have an attr fork in EXTENTS format with zero records for
763 * several reasons:
764 *
765 * a) an attr set created a fork but ran out of space
766 * b) attr replace deleted an old attr but failed during the set step
767 * c) the data fork was in btree format when all attrs were deleted, so
768 * the fork was left in place
769 * d) the inode repair code zapped the fork
770 *
771 * Only in case (d) do we want to scan the rmapbt to see if we need to
772 * rebuild the attr fork. The fork zap code clears all DAC permission
773 * bits and zeroes the uid and gid, so avoid the scan if any of those
774 * three conditions are not met.
775 */
776 if ((VFS_I(ip)->i_mode & 0777) != 0)
777 return false;
778 if (!uid_eq(VFS_I(ip)->i_uid, GLOBAL_ROOT_UID))
779 return false;
780 if (!gid_eq(VFS_I(ip)->i_gid, GLOBAL_ROOT_GID))
781 return false;
782
783 return true;
784 }
785
786 /*
787 * Decide if we want to scan the reverse mappings to determine if the data
788 * fork /really/ has zero space mappings.
789 */
790 STATIC bool
xchk_bmap_check_empty_datafork(struct xfs_inode * ip)791 xchk_bmap_check_empty_datafork(
792 struct xfs_inode *ip)
793 {
794 struct xfs_ifork *ifp = &ip->i_df;
795
796 /*
797 * If the dinode repair found a bad data fork, it will reset the fork
798 * to extents format with zero records and wait for the this scrubber
799 * to reconstruct the block mappings. If the fork is not in this
800 * state, then the fork cannot have been zapped.
801 */
802 if (ifp->if_format != XFS_DINODE_FMT_EXTENTS || ifp->if_nextents != 0)
803 return false;
804
805 /*
806 * If we encounter an empty data fork along with evidence that the fork
807 * might not really be empty, we need to scan the reverse mappings to
808 * decide if we're going to rebuild the fork. Data forks with nonzero
809 * file size are scanned.
810 */
811 return i_size_read(VFS_I(ip)) != 0;
812 }
813
814 /*
815 * Decide if we want to walk every rmap btree in the fs to make sure that each
816 * rmap for this file fork has corresponding bmbt entries.
817 */
818 static bool
xchk_bmap_want_check_rmaps(struct xchk_bmap_info * info)819 xchk_bmap_want_check_rmaps(
820 struct xchk_bmap_info *info)
821 {
822 struct xfs_scrub *sc = info->sc;
823
824 if (!xfs_has_rmapbt(sc->mp))
825 return false;
826 if (info->whichfork == XFS_COW_FORK)
827 return false;
828 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
829 return false;
830
831 if (info->whichfork == XFS_ATTR_FORK)
832 return xchk_bmap_check_empty_attrfork(sc->ip);
833
834 return xchk_bmap_check_empty_datafork(sc->ip);
835 }
836
837 /* Make sure each rmap has a corresponding bmbt entry. */
838 STATIC int
xchk_bmap_check_rmaps(struct xfs_scrub * sc,int whichfork)839 xchk_bmap_check_rmaps(
840 struct xfs_scrub *sc,
841 int whichfork)
842 {
843 struct xfs_perag *pag = NULL;
844 int error;
845
846 if (xfs_ifork_is_realtime(sc->ip, whichfork)) {
847 struct xfs_rtgroup *rtg = NULL;
848
849 while ((rtg = xfs_rtgroup_next(sc->mp, rtg))) {
850 error = xchk_bmap_check_rt_rmaps(sc, rtg);
851 if (error ||
852 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) {
853 xfs_rtgroup_rele(rtg);
854 return error;
855 }
856 }
857
858 return 0;
859 }
860
861 while ((pag = xfs_perag_next(sc->mp, pag))) {
862 error = xchk_bmap_check_ag_rmaps(sc, whichfork, pag);
863 if (error ||
864 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) {
865 xfs_perag_rele(pag);
866 return error;
867 }
868 }
869
870 return 0;
871 }
872
873 /* Scrub a delalloc reservation from the incore extent map tree. */
874 STATIC void
xchk_bmap_iextent_delalloc(struct xfs_inode * ip,struct xchk_bmap_info * info,struct xfs_bmbt_irec * irec)875 xchk_bmap_iextent_delalloc(
876 struct xfs_inode *ip,
877 struct xchk_bmap_info *info,
878 struct xfs_bmbt_irec *irec)
879 {
880 struct xfs_mount *mp = info->sc->mp;
881
882 /*
883 * Check for out-of-order extents. This record could have come
884 * from the incore list, for which there is no ordering check.
885 */
886 if (irec->br_startoff < info->prev_rec.br_startoff +
887 info->prev_rec.br_blockcount)
888 xchk_fblock_set_corrupt(info->sc, info->whichfork,
889 irec->br_startoff);
890
891 if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
892 xchk_fblock_set_corrupt(info->sc, info->whichfork,
893 irec->br_startoff);
894
895 /* Make sure the extent points to a valid place. */
896 if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN)
897 xchk_fblock_set_corrupt(info->sc, info->whichfork,
898 irec->br_startoff);
899 }
900
901 /* Decide if this individual fork mapping is ok. */
902 static bool
xchk_bmap_iext_mapping(struct xchk_bmap_info * info,const struct xfs_bmbt_irec * irec)903 xchk_bmap_iext_mapping(
904 struct xchk_bmap_info *info,
905 const struct xfs_bmbt_irec *irec)
906 {
907 /* There should never be a "hole" extent in either extent list. */
908 if (irec->br_startblock == HOLESTARTBLOCK)
909 return false;
910 if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN)
911 return false;
912 return true;
913 }
914
915 /* Are these two mappings contiguous with each other? */
916 static inline bool
xchk_are_bmaps_contiguous(const struct xchk_bmap_info * info,const struct xfs_bmbt_irec * b1,const struct xfs_bmbt_irec * b2)917 xchk_are_bmaps_contiguous(
918 const struct xchk_bmap_info *info,
919 const struct xfs_bmbt_irec *b1,
920 const struct xfs_bmbt_irec *b2)
921 {
922 struct xfs_mount *mp = info->sc->mp;
923
924 /* Don't try to combine unallocated mappings. */
925 if (!xfs_bmap_is_real_extent(b1))
926 return false;
927 if (!xfs_bmap_is_real_extent(b2))
928 return false;
929
930 /* Does b2 come right after b1 in the logical and physical range? */
931 if (b1->br_startoff + b1->br_blockcount != b2->br_startoff)
932 return false;
933 if (b1->br_startblock + b1->br_blockcount != b2->br_startblock)
934 return false;
935 if (b1->br_state != b2->br_state)
936 return false;
937
938 /*
939 * Don't combine bmaps that would cross rtgroup boundaries. This is a
940 * valid state, but if combined they will fail rtb extent checks.
941 */
942 if (info->is_rt && xfs_has_rtgroups(mp)) {
943 if (xfs_rtb_to_rgno(mp, b1->br_startblock) !=
944 xfs_rtb_to_rgno(mp, b2->br_startblock))
945 return false;
946 }
947
948 return true;
949 }
950
951 /*
952 * Walk the incore extent records, accumulating consecutive contiguous records
953 * into a single incore mapping. Returns true if @irec has been set to a
954 * mapping or false if there are no more mappings. Caller must ensure that
955 * @info.icur is zeroed before the first call.
956 */
957 static bool
xchk_bmap_iext_iter(struct xchk_bmap_info * info,struct xfs_bmbt_irec * irec)958 xchk_bmap_iext_iter(
959 struct xchk_bmap_info *info,
960 struct xfs_bmbt_irec *irec)
961 {
962 struct xfs_bmbt_irec got;
963 struct xfs_ifork *ifp;
964 unsigned int nr = 0;
965
966 ifp = xfs_ifork_ptr(info->sc->ip, info->whichfork);
967
968 /* Advance to the next iextent record and check the mapping. */
969 xfs_iext_next(ifp, &info->icur);
970 if (!xfs_iext_get_extent(ifp, &info->icur, irec))
971 return false;
972
973 if (!xchk_bmap_iext_mapping(info, irec)) {
974 xchk_fblock_set_corrupt(info->sc, info->whichfork,
975 irec->br_startoff);
976 return false;
977 }
978 nr++;
979
980 /*
981 * Iterate subsequent iextent records and merge them with the one
982 * that we just read, if possible.
983 */
984 while (xfs_iext_peek_next_extent(ifp, &info->icur, &got)) {
985 if (!xchk_are_bmaps_contiguous(info, irec, &got))
986 break;
987
988 if (!xchk_bmap_iext_mapping(info, &got)) {
989 xchk_fblock_set_corrupt(info->sc, info->whichfork,
990 got.br_startoff);
991 return false;
992 }
993 nr++;
994
995 irec->br_blockcount += got.br_blockcount;
996 xfs_iext_next(ifp, &info->icur);
997 }
998
999 /*
1000 * If the merged mapping could be expressed with fewer bmbt records
1001 * than we actually found, notify the user that this fork could be
1002 * optimized. CoW forks only exist in memory so we ignore them.
1003 */
1004 if (nr > 1 && info->whichfork != XFS_COW_FORK &&
1005 howmany_64(irec->br_blockcount, XFS_MAX_BMBT_EXTLEN) < nr)
1006 xchk_ino_set_preen(info->sc, info->sc->ip->i_ino);
1007
1008 return true;
1009 }
1010
1011 /*
1012 * Scrub an inode fork's block mappings.
1013 *
1014 * First we scan every record in every btree block, if applicable.
1015 * Then we unconditionally scan the incore extent cache.
1016 */
1017 STATIC int
xchk_bmap(struct xfs_scrub * sc,int whichfork)1018 xchk_bmap(
1019 struct xfs_scrub *sc,
1020 int whichfork)
1021 {
1022 struct xfs_bmbt_irec irec;
1023 struct xchk_bmap_info info = { NULL };
1024 struct xfs_mount *mp = sc->mp;
1025 struct xfs_inode *ip = sc->ip;
1026 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
1027 xfs_fileoff_t endoff;
1028 int error = 0;
1029
1030 /* Non-existent forks can be ignored. */
1031 if (!ifp)
1032 return -ENOENT;
1033
1034 info.is_rt = xfs_ifork_is_realtime(ip, whichfork);
1035 info.whichfork = whichfork;
1036 info.is_shared = whichfork == XFS_DATA_FORK && xfs_is_reflink_inode(ip);
1037 info.sc = sc;
1038
1039 switch (whichfork) {
1040 case XFS_COW_FORK:
1041 /* No CoW forks on non-reflink filesystems. */
1042 if (!xfs_has_reflink(mp)) {
1043 xchk_ino_set_corrupt(sc, sc->ip->i_ino);
1044 return 0;
1045 }
1046 break;
1047 case XFS_ATTR_FORK:
1048 /*
1049 * "attr" means that an attr fork was created at some point in
1050 * the life of this filesystem. "attr2" means that inodes have
1051 * variable-sized data/attr fork areas. Hence we only check
1052 * attr here.
1053 */
1054 if (!xfs_has_attr(mp))
1055 xchk_ino_set_corrupt(sc, sc->ip->i_ino);
1056 break;
1057 default:
1058 ASSERT(whichfork == XFS_DATA_FORK);
1059 break;
1060 }
1061
1062 /* Check the fork values */
1063 switch (ifp->if_format) {
1064 case XFS_DINODE_FMT_UUID:
1065 case XFS_DINODE_FMT_DEV:
1066 case XFS_DINODE_FMT_LOCAL:
1067 case XFS_DINODE_FMT_META_BTREE:
1068 /* No mappings to check. */
1069 if (whichfork == XFS_COW_FORK)
1070 xchk_fblock_set_corrupt(sc, whichfork, 0);
1071 return 0;
1072 case XFS_DINODE_FMT_EXTENTS:
1073 break;
1074 case XFS_DINODE_FMT_BTREE:
1075 if (whichfork == XFS_COW_FORK) {
1076 xchk_fblock_set_corrupt(sc, whichfork, 0);
1077 return 0;
1078 }
1079
1080 error = xchk_bmap_btree(sc, whichfork, &info);
1081 if (error)
1082 return error;
1083 break;
1084 default:
1085 xchk_fblock_set_corrupt(sc, whichfork, 0);
1086 return 0;
1087 }
1088
1089 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
1090 return 0;
1091
1092 /* Find the offset of the last extent in the mapping. */
1093 error = xfs_bmap_last_offset(ip, &endoff, whichfork);
1094 if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
1095 return error;
1096
1097 /*
1098 * Scrub extent records. We use a special iterator function here that
1099 * combines adjacent mappings if they are logically and physically
1100 * contiguous. For large allocations that require multiple bmbt
1101 * records, this reduces the number of cross-referencing calls, which
1102 * reduces runtime. Cross referencing with the rmap is simpler because
1103 * the rmap must match the combined mapping exactly.
1104 */
1105 while (xchk_bmap_iext_iter(&info, &irec)) {
1106 if (xchk_should_terminate(sc, &error) ||
1107 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
1108 return 0;
1109
1110 if (irec.br_startoff >= endoff) {
1111 xchk_fblock_set_corrupt(sc, whichfork,
1112 irec.br_startoff);
1113 return 0;
1114 }
1115
1116 if (isnullstartblock(irec.br_startblock))
1117 xchk_bmap_iextent_delalloc(ip, &info, &irec);
1118 else
1119 xchk_bmap_iextent(ip, &info, &irec);
1120 memcpy(&info.prev_rec, &irec, sizeof(struct xfs_bmbt_irec));
1121 }
1122
1123 if (xchk_bmap_want_check_rmaps(&info)) {
1124 error = xchk_bmap_check_rmaps(sc, whichfork);
1125 if (!xchk_fblock_xref_process_error(sc, whichfork, 0, &error))
1126 return error;
1127 }
1128
1129 return 0;
1130 }
1131
1132 /* Scrub an inode's data fork. */
1133 int
xchk_bmap_data(struct xfs_scrub * sc)1134 xchk_bmap_data(
1135 struct xfs_scrub *sc)
1136 {
1137 int error;
1138
1139 if (xchk_file_looks_zapped(sc, XFS_SICK_INO_BMBTD_ZAPPED)) {
1140 xchk_ino_set_corrupt(sc, sc->ip->i_ino);
1141 return 0;
1142 }
1143
1144 error = xchk_bmap(sc, XFS_DATA_FORK);
1145 if (error)
1146 return error;
1147
1148 /* If the data fork is clean, it is clearly not zapped. */
1149 xchk_mark_healthy_if_clean(sc, XFS_SICK_INO_BMBTD_ZAPPED);
1150 return 0;
1151 }
1152
1153 /* Scrub an inode's attr fork. */
1154 int
xchk_bmap_attr(struct xfs_scrub * sc)1155 xchk_bmap_attr(
1156 struct xfs_scrub *sc)
1157 {
1158 int error;
1159
1160 /*
1161 * If the attr fork has been zapped, it's possible that forkoff was
1162 * reset to zero and hence sc->ip->i_afp is NULL. We don't want the
1163 * NULL ifp check in xchk_bmap to conclude that the attr fork is ok,
1164 * so short circuit that logic by setting the corruption flag and
1165 * returning immediately.
1166 */
1167 if (xchk_file_looks_zapped(sc, XFS_SICK_INO_BMBTA_ZAPPED)) {
1168 xchk_ino_set_corrupt(sc, sc->ip->i_ino);
1169 return 0;
1170 }
1171
1172 error = xchk_bmap(sc, XFS_ATTR_FORK);
1173 if (error)
1174 return error;
1175
1176 /* If the attr fork is clean, it is clearly not zapped. */
1177 xchk_mark_healthy_if_clean(sc, XFS_SICK_INO_BMBTA_ZAPPED);
1178 return 0;
1179 }
1180
1181 /* Scrub an inode's CoW fork. */
1182 int
xchk_bmap_cow(struct xfs_scrub * sc)1183 xchk_bmap_cow(
1184 struct xfs_scrub *sc)
1185 {
1186 return xchk_bmap(sc, XFS_COW_FORK);
1187 }
1188