// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2017-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_inode.h"
#include "xfs_icache.h"
#include "xfs_alloc.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_refcount_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_log.h"
#include "xfs_trans_priv.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_dir2_priv.h"
#include "xfs_dir2.h"
#include "xfs_attr.h"
#include "xfs_reflink.h"
#include "xfs_ag.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_exchmaps.h"
#include "xfs_rtbitmap.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/health.h"

/* Common code for the metadata scrubbers. */

/*
 * Handling operational errors.
 *
 * The *_process_error() family of functions are used to process error return
 * codes from functions called as part of a scrub operation.
 *
 * If there's no error, we return true to tell the caller that it's ok
 * to move on to the next check in its list.
 *
 * For non-verifier errors (e.g. ENOMEM) we return false to tell the
 * caller that something bad happened, and we preserve *error so that
 * the caller can return the *error up the stack to userspace.
 *
 * Verifier errors (EFSBADCRC/EFSCORRUPTED) are recorded by setting
 * OFLAG_CORRUPT in sm_flags and the *error is cleared.  In other words,
 * we track verifier errors (and failed scrub checks) via OFLAG_CORRUPT,
 * not via return codes.  We return false to tell the caller that
 * something bad happened.  Since the error has been cleared, the caller
 * will (presumably) return that zero and scrubbing will move on to
 * whatever's next.
 *
 * ftrace can be used to record the precise metadata location and the
 * approximate code location of the failed operation.
 */
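/*
 * Illustrative sketch, not part of this file's API: a scrubber check
 * typically feeds its return codes through xchk_process_error() and only
 * keeps going while it returns true.  Assuming a hypothetical helper
 * xchk_example_check_record() that returns a negative errno:
 *
 *	error = xchk_example_check_record(sc, agno, agbno);
 *	if (!xchk_process_error(sc, agno, agbno, &error))
 *		return error;
 *
 * If the failure was a verifier error, OFLAG_CORRUPT has already been set
 * and error is now zero, so the caller returns zero and scrub moves on.
 */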
/* Check for operational errors. */
static bool
__xchk_process_error(
	struct xfs_scrub	*sc,
	xfs_agnumber_t		agno,
	xfs_agblock_t		bno,
	int			*error,
	__u32			errflag,
	void			*ret_ip)
{
	switch (*error) {
	case 0:
		return true;
	case -EDEADLOCK:
	case -ECHRNG:
		/* Used to restart an op with deadlock avoidance. */
		trace_xchk_deadlock_retry(
				sc->ip ? sc->ip : XFS_I(file_inode(sc->file)),
				sc->sm, *error);
		break;
	case -ECANCELED:
		/*
		 * ECANCELED here means that the caller set one of the scrub
		 * outcome flags (corrupt, xfail, xcorrupt) and wants to exit
		 * quickly.  Set error to zero and do not continue.
		 */
		trace_xchk_op_error(sc, agno, bno, *error, ret_ip);
		*error = 0;
		break;
	case -EFSBADCRC:
	case -EFSCORRUPTED:
		/* Note the badness but don't abort. */
		sc->sm->sm_flags |= errflag;
		*error = 0;
		fallthrough;
	default:
		trace_xchk_op_error(sc, agno, bno, *error, ret_ip);
		break;
	}
	return false;
}

bool
xchk_process_error(
	struct xfs_scrub	*sc,
	xfs_agnumber_t		agno,
	xfs_agblock_t		bno,
	int			*error)
{
	return __xchk_process_error(sc, agno, bno, error,
			XFS_SCRUB_OFLAG_CORRUPT, __return_address);
}

bool
xchk_xref_process_error(
	struct xfs_scrub	*sc,
	xfs_agnumber_t		agno,
	xfs_agblock_t		bno,
	int			*error)
{
	return __xchk_process_error(sc, agno, bno, error,
			XFS_SCRUB_OFLAG_XFAIL, __return_address);
}

/* Check for operational errors for a file offset. */
static bool
__xchk_fblock_process_error(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset,
	int			*error,
	__u32			errflag,
	void			*ret_ip)
{
	switch (*error) {
	case 0:
		return true;
	case -EDEADLOCK:
	case -ECHRNG:
		/* Used to restart an op with deadlock avoidance. */
		trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
		break;
	case -ECANCELED:
		/*
		 * ECANCELED here means that the caller set one of the scrub
		 * outcome flags (corrupt, xfail, xcorrupt) and wants to exit
		 * quickly.  Set error to zero and do not continue.
		 */
		trace_xchk_file_op_error(sc, whichfork, offset, *error,
				ret_ip);
		*error = 0;
		break;
	case -EFSBADCRC:
	case -EFSCORRUPTED:
		/* Note the badness but don't abort. */
		sc->sm->sm_flags |= errflag;
		*error = 0;
		fallthrough;
	default:
		trace_xchk_file_op_error(sc, whichfork, offset, *error,
				ret_ip);
		break;
	}
	return false;
}

bool
xchk_fblock_process_error(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset,
	int			*error)
{
	return __xchk_fblock_process_error(sc, whichfork, offset, error,
			XFS_SCRUB_OFLAG_CORRUPT, __return_address);
}

bool
xchk_fblock_xref_process_error(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset,
	int			*error)
{
	return __xchk_fblock_process_error(sc, whichfork, offset, error,
			XFS_SCRUB_OFLAG_XFAIL, __return_address);
}

/*
 * Handling scrub corruption/optimization/warning checks.
 *
 * The *_set_{corrupt,preen,warning}() family of functions are used to
 * record the presence of metadata that is incorrect (corrupt), could be
 * optimized somehow (preen), or should be flagged for administrative
 * review but is not incorrect (warn).
 *
 * ftrace can be used to record the precise metadata location and
 * approximate code location of the failed check.
 */
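/*
 * Illustrative sketch, not taken from this file: a first-party check flags
 * its own metadata with the plain _set_corrupt variants, while
 * cross-referencing code uses the _xref_ variants so that userspace can
 * tell the two apart.  For a hypothetical AGF sanity check:
 *
 *	if (be32_to_cpu(agf->agf_length) > sc->mp->m_sb.sb_agblocks)
 *		xchk_block_set_corrupt(sc, sc->sa.agf_bp);
 *
 * whereas a cross-reference of the same field against another btree would
 * call xchk_block_xref_set_corrupt() instead.
 */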
/* Record a block which could be optimized. */
void
xchk_block_set_preen(
	struct xfs_scrub	*sc,
	struct xfs_buf		*bp)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
	trace_xchk_block_preen(sc, xfs_buf_daddr(bp), __return_address);
}

/*
 * Record an inode which could be optimized.  The trace data will
 * include the block given by bp if bp is given; otherwise it will use
 * the block location of the inode record itself.
 */
void
xchk_ino_set_preen(
	struct xfs_scrub	*sc,
	xfs_ino_t		ino)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
	trace_xchk_ino_preen(sc, ino, __return_address);
}

/* Record something being wrong with the filesystem primary superblock. */
void
xchk_set_corrupt(
	struct xfs_scrub	*sc)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
	trace_xchk_fs_error(sc, 0, __return_address);
}

/* Record a corrupt block. */
void
xchk_block_set_corrupt(
	struct xfs_scrub	*sc,
	struct xfs_buf		*bp)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
	trace_xchk_block_error(sc, xfs_buf_daddr(bp), __return_address);
}

#ifdef CONFIG_XFS_QUOTA
/* Record a corrupt quota counter. */
void
xchk_qcheck_set_corrupt(
	struct xfs_scrub	*sc,
	unsigned int		dqtype,
	xfs_dqid_t		id)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
	trace_xchk_qcheck_error(sc, dqtype, id, __return_address);
}
#endif

/* Record a corruption while cross-referencing. */
void
xchk_block_xref_set_corrupt(
	struct xfs_scrub	*sc,
	struct xfs_buf		*bp)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
	trace_xchk_block_error(sc, xfs_buf_daddr(bp), __return_address);
}

/*
 * Record a corrupt inode.  The trace data will include the block given
 * by bp if bp is given; otherwise it will use the block location of the
 * inode record itself.
 */
void
xchk_ino_set_corrupt(
	struct xfs_scrub	*sc,
	xfs_ino_t		ino)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
	trace_xchk_ino_error(sc, ino, __return_address);
}

/* Record a corruption while cross-referencing with an inode. */
void
xchk_ino_xref_set_corrupt(
	struct xfs_scrub	*sc,
	xfs_ino_t		ino)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
	trace_xchk_ino_error(sc, ino, __return_address);
}

/* Record corruption in a block indexed by a file fork. */
void
xchk_fblock_set_corrupt(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
	trace_xchk_fblock_error(sc, whichfork, offset, __return_address);
}

/* Record a corruption while cross-referencing a fork block. */
void
xchk_fblock_xref_set_corrupt(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
	trace_xchk_fblock_error(sc, whichfork, offset, __return_address);
}

/*
 * Warn about inodes that need administrative review but are not
 * incorrect.
 */
void
xchk_ino_set_warning(
	struct xfs_scrub	*sc,
	xfs_ino_t		ino)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
	trace_xchk_ino_warning(sc, ino, __return_address);
}

/* Warn about a block indexed by a file fork that needs review. */
void
xchk_fblock_set_warning(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
	trace_xchk_fblock_warning(sc, whichfork, offset, __return_address);
}

/* Signal an incomplete scrub. */
void
xchk_set_incomplete(
	struct xfs_scrub	*sc)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_INCOMPLETE;
	trace_xchk_incomplete(sc, __return_address);
}

/*
 * rmap scrubbing -- compute the number of blocks with a given owner,
 * at least according to the reverse mapping data.
 */

struct xchk_rmap_ownedby_info {
	const struct xfs_owner_info	*oinfo;
	xfs_filblks_t			*blocks;
};

STATIC int
xchk_count_rmap_ownedby_irec(
	struct xfs_btree_cur		*cur,
	const struct xfs_rmap_irec	*rec,
	void				*priv)
{
	struct xchk_rmap_ownedby_info	*sroi = priv;
	bool				irec_attr;
	bool				oinfo_attr;

	irec_attr = rec->rm_flags & XFS_RMAP_ATTR_FORK;
	oinfo_attr = sroi->oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK;

	if (rec->rm_owner != sroi->oinfo->oi_owner)
		return 0;

	if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) || irec_attr == oinfo_attr)
		(*sroi->blocks) += rec->rm_blockcount;

	return 0;
}

/*
 * Calculate the number of blocks the rmap thinks are owned by something.
 * The caller should pass us an rmapbt cursor.
 */
int
xchk_count_rmap_ownedby_ag(
	struct xfs_scrub		*sc,
	struct xfs_btree_cur		*cur,
	const struct xfs_owner_info	*oinfo,
	xfs_filblks_t			*blocks)
{
	struct xchk_rmap_ownedby_info	sroi = {
		.oinfo			= oinfo,
		.blocks			= blocks,
	};

	*blocks = 0;
	return xfs_rmap_query_all(cur, xchk_count_rmap_ownedby_irec,
			&sroi);
}

/*
 * AG scrubbing
 *
 * These helpers facilitate locking an allocation group's header
 * buffers, setting up cursors for all btrees that are present, and
 * cleaning everything up once we're through.
 */

/* Decide if we want to return an AG header read failure. */
static inline bool
want_ag_read_header_failure(
	struct xfs_scrub	*sc,
	unsigned int		type)
{
	/* Return all AG header read failures when scanning btrees. */
	if (sc->sm->sm_type != XFS_SCRUB_TYPE_AGF &&
	    sc->sm->sm_type != XFS_SCRUB_TYPE_AGFL &&
	    sc->sm->sm_type != XFS_SCRUB_TYPE_AGI)
		return true;
	/*
	 * If we're scanning a given type of AG header, we only want to
	 * see read failures from that specific header.  We'd like the
	 * other headers to cross-check them, but this isn't required.
	 */
	if (sc->sm->sm_type == type)
		return true;
	return false;
}

/*
 * Grab the AG header buffers for the attached perag structure.
 *
 * The headers should be released by xchk_ag_free, but as a fail safe we attach
 * all the buffers we grab to the scrub transaction so they'll all be freed
 * when we cancel it.
 */
static inline int
xchk_perag_read_headers(
	struct xfs_scrub	*sc,
	struct xchk_ag		*sa)
{
	int			error;

	error = xfs_ialloc_read_agi(sa->pag, sc->tp, 0, &sa->agi_bp);
	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGI))
		return error;

	error = xfs_alloc_read_agf(sa->pag, sc->tp, 0, &sa->agf_bp);
	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGF))
		return error;

	return 0;
}

/*
 * Grab the AG headers for the attached perag structure and wait for pending
 * intents to drain.
 */
int
xchk_perag_drain_and_lock(
	struct xfs_scrub	*sc)
{
	struct xchk_ag		*sa = &sc->sa;
	int			error = 0;

	ASSERT(sa->pag != NULL);
	ASSERT(sa->agi_bp == NULL);
	ASSERT(sa->agf_bp == NULL);

	do {
		if (xchk_should_terminate(sc, &error))
			return error;

		error = xchk_perag_read_headers(sc, sa);
		if (error)
			return error;

		/*
		 * If we've grabbed an inode for scrubbing then we assume that
		 * holding its ILOCK will suffice to coordinate with any intent
		 * chains involving this inode.
		 */
		if (sc->ip)
			return 0;

		/*
		 * Decide if this AG is quiet enough for all metadata to be
		 * consistent with each other.  XFS allows the AG header buffer
		 * locks to cycle across transaction rolls while processing
		 * chains of deferred ops, which means that there could be
		 * other threads in the middle of processing a chain of
		 * deferred ops.  For regular operations we are careful about
		 * ordering operations to prevent collisions between threads
		 * (which is why we don't need a per-AG lock), but scrub and
		 * repair have to serialize against chained operations.
		 *
		 * We just locked all the AG header buffers; now take a look
		 * to see if there are any intents in progress.  If there are,
		 * drop the AG headers and wait for the intents to drain.
		 * Since we hold all the AG header locks for the duration of
		 * the scrub, this is the only time we have to sample the
		 * intents counter; any threads increasing it after this point
		 * can't possibly be in the middle of a chain of AG metadata
		 * updates.
		 *
		 * Obviously, this should be slanted against scrub and in favor
		 * of runtime threads.
		 */
		if (!xfs_perag_intent_busy(sa->pag))
			return 0;

		if (sa->agf_bp) {
			xfs_trans_brelse(sc->tp, sa->agf_bp);
			sa->agf_bp = NULL;
		}

		if (sa->agi_bp) {
			xfs_trans_brelse(sc->tp, sa->agi_bp);
			sa->agi_bp = NULL;
		}

		if (!(sc->flags & XCHK_FSGATES_DRAIN))
			return -ECHRNG;
		error = xfs_perag_intent_drain(sa->pag);
		if (error == -ERESTARTSYS)
			error = -EINTR;
	} while (!error);

	return error;
}

/*
 * Grab the per-AG structure, grab all AG header buffers, and wait until there
 * aren't any pending intents.  Returns -ENOENT if we can't grab the perag
 * structure.
 */
int
xchk_ag_read_headers(
	struct xfs_scrub	*sc,
	xfs_agnumber_t		agno,
	struct xchk_ag		*sa)
{
	struct xfs_mount	*mp = sc->mp;

	ASSERT(!sa->pag);
	sa->pag = xfs_perag_get(mp, agno);
	if (!sa->pag)
		return -ENOENT;

	return xchk_perag_drain_and_lock(sc);
}

/* Release all the AG btree cursors. */
void
xchk_ag_btcur_free(
	struct xchk_ag		*sa)
{
	if (sa->refc_cur)
		xfs_btree_del_cursor(sa->refc_cur, XFS_BTREE_ERROR);
	if (sa->rmap_cur)
		xfs_btree_del_cursor(sa->rmap_cur, XFS_BTREE_ERROR);
	if (sa->fino_cur)
		xfs_btree_del_cursor(sa->fino_cur, XFS_BTREE_ERROR);
	if (sa->ino_cur)
		xfs_btree_del_cursor(sa->ino_cur, XFS_BTREE_ERROR);
	if (sa->cnt_cur)
		xfs_btree_del_cursor(sa->cnt_cur, XFS_BTREE_ERROR);
	if (sa->bno_cur)
		xfs_btree_del_cursor(sa->bno_cur, XFS_BTREE_ERROR);

	sa->refc_cur = NULL;
	sa->rmap_cur = NULL;
	sa->fino_cur = NULL;
	sa->ino_cur = NULL;
	sa->bno_cur = NULL;
	sa->cnt_cur = NULL;
}
/* Initialize all the btree cursors for an AG. */
void
xchk_ag_btcur_init(
	struct xfs_scrub	*sc,
	struct xchk_ag		*sa)
{
	struct xfs_mount	*mp = sc->mp;

	if (sa->agf_bp) {
		/* Set up a bnobt cursor for cross-referencing. */
		sa->bno_cur = xfs_bnobt_init_cursor(mp, sc->tp, sa->agf_bp,
				sa->pag);
		xchk_ag_btree_del_cursor_if_sick(sc, &sa->bno_cur,
				XFS_SCRUB_TYPE_BNOBT);

		/* Set up a cntbt cursor for cross-referencing. */
		sa->cnt_cur = xfs_cntbt_init_cursor(mp, sc->tp, sa->agf_bp,
				sa->pag);
		xchk_ag_btree_del_cursor_if_sick(sc, &sa->cnt_cur,
				XFS_SCRUB_TYPE_CNTBT);

		/* Set up a rmapbt cursor for cross-referencing. */
		if (xfs_has_rmapbt(mp)) {
			sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp,
					sa->agf_bp, sa->pag);
			xchk_ag_btree_del_cursor_if_sick(sc, &sa->rmap_cur,
					XFS_SCRUB_TYPE_RMAPBT);
		}

		/* Set up a refcountbt cursor for cross-referencing. */
		if (xfs_has_reflink(mp)) {
			sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp,
					sa->agf_bp, sa->pag);
			xchk_ag_btree_del_cursor_if_sick(sc, &sa->refc_cur,
					XFS_SCRUB_TYPE_REFCNTBT);
		}
	}

	if (sa->agi_bp) {
		/* Set up an inobt cursor for cross-referencing. */
		sa->ino_cur = xfs_inobt_init_cursor(sa->pag, sc->tp,
				sa->agi_bp);
		xchk_ag_btree_del_cursor_if_sick(sc, &sa->ino_cur,
				XFS_SCRUB_TYPE_INOBT);

		/* Set up a finobt cursor for cross-referencing. */
		if (xfs_has_finobt(mp)) {
			sa->fino_cur = xfs_finobt_init_cursor(sa->pag, sc->tp,
					sa->agi_bp);
			xchk_ag_btree_del_cursor_if_sick(sc, &sa->fino_cur,
					XFS_SCRUB_TYPE_FINOBT);
		}
	}
}

/* Release the AG header context and btree cursors. */
void
xchk_ag_free(
	struct xfs_scrub	*sc,
	struct xchk_ag		*sa)
{
	xchk_ag_btcur_free(sa);
	xrep_reset_perag_resv(sc);
	if (sa->agf_bp) {
		xfs_trans_brelse(sc->tp, sa->agf_bp);
		sa->agf_bp = NULL;
	}
	if (sa->agi_bp) {
		xfs_trans_brelse(sc->tp, sa->agi_bp);
		sa->agi_bp = NULL;
	}
	if (sa->pag) {
		xfs_perag_put(sa->pag);
		sa->pag = NULL;
	}
}

/*
 * For scrub, grab the perag structure, the AGI, and the AGF headers, in that
 * order.  Locking order requires us to get the AGI before the AGF.  We use the
 * transaction to avoid deadlocking on crosslinked metadata buffers; either the
 * caller passes one in (bmap scrub) or we have to create a transaction
 * ourselves.  Returns ENOENT if the perag struct cannot be grabbed.
 */
int
xchk_ag_init(
	struct xfs_scrub	*sc,
	xfs_agnumber_t		agno,
	struct xchk_ag		*sa)
{
	int			error;

	error = xchk_ag_read_headers(sc, agno, sa);
	if (error)
		return error;

	xchk_ag_btcur_init(sc, sa);
	return 0;
}
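/*
 * Illustrative sketch, not taken from this file: an AG-oriented scrubber's
 * setup usually combines the helpers above,
 *
 *	error = xchk_trans_alloc(sc, resblks);
 *	if (error)
 *		return error;
 *	return xchk_ag_init(sc, sc->sm->sm_agno, &sc->sa);
 *
 * after which its checks can cross-reference against sc->sa.bno_cur,
 * sc->sa.rmap_cur, and friends, for example:
 *
 *	if (!sc->sa.bno_cur || xchk_skip_xref(sc->sm))
 *		return;
 *	error = <query the bnobt for the extent being checked>;
 *	if (!xchk_should_check_xref(sc, &error, &sc->sa.bno_cur))
 *		return;
 *	if (<the bnobt disagrees>)
 *		xchk_block_xref_set_corrupt(sc, sc->sa.agf_bp);
 *
 * Scrub teardown releases the headers and cursors via xchk_ag_free().
 */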
/* Per-scrubber setup functions */

void
xchk_trans_cancel(
	struct xfs_scrub	*sc)
{
	xfs_trans_cancel(sc->tp);
	sc->tp = NULL;
}

int
xchk_trans_alloc_empty(
	struct xfs_scrub	*sc)
{
	return xfs_trans_alloc_empty(sc->mp, &sc->tp);
}

/*
 * Grab an empty transaction so that we can re-grab locked buffers if
 * one of our btrees turns out to be cyclic.
 *
 * If we're going to repair something, we need to ask for the largest possible
 * log reservation so that we can handle the worst case scenario for metadata
 * updates while rebuilding a metadata item.  We also need to reserve as many
 * blocks in the head transaction as we think we're going to need to rebuild
 * the metadata object.
 */
int
xchk_trans_alloc(
	struct xfs_scrub	*sc,
	uint			resblks)
{
	if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
		return xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate,
				resblks, 0, 0, &sc->tp);

	return xchk_trans_alloc_empty(sc);
}

/* Set us up with a transaction and an empty context. */
int
xchk_setup_fs(
	struct xfs_scrub	*sc)
{
	uint			resblks;

	resblks = xrep_calc_ag_resblks(sc);
	return xchk_trans_alloc(sc, resblks);
}

/* Set us up with AG headers and btree cursors. */
int
xchk_setup_ag_btree(
	struct xfs_scrub	*sc,
	bool			force_log)
{
	struct xfs_mount	*mp = sc->mp;
	int			error;

	/*
	 * If the caller asks us to checkpoint the log, do so.  This
	 * expensive operation should be performed infrequently and only
	 * as a last resort.  Any caller that sets force_log should
	 * document why they need to do so.
	 */
	if (force_log) {
		error = xchk_checkpoint_log(mp);
		if (error)
			return error;
	}

	error = xchk_setup_fs(sc);
	if (error)
		return error;

	return xchk_ag_init(sc, sc->sm->sm_agno, &sc->sa);
}

/* Push everything out of the log onto disk. */
int
xchk_checkpoint_log(
	struct xfs_mount	*mp)
{
	int			error;

	error = xfs_log_force(mp, XFS_LOG_SYNC);
	if (error)
		return error;
	xfs_ail_push_all_sync(mp->m_ail);
	return 0;
}

/* Verify that an inode is allocated ondisk, then return its cached inode. */
int
xchk_iget(
	struct xfs_scrub	*sc,
	xfs_ino_t		inum,
	struct xfs_inode	**ipp)
{
	ASSERT(sc->tp != NULL);

	return xfs_iget(sc->mp, sc->tp, inum, XCHK_IGET_FLAGS, 0, ipp);
}

/*
 * Try to grab an inode in a manner that avoids races with physical inode
 * allocation.  If we can't, return the locked AGI buffer so that the caller
 * can single-step the loading process to see where things went wrong.
 * Callers must have a valid scrub transaction.
 *
 * If the iget succeeds, return 0, a NULL AGI, and the inode.
 *
 * If the iget fails, return the error, the locked AGI, and a NULL inode.  This
 * can include -EINVAL and -ENOENT for invalid inode numbers or inodes that are
 * no longer allocated; or any other corruption or runtime error.
 *
 * If the AGI read fails, return the error, a NULL AGI, and NULL inode.
 *
 * If a fatal signal is pending, return -EINTR, a NULL AGI, and a NULL inode.
 */
int
xchk_iget_agi(
	struct xfs_scrub	*sc,
	xfs_ino_t		inum,
	struct xfs_buf		**agi_bpp,
	struct xfs_inode	**ipp)
{
	struct xfs_mount	*mp = sc->mp;
	struct xfs_trans	*tp = sc->tp;
	struct xfs_perag	*pag;
	int			error;

	ASSERT(sc->tp != NULL);

again:
	*agi_bpp = NULL;
	*ipp = NULL;
	error = 0;

	if (xchk_should_terminate(sc, &error))
		return error;

	/*
	 * Attach the AGI buffer to the scrub transaction to avoid deadlocks
	 * in the iget cache miss path.
	 */
	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
	error = xfs_ialloc_read_agi(pag, tp, 0, agi_bpp);
	xfs_perag_put(pag);
	if (error)
		return error;

	error = xfs_iget(mp, tp, inum, XFS_IGET_NORETRY | XCHK_IGET_FLAGS, 0,
			ipp);
	if (error == -EAGAIN) {
		/*
		 * The inode may be in core but temporarily unavailable and may
		 * require the AGI buffer before it can be returned.  Drop the
		 * AGI buffer and retry the lookup.
		 *
		 * Incore lookup will fail with EAGAIN on a cache hit if the
		 * inode is queued to the inactivation list.  The inactivation
		 * worker may remove the inode from the unlinked list and hence
		 * needs the AGI.
		 *
		 * Hence xchk_iget_agi() needs to drop the AGI lock on EAGAIN
		 * to allow inodegc to make progress and move the inode to
		 * IRECLAIMABLE state where xfs_iget will be able to return it
		 * again if it can lock the inode.
		 */
		xfs_trans_brelse(tp, *agi_bpp);
		delay(1);
		goto again;
	}
	if (error)
		return error;

	/* We got the inode, so we can release the AGI. */
	ASSERT(*ipp != NULL);
	xfs_trans_brelse(tp, *agi_bpp);
	*agi_bpp = NULL;
	return 0;
}

#ifdef CONFIG_XFS_QUOTA
/*
 * Try to attach dquots to this inode if we think we might want to repair it.
 * Callers must not hold any ILOCKs.  If the dquots are broken and cannot be
 * attached, a quotacheck will be scheduled.
 */
int
xchk_ino_dqattach(
	struct xfs_scrub	*sc)
{
	ASSERT(sc->tp != NULL);
	ASSERT(sc->ip != NULL);

	if (!xchk_could_repair(sc))
		return 0;

	return xrep_ino_dqattach(sc);
}
#endif

/* Install an inode that we opened by handle for scrubbing. */
int
xchk_install_handle_inode(
	struct xfs_scrub	*sc,
	struct xfs_inode	*ip)
{
	if (VFS_I(ip)->i_generation != sc->sm->sm_gen) {
		xchk_irele(sc, ip);
		return -ENOENT;
	}

	sc->ip = ip;
	return 0;
}

/*
 * Install an already-referenced inode for scrubbing.  Get our own reference to
 * the inode to make disposal simpler.  The inode must not be in I_FREEING or
 * I_WILL_FREE state!
 */
int
xchk_install_live_inode(
	struct xfs_scrub	*sc,
	struct xfs_inode	*ip)
{
	if (!igrab(VFS_I(ip))) {
		xchk_ino_set_corrupt(sc, ip->i_ino);
		return -EFSCORRUPTED;
	}

	sc->ip = ip;
	return 0;
}

/*
 * In preparation to scrub metadata structures that hang off of an inode,
 * grab either the inode referenced in the scrub control structure or the
 * inode passed in.  If the inumber does not reference an allocated inode
 * record, the function returns ENOENT to end the scrub early.  The inode
 * is not locked.
 */
int
xchk_iget_for_scrubbing(
	struct xfs_scrub	*sc)
{
	struct xfs_imap		imap;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_perag	*pag;
	struct xfs_buf		*agi_bp;
	struct xfs_inode	*ip_in = XFS_I(file_inode(sc->file));
	struct xfs_inode	*ip = NULL;
	xfs_agnumber_t		agno = XFS_INO_TO_AGNO(mp, sc->sm->sm_ino);
	int			error;

	ASSERT(sc->tp == NULL);

	/* We want to scan the inode we already had opened. */
	if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino)
		return xchk_install_live_inode(sc, ip_in);

	/* Reject internal metadata files and obviously bad inode numbers. */
	if (xfs_internal_inum(mp, sc->sm->sm_ino))
		return -ENOENT;
	if (!xfs_verify_ino(sc->mp, sc->sm->sm_ino))
		return -ENOENT;

	/* Try a safe untrusted iget. */
	error = xchk_iget_safe(sc, sc->sm->sm_ino, &ip);
	if (!error)
		return xchk_install_handle_inode(sc, ip);
	if (error == -ENOENT)
		return error;
	if (error != -EINVAL)
		goto out_error;

	/*
	 * EINVAL with IGET_UNTRUSTED probably means one of several things:
	 * userspace gave us an inode number that doesn't correspond to fs
	 * space; the inode btree lacks a record for this inode; or there is a
	 * record, and it says this inode is free.
	 *
	 * We want to look up this inode in the inobt to distinguish two
	 * scenarios: (1) the inobt says the inode is free, in which case
	 * there's nothing to do; and (2) the inobt says the inode is
	 * allocated, but loading it failed due to corruption.
	 *
	 * Allocate a transaction and grab the AGI to prevent inobt activity
	 * in this AG.  Retry the iget in case someone allocated a new inode
	 * after the first iget failed.
	 */
	error = xchk_trans_alloc(sc, 0);
	if (error)
		goto out_error;

	error = xchk_iget_agi(sc, sc->sm->sm_ino, &agi_bp, &ip);
	if (error == 0) {
		/* Actually got the inode, so install it. */
		xchk_trans_cancel(sc);
		return xchk_install_handle_inode(sc, ip);
	}
	if (error == -ENOENT)
		goto out_gone;
	if (error != -EINVAL)
		goto out_cancel;

	/* Ensure that we have protected against inode allocation/freeing. */
	if (agi_bp == NULL) {
		ASSERT(agi_bp != NULL);
		error = -ECANCELED;
		goto out_cancel;
	}

	/*
	 * Untrusted iget failed a second time.  Let's try an inobt lookup.
	 * If the inobt thinks this inode can neither exist inside the
	 * filesystem nor be allocated, return ENOENT to signal that the check
	 * can be skipped.
	 *
	 * If the lookup returns corruption, we'll mark this inode corrupt and
	 * exit to userspace.  There's little chance of fixing anything until
	 * the inobt is straightened out, but there's nothing we can do here.
	 *
	 * If the lookup encounters any other error, exit to userspace.
	 *
	 * If the lookup succeeds, something else must be very wrong in the fs
	 * such that setting up the incore inode failed in some strange way.
	 * Treat those as corruptions.
	 */
	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, sc->sm->sm_ino));
	if (!pag) {
		error = -EFSCORRUPTED;
		goto out_cancel;
	}

	error = xfs_imap(pag, sc->tp, sc->sm->sm_ino, &imap,
			XFS_IGET_UNTRUSTED);
	xfs_perag_put(pag);
	if (error == -EINVAL || error == -ENOENT)
		goto out_gone;
	if (!error)
		error = -EFSCORRUPTED;

out_cancel:
	xchk_trans_cancel(sc);
out_error:
	trace_xchk_op_error(sc, agno, XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino),
			error, __return_address);
	return error;
out_gone:
	/* The file is gone, so there's nothing to check. */
	xchk_trans_cancel(sc);
	return -ENOENT;
}

/* Release an inode, possibly dropping it in the process. */
void
xchk_irele(
	struct xfs_scrub	*sc,
	struct xfs_inode	*ip)
{
	if (sc->tp) {
		/*
		 * If we are in a transaction, we /cannot/ drop the inode
		 * ourselves, because the VFS will trigger writeback, which
		 * can require a transaction.  Clear DONTCACHE to force the
		 * inode to the LRU, where someone else can take care of
		 * dropping it.
		 *
		 * Note that when we grabbed our reference to the inode, it
		 * could have had an active ref and DONTCACHE set if a sysadmin
		 * is trying to coerce a change in file access mode.  icache
		 * hits do not clear DONTCACHE, so we must do it here.
		 */
		spin_lock(&VFS_I(ip)->i_lock);
		VFS_I(ip)->i_state &= ~I_DONTCACHE;
		spin_unlock(&VFS_I(ip)->i_lock);
	}

	xfs_irele(ip);
}

/*
 * Set us up to scrub metadata mapped by a file's fork.  Callers must not use
 * this to operate on user-accessible regular file data because the MMAPLOCK is
 * not taken.
 */
int
xchk_setup_inode_contents(
	struct xfs_scrub	*sc,
	unsigned int		resblks)
{
	int			error;

	error = xchk_iget_for_scrubbing(sc);
	if (error)
		return error;

	/* Lock the inode so the VFS cannot touch this file. */
	xchk_ilock(sc, XFS_IOLOCK_EXCL);

	error = xchk_trans_alloc(sc, resblks);
	if (error)
		goto out;

	error = xchk_ino_dqattach(sc);
	if (error)
		goto out;

	xchk_ilock(sc, XFS_ILOCK_EXCL);
out:
	/* scrub teardown will unlock and release the inode for us */
	return error;
}

void
xchk_ilock(
	struct xfs_scrub	*sc,
	unsigned int		ilock_flags)
{
	xfs_ilock(sc->ip, ilock_flags);
	sc->ilock_flags |= ilock_flags;
}

bool
xchk_ilock_nowait(
	struct xfs_scrub	*sc,
	unsigned int		ilock_flags)
{
	if (xfs_ilock_nowait(sc->ip, ilock_flags)) {
		sc->ilock_flags |= ilock_flags;
		return true;
	}

	return false;
}

void
xchk_iunlock(
	struct xfs_scrub	*sc,
	unsigned int		ilock_flags)
{
	sc->ilock_flags &= ~ilock_flags;
	xfs_iunlock(sc->ip, ilock_flags);
}

/*
 * Predicate that decides if we need to evaluate the cross-reference check.
 * If there was an error accessing the cross-reference btree, just delete
 * the cursor and skip the check.
 */
bool
xchk_should_check_xref(
	struct xfs_scrub	*sc,
	int			*error,
	struct xfs_btree_cur	**curpp)
{
	/* No point in xref if we already know we're corrupt. */
	if (xchk_skip_xref(sc->sm))
		return false;

	if (*error == 0)
		return true;

	if (curpp) {
		/* If we've already given up on xref, just bail out. */
		if (!*curpp)
			return false;

		/* xref error, delete cursor and bail out. */
		xfs_btree_del_cursor(*curpp, XFS_BTREE_ERROR);
		*curpp = NULL;
	}

	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL;
	trace_xchk_xref_error(sc, *error, __return_address);

	/*
	 * Errors encountered during cross-referencing with another
	 * data structure should not cause this scrubber to abort.
	 */
	*error = 0;
	return false;
}

/* Run the structure verifiers on in-memory buffers to detect bad memory. */
void
xchk_buffer_recheck(
	struct xfs_scrub	*sc,
	struct xfs_buf		*bp)
{
	xfs_failaddr_t		fa;

	if (bp->b_ops == NULL) {
		xchk_block_set_corrupt(sc, bp);
		return;
	}
	if (bp->b_ops->verify_struct == NULL) {
		xchk_set_incomplete(sc);
		return;
	}
	fa = bp->b_ops->verify_struct(bp);
	if (!fa)
		return;
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
	trace_xchk_block_error(sc, xfs_buf_daddr(bp), fa);
}

static inline int
xchk_metadata_inode_subtype(
	struct xfs_scrub	*sc,
	unsigned int		scrub_type)
{
	struct xfs_scrub_subord	*sub;
	int			error;

	sub = xchk_scrub_create_subord(sc, scrub_type);
	error = sub->sc.ops->scrub(&sub->sc);
	xchk_scrub_free_subord(sub);
	return error;
}

/*
 * Scrub the attr/data forks of a metadata inode.  The metadata inode must be
 * pointed to by sc->ip and the ILOCK must be held.
 */
int
xchk_metadata_inode_forks(
	struct xfs_scrub	*sc)
{
	bool			shared;
	int			error;

	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return 0;

	/* Check the inode record. */
	error = xchk_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_INODE);
	if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
		return error;

	/* Metadata inodes don't live on the rt device. */
	if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME) {
		xchk_ino_set_corrupt(sc, sc->ip->i_ino);
		return 0;
	}

	/* They should never participate in reflink. */
	if (xfs_is_reflink_inode(sc->ip)) {
		xchk_ino_set_corrupt(sc, sc->ip->i_ino);
		return 0;
	}

	/* They also should never have extended attributes. */
	if (xfs_inode_hasattr(sc->ip)) {
		xchk_ino_set_corrupt(sc, sc->ip->i_ino);
		return 0;
	}

	/* Invoke the data fork scrubber. */
	error = xchk_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTD);
	if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
		return error;

	/* Look for incorrect shared blocks. */
	if (xfs_has_reflink(sc->mp)) {
		error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip,
				&shared);
		if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0,
				&error))
			return error;
		if (shared)
			xchk_ino_set_corrupt(sc, sc->ip->i_ino);
	}

	return 0;
}

/*
 * Enable filesystem hooks (i.e. runtime code patching) before starting a scrub
 * operation.  Callers must not hold any locks that intersect with the CPU
 * hotplug lock (e.g. writeback locks) because code patching must halt the CPUs
 * to change kernel code.
 */
void
xchk_fsgates_enable(
	struct xfs_scrub	*sc,
	unsigned int		scrub_fsgates)
{
	ASSERT(!(scrub_fsgates & ~XCHK_FSGATES_ALL));
	ASSERT(!(sc->flags & scrub_fsgates));

	trace_xchk_fsgates_enable(sc, scrub_fsgates);

	if (scrub_fsgates & XCHK_FSGATES_DRAIN)
		xfs_drain_wait_enable();

	if (scrub_fsgates & XCHK_FSGATES_QUOTA)
		xfs_dqtrx_hook_enable();

	if (scrub_fsgates & XCHK_FSGATES_DIRENTS)
		xfs_dir_hook_enable();

	if (scrub_fsgates & XCHK_FSGATES_RMAP)
		xfs_rmap_hook_enable();

	sc->flags |= scrub_fsgates;
}

/*
 * Decide if this is a cached inode that's also allocated.  The caller must
 * hold a reference to an AG and the AGI buffer lock to prevent inodes from
 * being allocated or freed.
 *
 * Look up an inode by number in the given file system.  If the inode number
 * is invalid, return -EINVAL.  If the inode is not in cache, return -ENODATA.
 * If the inode is being reclaimed, return -ENODATA because we know the inode
 * cache cannot be updating the ondisk metadata.
 *
 * Otherwise, the incore inode is the one we want, and it is either live,
 * somewhere in the inactivation machinery, or reclaimable.  The inode is
 * allocated if i_mode is nonzero.  In all three cases, the cached inode will
 * be more up to date than the ondisk inode buffer, so we must use the incore
 * i_mode.
 */
int
xchk_inode_is_allocated(
	struct xfs_scrub	*sc,
	xfs_agino_t		agino,
	bool			*inuse)
{
	struct xfs_mount	*mp = sc->mp;
	struct xfs_perag	*pag = sc->sa.pag;
	xfs_ino_t		ino;
	struct xfs_inode	*ip;
	int			error;

	/* caller must hold perag reference */
	if (pag == NULL) {
		ASSERT(pag != NULL);
		return -EINVAL;
	}

	/* caller must have AGI buffer */
	if (sc->sa.agi_bp == NULL) {
		ASSERT(sc->sa.agi_bp != NULL);
		return -EINVAL;
	}

	/* reject inode numbers outside existing AGs */
	ino = XFS_AGINO_TO_INO(sc->mp, pag->pag_agno, agino);
	if (!xfs_verify_ino(mp, ino))
		return -EINVAL;

	error = -ENODATA;
	rcu_read_lock();
	ip = radix_tree_lookup(&pag->pag_ici_root, agino);
	if (!ip) {
		/* cache miss */
		goto out_rcu;
	}

	/*
	 * If the inode number doesn't match, the incore inode got reused
	 * during an RCU grace period and the radix tree hasn't been updated.
	 * This isn't the inode we want.
	 */
	spin_lock(&ip->i_flags_lock);
	if (ip->i_ino != ino)
		goto out_skip;

	trace_xchk_inode_is_allocated(ip);

	/*
	 * We have an incore inode that matches the inode we want, and the
	 * caller holds the perag structure and the AGI buffer.  Let's check
	 * our assumptions below:
	 */

#ifdef DEBUG
	/*
	 * (1) If the incore inode is live (i.e. referenced from the dcache),
	 * it will not be INEW, nor will it be in the inactivation or reclaim
	 * machinery.  The ondisk inode had better be allocated.  This is the
	 * most trivial case.
	 */
	if (!(ip->i_flags & (XFS_NEED_INACTIVE | XFS_INEW | XFS_IRECLAIMABLE |
			     XFS_INACTIVATING))) {
		/* live inode */
		ASSERT(VFS_I(ip)->i_mode != 0);
	}

	/*
	 * If the incore inode is INEW, there are several possibilities:
	 *
	 * (2) For a file that is being created, note that we allocate the
	 * ondisk inode before allocating, initializing, and adding the incore
	 * inode to the radix tree.
	 *
	 * (3) If the incore inode is being recycled, the inode has to be
	 * allocated because we don't allow freed inodes to be recycled.
	 * Recycling doesn't touch i_mode.
	 */
	if (ip->i_flags & XFS_INEW) {
		/* created on disk already or recycling */
		ASSERT(VFS_I(ip)->i_mode != 0);
	}

	/*
	 * (4) If the inode is queued for inactivation (NEED_INACTIVE) but
	 * inactivation has not started (!INACTIVATING), it is still allocated.
	 */
	if ((ip->i_flags & XFS_NEED_INACTIVE) &&
	    !(ip->i_flags & XFS_INACTIVATING)) {
		/* definitely before difree */
		ASSERT(VFS_I(ip)->i_mode != 0);
	}
#endif

	/*
	 * If the incore inode is undergoing inactivation (INACTIVATING), there
	 * are two possibilities:
	 *
	 * (5) It is before the point where it would get freed ondisk, in which
	 * case i_mode is still nonzero.
	 *
	 * (6) It has already been freed, in which case i_mode is zero.
	 *
	 * We don't take the ILOCK here, but difree and dialloc update the AGI,
	 * and we've taken the AGI buffer lock, which prevents that from
	 * happening.
	 */

	/*
	 * (7) Inodes undergoing inactivation (INACTIVATING) or queued for
	 * reclaim (IRECLAIMABLE) could be allocated or free.  i_mode still
	 * reflects the ondisk state.
	 */

	/*
	 * (8) If the inode is in IFLUSHING, it's safe to query i_mode because
	 * the flush code uses i_mode to format the ondisk inode.
	 */

	/*
	 * (9) If the inode is in IRECLAIM and was reachable via the radix
	 * tree, it still has the same i_mode as it did before it entered
	 * reclaim.  The inode object is still alive because we hold the RCU
	 * read lock.
	 */

	*inuse = VFS_I(ip)->i_mode != 0;
	error = 0;

out_skip:
	spin_unlock(&ip->i_flags_lock);
out_rcu:
	rcu_read_unlock();
	return error;
}