1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 2017-2023 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <djwong@kernel.org> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_mount.h" 12 #include "xfs_log_format.h" 13 #include "xfs_trans.h" 14 #include "xfs_inode.h" 15 #include "xfs_quota.h" 16 #include "xfs_qm.h" 17 #include "xfs_scrub.h" 18 #include "xfs_buf_mem.h" 19 #include "xfs_rmap.h" 20 #include "xfs_exchrange.h" 21 #include "xfs_exchmaps.h" 22 #include "scrub/scrub.h" 23 #include "scrub/common.h" 24 #include "scrub/trace.h" 25 #include "scrub/repair.h" 26 #include "scrub/health.h" 27 #include "scrub/stats.h" 28 #include "scrub/xfile.h" 29 #include "scrub/tempfile.h" 30 #include "scrub/orphanage.h" 31 32 /* 33 * Online Scrub and Repair 34 * 35 * Traditionally, XFS (the kernel driver) did not know how to check or 36 * repair on-disk data structures. That task was left to the xfs_check 37 * and xfs_repair tools, both of which require taking the filesystem 38 * offline for a thorough but time consuming examination. Online 39 * scrub & repair, on the other hand, enables us to check the metadata 40 * for obvious errors while carefully stepping around the filesystem's 41 * ongoing operations, locking rules, etc. 42 * 43 * Given that most XFS metadata consist of records stored in a btree, 44 * most of the checking functions iterate the btree blocks themselves 45 * looking for irregularities. When a record block is encountered, each 46 * record can be checked for obviously bad values. Record values can 47 * also be cross-referenced against other btrees to look for potential 48 * misunderstandings between pieces of metadata. 
49 * 50 * It is expected that the checkers responsible for per-AG metadata 51 * structures will lock the AG headers (AGI, AGF, AGFL), iterate the 52 * metadata structure, and perform any relevant cross-referencing before 53 * unlocking the AG and returning the results to userspace. These 54 * scrubbers must not keep an AG locked for too long to avoid tying up 55 * the block and inode allocators. 56 * 57 * Block maps and b-trees rooted in an inode present a special challenge 58 * because they can involve extents from any AG. The general scrubber 59 * structure of lock -> check -> xref -> unlock still holds, but AG 60 * locking order rules /must/ be obeyed to avoid deadlocks. The 61 * ordering rule, of course, is that we must lock in increasing AG 62 * order. Helper functions are provided to track which AG headers we've 63 * already locked. If we detect an imminent locking order violation, we 64 * can signal a potential deadlock, in which case the scrubber can jump 65 * out to the top level, lock all the AGs in order, and retry the scrub. 66 * 67 * For file data (directories, extended attributes, symlinks) scrub, we 68 * can simply lock the inode and walk the data. For btree data 69 * (directories and attributes) we follow the same btree-scrubbing 70 * strategy outlined previously to check the records. 71 * 72 * We use a bit of trickery with transactions to avoid buffer deadlocks 73 * if there is a cycle in the metadata. The basic problem is that 74 * travelling down a btree involves locking the current buffer at each 75 * tree level. If a pointer should somehow point back to a buffer that 76 * we've already examined, we will deadlock due to the second buffer 77 * locking attempt. Note however that grabbing a buffer in transaction 78 * context links the locked buffer to the transaction. If we try to 79 * re-grab the buffer in the context of the same transaction, we avoid 80 * the second lock attempt and continue. 
Between the verifier and the 81 * scrubber, something will notice that something is amiss and report 82 * the corruption. Therefore, each scrubber will allocate an empty 83 * transaction, attach buffers to it, and cancel the transaction at the 84 * end of the scrub run. Cancelling a non-dirty transaction simply 85 * unlocks the buffers. 86 * 87 * There are four pieces of data that scrub can communicate to 88 * userspace. The first is the error code (errno), which can be used to 89 * communicate operational errors in performing the scrub. There are 90 * also three flags that can be set in the scrub context. If the data 91 * structure itself is corrupt, the CORRUPT flag will be set. If 92 * the metadata is correct but otherwise suboptimal, the PREEN flag 93 * will be set. 94 * 95 * We perform secondary validation of filesystem metadata by 96 * cross-referencing every record with all other available metadata. 97 * For example, for block mapping extents, we verify that there are no 98 * records in the free space and inode btrees corresponding to that 99 * space extent and that there is a corresponding entry in the reverse 100 * mapping btree. Inconsistent metadata is noted by setting the 101 * XCORRUPT flag; btree query function errors are noted by setting the 102 * XFAIL flag and deleting the cursor to prevent further attempts to 103 * cross-reference with a defective btree. 104 * 105 * If a piece of metadata proves corrupt or suboptimal, the userspace 106 * program can ask the kernel to apply some tender loving care (TLC) to 107 * the metadata object by setting the REPAIR flag and re-calling the 108 * scrub ioctl. "Corruption" is defined by metadata violating the 109 * on-disk specification; operations cannot continue if the violation is 110 * left untreated. It is possible for XFS to continue if an object is 111 * "suboptimal", however performance may be degraded. 
Repairs are 112 * usually performed by rebuilding the metadata entirely out of 113 * redundant metadata. Optimizing, on the other hand, can sometimes be 114 * done without rebuilding entire structures. 115 * 116 * Generally speaking, the repair code has the following code structure: 117 * Lock -> scrub -> repair -> commit -> re-lock -> re-scrub -> unlock. 118 * The first check helps us figure out if we need to rebuild or simply 119 * optimize the structure so that the rebuild knows what to do. The 120 * second check evaluates the completeness of the repair; that is what 121 * is reported to userspace. 122 * 123 * A quick note on symbol prefixes: 124 * - "xfs_" are general XFS symbols. 125 * - "xchk_" are symbols related to metadata checking. 126 * - "xrep_" are symbols related to metadata repair. 127 * - "xfs_scrub_" are symbols that tie online fsck to the rest of XFS. 128 */ 129 130 /* 131 * Scrub probe -- userspace uses this to probe if we're willing to scrub 132 * or repair a given mountpoint. This will be used by xfs_scrub to 133 * probe the kernel's abilities to scrub (and repair) the metadata. We 134 * do this by validating the ioctl inputs from userspace, preparing the 135 * filesystem for a scrub (or a repair) operation, and immediately 136 * returning to userspace. Userspace can use the returned errno and 137 * structure state to decide (in broad terms) if scrub/repair are 138 * supported by the running kernel. 
139 */ 140 static int 141 xchk_probe( 142 struct xfs_scrub *sc) 143 { 144 int error = 0; 145 146 if (xchk_should_terminate(sc, &error)) 147 return error; 148 149 return 0; 150 } 151 152 /* Scrub setup and teardown */ 153 154 #define FSGATES_MASK (XCHK_FSGATES_ALL | XREP_FSGATES_ALL) 155 static inline void 156 xchk_fsgates_disable( 157 struct xfs_scrub *sc) 158 { 159 if (!(sc->flags & FSGATES_MASK)) 160 return; 161 162 trace_xchk_fsgates_disable(sc, sc->flags & FSGATES_MASK); 163 164 if (sc->flags & XCHK_FSGATES_DRAIN) 165 xfs_drain_wait_disable(); 166 167 if (sc->flags & XCHK_FSGATES_QUOTA) 168 xfs_dqtrx_hook_disable(); 169 170 if (sc->flags & XCHK_FSGATES_DIRENTS) 171 xfs_dir_hook_disable(); 172 173 if (sc->flags & XCHK_FSGATES_RMAP) 174 xfs_rmap_hook_disable(); 175 176 sc->flags &= ~FSGATES_MASK; 177 } 178 #undef FSGATES_MASK 179 180 /* Free the resources associated with a scrub subtype. */ 181 void 182 xchk_scrub_free_subord( 183 struct xfs_scrub_subord *sub) 184 { 185 struct xfs_scrub *sc = sub->parent_sc; 186 187 ASSERT(sc->ip == sub->sc.ip); 188 ASSERT(sc->orphanage == sub->sc.orphanage); 189 ASSERT(sc->tempip == sub->sc.tempip); 190 191 sc->sm->sm_type = sub->old_smtype; 192 sc->sm->sm_flags = sub->old_smflags | 193 (sc->sm->sm_flags & XFS_SCRUB_FLAGS_OUT); 194 sc->tp = sub->sc.tp; 195 196 if (sub->sc.buf) { 197 if (sub->sc.buf_cleanup) 198 sub->sc.buf_cleanup(sub->sc.buf); 199 kvfree(sub->sc.buf); 200 } 201 if (sub->sc.xmbtp) 202 xmbuf_free(sub->sc.xmbtp); 203 if (sub->sc.xfile) 204 xfile_destroy(sub->sc.xfile); 205 206 sc->ilock_flags = sub->sc.ilock_flags; 207 sc->orphanage_ilock_flags = sub->sc.orphanage_ilock_flags; 208 sc->temp_ilock_flags = sub->sc.temp_ilock_flags; 209 210 kfree(sub); 211 } 212 213 /* Free all the resources and finish the transactions. 
*/
/*
 * Release what the scrub context holds, in this order: xchk_ag_free() on
 * sc->sa, the transaction, the inode under scrub, freeze protection, the
 * scratch memory (xmbtp, xfile, buf), xrep_tempfile_rele(),
 * xrep_orphanage_rele(), and finally the fs gates.
 *
 * @sc:    scrub context to clean out
 * @error: status of the operation so far
 *
 * Returns @error, except that when @error is zero and the repair flag is
 * set, the result of committing the transaction is returned instead.
 *
 * xfs_scrub_metadata() calls this before jumping back to retry_op with the
 * same context, which is why each released field is reset here.
 */
STATIC int
xchk_teardown(
	struct xfs_scrub	*sc,
	int			error)
{
	xchk_ag_free(sc, &sc->sa);
	if (sc->tp) {
		/*
		 * Commit only if nothing has failed and the caller asked for
		 * repairs; in every other case the transaction is cancelled.
		 */
		if (error == 0 && (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR))
			error = xfs_trans_commit(sc->tp);
		else
			xfs_trans_cancel(sc->tp);
		sc->tp = NULL;
	}
	if (sc->ip) {
		/* Drop whatever inode locks are recorded, then the inode. */
		if (sc->ilock_flags)
			xchk_iunlock(sc, sc->ilock_flags);
		xchk_irele(sc, sc->ip);
		sc->ip = NULL;
	}
	if (sc->flags & XCHK_HAVE_FREEZE_PROT) {
		/* Pairs with mnt_want_write_file() in xfs_scrub_metadata(). */
		sc->flags &= ~XCHK_HAVE_FREEZE_PROT;
		mnt_drop_write_file(sc->file);
	}
	if (sc->xmbtp) {
		xmbuf_free(sc->xmbtp);
		sc->xmbtp = NULL;
	}
	if (sc->xfile) {
		xfile_destroy(sc->xfile);
		sc->xfile = NULL;
	}
	if (sc->buf) {
		/* Let the buffer's owner clean up its contents before freeing. */
		if (sc->buf_cleanup)
			sc->buf_cleanup(sc->buf);
		kvfree(sc->buf);
		sc->buf_cleanup = NULL;
		sc->buf = NULL;
	}

	xrep_tempfile_rele(sc);
	xrep_orphanage_rele(sc);
	xchk_fsgates_disable(sc);
	return error;
}

/* Scrubbing dispatch.
*/

/*
 * Per-type operations table, indexed by XFS_SCRUB_TYPE_*.  For each entry:
 *
 *  .type        - ST_NONE, ST_FS, ST_PERAG or ST_INODE; selects which of the
 *                 sm_ino/sm_gen/sm_agno inputs xchk_validate_inputs() accepts
 *  .setup       - called first to prepare the scrub context
 *  .scrub       - the checker itself
 *  .has         - optional predicate; if set and it returns false for this
 *                 mount, the scrub type is rejected with -ENOENT
 *  .repair      - repair function
 *  .repair_eval - optional; called instead of .scrub when XREP_ALREADY_FIXED
 *                 is set in the scrub context flags
 *
 * xfs_scrub_metadata() has a BUILD_BUG_ON that requires this table to have
 * exactly XFS_SCRUB_TYPE_NR entries.
 */
static const struct xchk_meta_ops meta_scrub_ops[] = {
	[XFS_SCRUB_TYPE_PROBE] = {	/* ioctl presence test */
		.type	= ST_NONE,
		.setup	= xchk_setup_fs,
		.scrub	= xchk_probe,
		.repair = xrep_probe,
	},
	[XFS_SCRUB_TYPE_SB] = {		/* superblock */
		.type	= ST_PERAG,
		.setup	= xchk_setup_agheader,
		.scrub	= xchk_superblock,
		.repair	= xrep_superblock,
	},
	[XFS_SCRUB_TYPE_AGF] = {	/* agf */
		.type	= ST_PERAG,
		.setup	= xchk_setup_agheader,
		.scrub	= xchk_agf,
		.repair	= xrep_agf,
	},
	[XFS_SCRUB_TYPE_AGFL]= {	/* agfl */
		.type	= ST_PERAG,
		.setup	= xchk_setup_agheader,
		.scrub	= xchk_agfl,
		.repair	= xrep_agfl,
	},
	[XFS_SCRUB_TYPE_AGI] = {	/* agi */
		.type	= ST_PERAG,
		.setup	= xchk_setup_agheader,
		.scrub	= xchk_agi,
		.repair	= xrep_agi,
	},
	[XFS_SCRUB_TYPE_BNOBT] = {	/* bnobt */
		.type	= ST_PERAG,
		.setup	= xchk_setup_ag_allocbt,
		.scrub	= xchk_allocbt,
		.repair	= xrep_allocbt,
		.repair_eval = xrep_revalidate_allocbt,
	},
	[XFS_SCRUB_TYPE_CNTBT] = {	/* cntbt */
		.type	= ST_PERAG,
		.setup	= xchk_setup_ag_allocbt,
		.scrub	= xchk_allocbt,
		.repair	= xrep_allocbt,
		.repair_eval = xrep_revalidate_allocbt,
	},
	[XFS_SCRUB_TYPE_INOBT] = {	/* inobt */
		.type	= ST_PERAG,
		.setup	= xchk_setup_ag_iallocbt,
		.scrub	= xchk_iallocbt,
		.repair	= xrep_iallocbt,
		.repair_eval = xrep_revalidate_iallocbt,
	},
	[XFS_SCRUB_TYPE_FINOBT] = {	/* finobt */
		.type	= ST_PERAG,
		.setup	= xchk_setup_ag_iallocbt,
		.scrub	= xchk_iallocbt,
		.has	= xfs_has_finobt,
		.repair	= xrep_iallocbt,
		.repair_eval = xrep_revalidate_iallocbt,
	},
	[XFS_SCRUB_TYPE_RMAPBT] = {	/* rmapbt */
		.type	= ST_PERAG,
		.setup	= xchk_setup_ag_rmapbt,
		.scrub	= xchk_rmapbt,
		.has	= xfs_has_rmapbt,
		.repair	= xrep_rmapbt,
	},
	[XFS_SCRUB_TYPE_REFCNTBT] = {	/* refcountbt */
		.type	= ST_PERAG,
		.setup	= xchk_setup_ag_refcountbt,
		.scrub	= xchk_refcountbt,
		.has	= xfs_has_reflink,
		.repair	= xrep_refcountbt,
	},
	[XFS_SCRUB_TYPE_INODE] = {	/* inode record */
		.type	= ST_INODE,
		.setup	= xchk_setup_inode,
		.scrub	= xchk_inode,
		.repair	= xrep_inode,
	},
	[XFS_SCRUB_TYPE_BMBTD] = {	/* inode data fork */
		.type	= ST_INODE,
		.setup	= xchk_setup_inode_bmap,
		.scrub	= xchk_bmap_data,
		.repair	= xrep_bmap_data,
	},
	[XFS_SCRUB_TYPE_BMBTA] = {	/* inode attr fork */
		.type	= ST_INODE,
		.setup	= xchk_setup_inode_bmap,
		.scrub	= xchk_bmap_attr,
		.repair	= xrep_bmap_attr,
	},
	[XFS_SCRUB_TYPE_BMBTC] = {	/* inode CoW fork */
		.type	= ST_INODE,
		.setup	= xchk_setup_inode_bmap,
		.scrub	= xchk_bmap_cow,
		.repair	= xrep_bmap_cow,
	},
	[XFS_SCRUB_TYPE_DIR] = {	/* directory */
		.type	= ST_INODE,
		.setup	= xchk_setup_directory,
		.scrub	= xchk_directory,
		.repair	= xrep_directory,
	},
	[XFS_SCRUB_TYPE_XATTR] = {	/* extended attributes */
		.type	= ST_INODE,
		.setup	= xchk_setup_xattr,
		.scrub	= xchk_xattr,
		.repair	= xrep_xattr,
	},
	[XFS_SCRUB_TYPE_SYMLINK] = {	/* symbolic link */
		.type	= ST_INODE,
		.setup	= xchk_setup_symlink,
		.scrub	= xchk_symlink,
		.repair	= xrep_symlink,
	},
	[XFS_SCRUB_TYPE_PARENT] = {	/* parent pointers */
		.type	= ST_INODE,
		.setup	= xchk_setup_parent,
		.scrub	= xchk_parent,
		.repair	= xrep_parent,
	},
	[XFS_SCRUB_TYPE_RTBITMAP] = {	/* realtime bitmap */
		.type	= ST_FS,
		.setup	= xchk_setup_rtbitmap,
		.scrub	= xchk_rtbitmap,
		.repair	= xrep_rtbitmap,
	},
	[XFS_SCRUB_TYPE_RTSUM] = {	/* realtime summary */
		.type	= ST_FS,
		.setup	= xchk_setup_rtsummary,
		.scrub	= xchk_rtsummary,
		.repair	= xrep_rtsummary,
	},
	[XFS_SCRUB_TYPE_UQUOTA] = {	/* user quota */
		.type	= ST_FS,
		.setup	= xchk_setup_quota,
		.scrub	= xchk_quota,
		.repair	= xrep_quota,
	},
	[XFS_SCRUB_TYPE_GQUOTA] = {	/* group quota */
		.type	= ST_FS,
		.setup	= xchk_setup_quota,
		.scrub	= xchk_quota,
		.repair	= xrep_quota,
	},
	[XFS_SCRUB_TYPE_PQUOTA] = {	/* project quota */
		.type	= ST_FS,
		.setup	= xchk_setup_quota,
		.scrub	= xchk_quota,
		.repair	= xrep_quota,
	},
	[XFS_SCRUB_TYPE_FSCOUNTERS] = {	/* fs summary counters */
		.type	= ST_FS,
		.setup	= xchk_setup_fscounters,
		.scrub	= xchk_fscounters,
		.repair	= xrep_fscounters,
	},
	[XFS_SCRUB_TYPE_QUOTACHECK] = {	/* quota counters */
		.type	= ST_FS,
		.setup	= xchk_setup_quotacheck,
		.scrub	= xchk_quotacheck,
		.repair	= xrep_quotacheck,
	},
	[XFS_SCRUB_TYPE_NLINKS] = {	/* inode link counts */
		.type	= ST_FS,
		.setup	= xchk_setup_nlinks,
		.scrub	= xchk_nlinks,
		.repair	= xrep_nlinks,
	},
	[XFS_SCRUB_TYPE_HEALTHY] = {	/* fs healthy; clean all reminders */
		.type	= ST_FS,
		.setup	= xchk_setup_fs,
		.scrub	= xchk_health_record,
		.repair = xrep_notsupported,
	},
};

/*
 * Check the scrub request that userspace handed us.
 *
 * @mp: mount that the request targets
 * @sm: scrub request; the XFS_SCRUB_FLAGS_OUT bits of sm_flags are cleared
 *      here as a side effect, before any other check runs
 *
 * Returns 0 if the request is acceptable, or:
 *   -EINVAL     unknown input flags, nonzero sm_reserved[], restricting
 *               fields (sm_ino/sm_gen/sm_agno) that do not fit the scrub
 *               type, or FORCE_REBUILD requested without REPAIR
 *   -ENOENT     sm_type out of range, no setup/scrub function for the type,
 *               or the type's ->has predicate says this fs lacks the feature
 *   -EOPNOTSUPP repair requested but xfs_has_crc() is false
 *   -EROFS      repair requested on a read-only mount
 */
static int
xchk_validate_inputs(
	struct xfs_mount		*mp,
	struct xfs_scrub_metadata	*sm)
{
	int				error;
	const struct xchk_meta_ops	*ops;

	/*
	 * Pattern used throughout: preload the errno for the next group of
	 * checks, then jump to out on the first check that fails.
	 */
	error = -EINVAL;
	/* Check our inputs. */
	sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
	if (sm->sm_flags & ~XFS_SCRUB_FLAGS_IN)
		goto out;
	/* sm_reserved[] must be zero */
	if (memchr_inv(sm->sm_reserved, 0, sizeof(sm->sm_reserved)))
		goto out;

	error = -ENOENT;
	/* Do we know about this type of metadata? */
	if (sm->sm_type >= XFS_SCRUB_TYPE_NR)
		goto out;
	ops = &meta_scrub_ops[sm->sm_type];
	if (ops->setup == NULL || ops->scrub == NULL)
		goto out;
	/* Does this fs even support this type of metadata? */
	if (ops->has && !ops->has(mp))
		goto out;

	error = -EINVAL;
	/* restricting fields must be appropriate for type */
	switch (ops->type) {
	case ST_NONE:
	case ST_FS:
		/* Whole-fs scrubbers take no inode or AG selector. */
		if (sm->sm_ino || sm->sm_gen || sm->sm_agno)
			goto out;
		break;
	case ST_PERAG:
		/* Per-AG scrubbers need a valid AG number and nothing else. */
		if (sm->sm_ino || sm->sm_gen ||
		    sm->sm_agno >= mp->m_sb.sb_agcount)
			goto out;
		break;
	case ST_INODE:
		/* No AG number, and a generation only alongside an inode. */
		if (sm->sm_agno || (sm->sm_gen && !sm->sm_ino))
			goto out;
		break;
	default:
		goto out;
	}

	/* No rebuild without repair. */
	if ((sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) &&
	    !(sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR))
		return -EINVAL;

	/*
	 * We only want to repair read-write v5+ filesystems.  Defer the check
	 * for ops->repair until after our scrub confirms that we need to
	 * perform repairs so that we avoid failing due to not supporting
	 * repairing an object that doesn't need repairs.
	 */
	if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) {
		error = -EOPNOTSUPP;
		if (!xfs_has_crc(mp))
			goto out;

		error = -EROFS;
		if (xfs_is_readonly(mp))
			goto out;
	}

	error = 0;
out:
	return error;
}

/*
 * Log a complaint if the scrub left corruption behind.  Which complaint is
 * logged depends on whether online repair is compiled in.
 */
#ifdef CONFIG_XFS_ONLINE_REPAIR
static inline void xchk_postmortem(struct xfs_scrub *sc)
{
	/*
	 * Userspace asked us to repair something, we repaired it, rescanned
	 * it, and the rescan says it's still broken.  Scream about this in
	 * the system logs.
	 */
	if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) &&
	    (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
				 XFS_SCRUB_OFLAG_XCORRUPT)))
		xrep_failure(sc->mp);
}
#else
static inline void xchk_postmortem(struct xfs_scrub *sc)
{
	/*
	 * Userspace asked us to scrub something, it's broken, and we have no
	 * way of fixing it.  Scream in the logs.
	 */
	if (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
				XFS_SCRUB_OFLAG_XCORRUPT))
		xfs_alert_ratelimited(sc->mp,
				"Corruption detected during scrub.");
}
#endif /* CONFIG_XFS_ONLINE_REPAIR */

/*
 * Create a new scrub context from an existing one, but with a different scrub
 * type.
 *
 * @sc:      parent scrub context to clone
 * @subtype: XFS_SCRUB_TYPE_* value that the subordinate context will run
 *
 * The subordinate is a byte copy of @sc, so it shares the parent's sm
 * structure: the sm_type and sm_flags changes made here are visible through
 * the parent too.  The previous values are saved in the subordinate so that
 * xchk_scrub_free_subord() can put them back.
 *
 * Returns the new subordinate, or ERR_PTR(-ENOMEM) if allocation fails.
 *
 * NOTE(review): @subtype indexes meta_scrub_ops without a range check here;
 * presumably callers only pass valid XFS_SCRUB_TYPE_* constants -- confirm.
 */
struct xfs_scrub_subord *
xchk_scrub_create_subord(
	struct xfs_scrub	*sc,
	unsigned int		subtype)
{
	struct xfs_scrub_subord	*sub;

	sub = kzalloc(sizeof(*sub), XCHK_GFP_FLAGS);
	if (!sub)
		return ERR_PTR(-ENOMEM);

	sub->old_smtype = sc->sm->sm_type;
	sub->old_smflags = sc->sm->sm_flags;
	sub->parent_sc = sc;
	memcpy(&sub->sc, sc, sizeof(struct xfs_scrub));
	sub->sc.ops = &meta_scrub_ops[subtype];
	sub->sc.sm->sm_type = subtype;
	sub->sc.sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
	/*
	 * Do not inherit the parent's scratch memory; the subordinate starts
	 * with none, and xchk_scrub_free_subord() frees only what is set in
	 * these fields.
	 */
	sub->sc.buf = NULL;
	sub->sc.buf_cleanup = NULL;
	sub->sc.xfile = NULL;
	sub->sc.xmbtp = NULL;

	return sub;
}

/*
 * Dispatch metadata scrubbing.
 *
 * @file: open file that the scrub request arrived on; identifies the mount
 * @sm:   scrub request; sm_flags is updated with the output flags
 *
 * Validates the request, then runs setup -> scrub -> (optionally) repair for
 * the requested type.  Setup and scrub may each ask for one retry with
 * XCHK_TRY_HARDER (-EDEADLOCK) and one with XCHK_NEED_DRAIN (-ECHRNG).  A
 * repair attempt that returns -EAGAIN also restarts from retry_op.
 *
 * Returns 0 or a negative errno.  -EFSCORRUPTED and -EFSBADCRC are not
 * returned: they are reported by setting XFS_SCRUB_OFLAG_CORRUPT in
 * sm->sm_flags and returning 0.
 */
int
xfs_scrub_metadata(
	struct file			*file,
	struct xfs_scrub_metadata	*sm)
{
	struct xchk_stats_run		run = { };
	struct xfs_scrub		*sc;
	struct xfs_mount		*mp = XFS_I(file_inode(file))->i_mount;
	u64				check_start;
	int				error = 0;

	/* The ops table must have exactly one slot per scrub type. */
	BUILD_BUG_ON(sizeof(meta_scrub_ops) !=
		(sizeof(struct xchk_meta_ops) * XFS_SCRUB_TYPE_NR));

	trace_xchk_start(XFS_I(file_inode(file)), sm, error);

	/* Forbidden if we are shut down or mounted norecovery. */
	error = -ESHUTDOWN;
	if (xfs_is_shutdown(mp))
		goto out;
	error = -ENOTRECOVERABLE;
	if (xfs_has_norecovery(mp))
		goto out;

	error = xchk_validate_inputs(mp, sm);
	if (error)
		goto out;

	xfs_warn_mount(mp, XFS_OPSTATE_WARNED_SCRUB,
 "EXPERIMENTAL online scrub feature in use. Use at your own risk!");

	sc = kzalloc(sizeof(struct xfs_scrub), XCHK_GFP_FLAGS);
	if (!sc) {
		error = -ENOMEM;
		goto out;
	}

	/* sm_type was range-checked by xchk_validate_inputs() above. */
	sc->mp = mp;
	sc->file = file;
	sc->sm = sm;
	sc->ops = &meta_scrub_ops[sm->sm_type];
	sc->sick_mask = xchk_health_mask_for_scrub_type(sm->sm_type);
retry_op:
	/*
	 * When repairs are allowed, prevent freezing or readonly remount while
	 * scrub is running with a real transaction.
	 */
	if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) {
		error = mnt_want_write_file(sc->file);
		if (error)
			goto out_sc;

		/* Tells xchk_teardown() to drop the write access again. */
		sc->flags |= XCHK_HAVE_FREEZE_PROT;
	}

	/* Set up for the operation. */
	error = sc->ops->setup(sc);
	if (error == -EDEADLOCK && !(sc->flags & XCHK_TRY_HARDER))
		goto try_harder;
	if (error == -ECHRNG && !(sc->flags & XCHK_NEED_DRAIN))
		goto need_drain;
	if (error)
		goto out_teardown;

	/* Scrub for errors. */
	check_start = xchk_stats_now();
	if ((sc->flags & XREP_ALREADY_FIXED) && sc->ops->repair_eval != NULL)
		error = sc->ops->repair_eval(sc);
	else
		error = sc->ops->scrub(sc);
	run.scrub_ns += xchk_stats_elapsed_ns(check_start);
	if (error == -EDEADLOCK && !(sc->flags & XCHK_TRY_HARDER))
		goto try_harder;
	if (error == -ECHRNG && !(sc->flags & XCHK_NEED_DRAIN))
		goto need_drain;
	/* Skip the health update and repair if the scan failed or fell short. */
	if (error || (sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE))
		goto out_teardown;

	xchk_update_health(sc);

	if (xchk_could_repair(sc)) {
		/*
		 * If userspace asked for a repair but it wasn't necessary,
		 * report that back to userspace.
		 */
		if (!xrep_will_attempt(sc)) {
			sc->sm->sm_flags |= XFS_SCRUB_OFLAG_NO_REPAIR_NEEDED;
			goto out_nofix;
		}

		/*
		 * If it's broken, userspace wants us to fix it, and we haven't
		 * already tried to fix it, then attempt a repair.
		 */
		error = xrep_attempt(sc, &run);
		if (error == -EAGAIN) {
			/*
			 * Either the repair function succeeded or it couldn't
			 * get all the resources it needs; either way, we go
			 * back to the beginning and call the scrub function.
			 */
			error = xchk_teardown(sc, 0);
			if (error) {
				xrep_failure(mp);
				goto out_sc;
			}
			goto retry_op;
		}
	}

out_nofix:
	xchk_postmortem(sc);
out_teardown:
	error = xchk_teardown(sc, error);
out_sc:
	/* Stats are merged for every outcome except -ENOENT. */
	if (error != -ENOENT)
		xchk_stats_merge(mp, sm, &run);
	kfree(sc);
out:
	trace_xchk_done(XFS_I(file_inode(file)), sm, error);
	/* Report corruption errors through the output flag, not the errno. */
	if (error == -EFSCORRUPTED || error == -EFSBADCRC) {
		sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
		error = 0;
	}
	return error;
need_drain:
	/*
	 * -ECHRNG from setup or scrub: tear everything down and retry once
	 * with XCHK_NEED_DRAIN set.
	 */
	error = xchk_teardown(sc, 0);
	if (error)
		goto out_sc;
	sc->flags |= XCHK_NEED_DRAIN;
	run.retries++;
	goto retry_op;
try_harder:
	/*
	 * Scrubbers return -EDEADLOCK to mean 'try harder'.  Tear down
	 * everything we hold, then set up again with preparation for
	 * worst-case scenarios.
	 */
	error = xchk_teardown(sc, 0);
	if (error)
		goto out_sc;
	sc->flags |= XCHK_TRY_HARDER;
	run.retries++;
	goto retry_op;
}