// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2021-2024 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_inode.h"
#include "xfs_ialloc.h"
#include "xfs_quota.h"
#include "xfs_trans_space.h"
#include "xfs_dir2.h"
#include "xfs_icache.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "xfs_parent.h"
#include "xfs_attr_sf.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/repair.h"
#include "scrub/trace.h"
#include "scrub/orphanage.h"
#include "scrub/readdir.h"

#include <linux/namei.h>

/*
 * The Orphanage
 * =============
 *
 * If the directory tree is damaged, children of that directory become
 * inaccessible via that file path.  If a child has no other parents, the file
 * is said to be orphaned.  xfs_repair fixes this situation by creating an
 * orphanage directory (specifically, /lost+found) and creating a directory
 * entry pointing to the orphaned file.
 *
 * Online repair follows this tactic by creating a root-owned /lost+found
 * directory if one does not exist.  If an orphan is found, it will move that
 * file into the orphanage.
 */

/* Make the orphanage owned by root.
*/ 49 STATIC int 50 xrep_chown_orphanage( 51 struct xfs_scrub *sc, 52 struct xfs_inode *dp) 53 { 54 struct xfs_trans *tp; 55 struct xfs_mount *mp = sc->mp; 56 struct xfs_dquot *udqp = NULL, *gdqp = NULL, *pdqp = NULL; 57 struct xfs_dquot *oldu = NULL, *oldg = NULL, *oldp = NULL; 58 struct inode *inode = VFS_I(dp); 59 int error; 60 61 error = xfs_qm_vop_dqalloc(dp, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0, 62 XFS_QMOPT_QUOTALL, &udqp, &gdqp, &pdqp); 63 if (error) 64 return error; 65 66 error = xfs_trans_alloc_ichange(dp, udqp, gdqp, pdqp, true, &tp); 67 if (error) 68 goto out_dqrele; 69 70 /* 71 * Always clear setuid/setgid/sticky on the orphanage since we don't 72 * normally want that functionality on this directory and xfs_repair 73 * doesn't create it this way either. Leave the other access bits 74 * unchanged. 75 */ 76 inode->i_mode &= ~(S_ISUID | S_ISGID | S_ISVTX); 77 78 /* 79 * Change the ownerships and register quota modifications 80 * in the transaction. 81 */ 82 if (!uid_eq(inode->i_uid, GLOBAL_ROOT_UID)) { 83 if (XFS_IS_UQUOTA_ON(mp)) 84 oldu = xfs_qm_vop_chown(tp, dp, &dp->i_udquot, udqp); 85 inode->i_uid = GLOBAL_ROOT_UID; 86 } 87 if (!gid_eq(inode->i_gid, GLOBAL_ROOT_GID)) { 88 if (XFS_IS_GQUOTA_ON(mp)) 89 oldg = xfs_qm_vop_chown(tp, dp, &dp->i_gdquot, gdqp); 90 inode->i_gid = GLOBAL_ROOT_GID; 91 } 92 if (dp->i_projid != 0) { 93 if (XFS_IS_PQUOTA_ON(mp)) 94 oldp = xfs_qm_vop_chown(tp, dp, &dp->i_pdquot, pdqp); 95 dp->i_projid = 0; 96 } 97 98 dp->i_diflags &= ~(XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT); 99 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 100 101 XFS_STATS_INC(mp, xs_ig_attrchg); 102 103 if (xfs_has_wsync(mp)) 104 xfs_trans_set_sync(tp); 105 error = xfs_trans_commit(tp); 106 107 xfs_qm_dqrele(oldu); 108 xfs_qm_dqrele(oldg); 109 xfs_qm_dqrele(oldp); 110 111 out_dqrele: 112 xfs_qm_dqrele(udqp); 113 xfs_qm_dqrele(gdqp); 114 xfs_qm_dqrele(pdqp); 115 return error; 116 } 117 118 #define ORPHANAGE "lost+found" 119 120 /* Create the orphanage directory, 
and set sc->orphanage to it. */ 121 int 122 xrep_orphanage_create( 123 struct xfs_scrub *sc) 124 { 125 struct xfs_mount *mp = sc->mp; 126 struct dentry *root_dentry, *orphanage_dentry; 127 struct inode *root_inode = VFS_I(sc->mp->m_rootip); 128 struct inode *orphanage_inode; 129 int error; 130 131 if (xfs_is_shutdown(mp)) 132 return -EIO; 133 if (xfs_is_readonly(mp)) { 134 sc->orphanage = NULL; 135 return 0; 136 } 137 138 ASSERT(sc->tp == NULL); 139 ASSERT(sc->orphanage == NULL); 140 141 /* Find the dentry for the root directory... */ 142 root_dentry = d_find_alias(root_inode); 143 if (!root_dentry) { 144 error = -EFSCORRUPTED; 145 goto out; 146 } 147 148 /* ...which is a directory, right? */ 149 if (!d_is_dir(root_dentry)) { 150 error = -EFSCORRUPTED; 151 goto out_dput_root; 152 } 153 154 /* Try to find the orphanage directory. */ 155 inode_lock_nested(root_inode, I_MUTEX_PARENT); 156 orphanage_dentry = lookup_one_len(ORPHANAGE, root_dentry, 157 strlen(ORPHANAGE)); 158 if (IS_ERR(orphanage_dentry)) { 159 error = PTR_ERR(orphanage_dentry); 160 goto out_unlock_root; 161 } 162 163 /* 164 * Nothing found? Call mkdir to create the orphanage. Create the 165 * directory without other-user access because we're live and someone 166 * could have been relying partly on minimal access to a parent 167 * directory to control access to a file we put in here. 168 */ 169 if (d_really_is_negative(orphanage_dentry)) { 170 error = vfs_mkdir(&nop_mnt_idmap, root_inode, orphanage_dentry, 171 0750); 172 if (error) 173 goto out_dput_orphanage; 174 } 175 176 /* Not a directory? Bail out. */ 177 if (!d_is_dir(orphanage_dentry)) { 178 error = -ENOTDIR; 179 goto out_dput_orphanage; 180 } 181 182 /* 183 * Grab a reference to the orphanage. This /should/ succeed since 184 * we hold the root directory locked and therefore nobody can delete 185 * the orphanage. 
186 */ 187 orphanage_inode = igrab(d_inode(orphanage_dentry)); 188 if (!orphanage_inode) { 189 error = -ENOENT; 190 goto out_dput_orphanage; 191 } 192 193 /* Make sure the orphanage is owned by root. */ 194 error = xrep_chown_orphanage(sc, XFS_I(orphanage_inode)); 195 if (error) 196 goto out_dput_orphanage; 197 198 /* Stash the reference for later and bail out. */ 199 sc->orphanage = XFS_I(orphanage_inode); 200 sc->orphanage_ilock_flags = 0; 201 202 out_dput_orphanage: 203 dput(orphanage_dentry); 204 out_unlock_root: 205 inode_unlock(VFS_I(sc->mp->m_rootip)); 206 out_dput_root: 207 dput(root_dentry); 208 out: 209 return error; 210 } 211 212 void 213 xrep_orphanage_ilock( 214 struct xfs_scrub *sc, 215 unsigned int ilock_flags) 216 { 217 sc->orphanage_ilock_flags |= ilock_flags; 218 xfs_ilock(sc->orphanage, ilock_flags); 219 } 220 221 bool 222 xrep_orphanage_ilock_nowait( 223 struct xfs_scrub *sc, 224 unsigned int ilock_flags) 225 { 226 if (xfs_ilock_nowait(sc->orphanage, ilock_flags)) { 227 sc->orphanage_ilock_flags |= ilock_flags; 228 return true; 229 } 230 231 return false; 232 } 233 234 void 235 xrep_orphanage_iunlock( 236 struct xfs_scrub *sc, 237 unsigned int ilock_flags) 238 { 239 xfs_iunlock(sc->orphanage, ilock_flags); 240 sc->orphanage_ilock_flags &= ~ilock_flags; 241 } 242 243 /* Grab the IOLOCK of the orphanage and sc->ip. */ 244 int 245 xrep_orphanage_iolock_two( 246 struct xfs_scrub *sc) 247 { 248 int error = 0; 249 250 while (true) { 251 if (xchk_should_terminate(sc, &error)) 252 return error; 253 254 /* 255 * Normal XFS takes the IOLOCK before grabbing a transaction. 256 * Scrub holds a transaction, which means that we can't block 257 * on either IOLOCK. 258 */ 259 if (xrep_orphanage_ilock_nowait(sc, XFS_IOLOCK_EXCL)) { 260 if (xchk_ilock_nowait(sc, XFS_IOLOCK_EXCL)) 261 break; 262 xrep_orphanage_iunlock(sc, XFS_IOLOCK_EXCL); 263 } 264 delay(1); 265 } 266 267 return 0; 268 } 269 270 /* Release the orphanage. 
*/ 271 void 272 xrep_orphanage_rele( 273 struct xfs_scrub *sc) 274 { 275 if (!sc->orphanage) 276 return; 277 278 if (sc->orphanage_ilock_flags) 279 xfs_iunlock(sc->orphanage, sc->orphanage_ilock_flags); 280 281 xchk_irele(sc, sc->orphanage); 282 sc->orphanage = NULL; 283 } 284 285 /* Adoption moves a file into /lost+found */ 286 287 /* Can the orphanage adopt @sc->ip? */ 288 bool 289 xrep_orphanage_can_adopt( 290 struct xfs_scrub *sc) 291 { 292 ASSERT(sc->ip != NULL); 293 294 if (!sc->orphanage) 295 return false; 296 if (sc->ip == sc->orphanage) 297 return false; 298 if (xfs_internal_inum(sc->mp, sc->ip->i_ino)) 299 return false; 300 return true; 301 } 302 303 /* 304 * Create a new transaction to send a child to the orphanage. 305 * 306 * Allocate a new transaction with sufficient disk space to handle the 307 * adoption, take ILOCK_EXCL of the orphanage and sc->ip, joins them to the 308 * transaction, and reserve quota to reparent the latter. Caller must hold the 309 * IOLOCK of the orphanage and sc->ip. 310 */ 311 int 312 xrep_adoption_trans_alloc( 313 struct xfs_scrub *sc, 314 struct xrep_adoption *adopt) 315 { 316 struct xfs_mount *mp = sc->mp; 317 unsigned int child_blkres = 0; 318 int error; 319 320 ASSERT(sc->tp == NULL); 321 ASSERT(sc->ip != NULL); 322 ASSERT(sc->orphanage != NULL); 323 ASSERT(sc->ilock_flags & XFS_IOLOCK_EXCL); 324 ASSERT(sc->orphanage_ilock_flags & XFS_IOLOCK_EXCL); 325 ASSERT(!(sc->ilock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL))); 326 ASSERT(!(sc->orphanage_ilock_flags & 327 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL))); 328 329 /* Compute the worst case space reservation that we need. 
*/ 330 adopt->sc = sc; 331 adopt->orphanage_blkres = xfs_link_space_res(mp, MAXNAMELEN); 332 if (S_ISDIR(VFS_I(sc->ip)->i_mode)) 333 child_blkres = xfs_rename_space_res(mp, 0, false, 334 xfs_name_dotdot.len, false); 335 if (xfs_has_parent(mp)) 336 child_blkres += XFS_ADDAFORK_SPACE_RES(mp); 337 adopt->child_blkres = child_blkres; 338 339 /* 340 * Allocate a transaction to link the child into the parent, along with 341 * enough disk space to handle expansion of both the orphanage and the 342 * dotdot entry of a child directory. 343 */ 344 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, 345 adopt->orphanage_blkres + adopt->child_blkres, 0, 0, 346 &sc->tp); 347 if (error) 348 return error; 349 350 xfs_lock_two_inodes(sc->orphanage, XFS_ILOCK_EXCL, 351 sc->ip, XFS_ILOCK_EXCL); 352 sc->ilock_flags |= XFS_ILOCK_EXCL; 353 sc->orphanage_ilock_flags |= XFS_ILOCK_EXCL; 354 355 xfs_trans_ijoin(sc->tp, sc->orphanage, 0); 356 xfs_trans_ijoin(sc->tp, sc->ip, 0); 357 358 /* 359 * Reserve enough quota in the orphan directory to add the new name. 360 * Normally the orphanage should have user/group/project ids of zero 361 * and hence is not subject to quota enforcement, but we're allowed to 362 * exceed quota to reattach disconnected parts of the directory tree. 363 */ 364 error = xfs_trans_reserve_quota_nblks(sc->tp, sc->orphanage, 365 adopt->orphanage_blkres, 0, true); 366 if (error) 367 goto out_cancel; 368 369 /* 370 * Reserve enough quota in the child directory to change dotdot. 371 * Here we're also allowed to exceed file quota to repair inconsistent 372 * metadata. 
373 */ 374 if (adopt->child_blkres) { 375 error = xfs_trans_reserve_quota_nblks(sc->tp, sc->ip, 376 adopt->child_blkres, 0, true); 377 if (error) 378 goto out_cancel; 379 } 380 381 return 0; 382 out_cancel: 383 xchk_trans_cancel(sc); 384 xrep_orphanage_iunlock(sc, XFS_ILOCK_EXCL); 385 xrep_orphanage_iunlock(sc, XFS_IOLOCK_EXCL); 386 return error; 387 } 388 389 /* 390 * Compute the xfs_name for the directory entry that we're adding to the 391 * orphanage. Caller must hold ILOCKs of sc->ip and the orphanage and must not 392 * reuse namebuf until the adoption completes or is dissolved. 393 */ 394 int 395 xrep_adoption_compute_name( 396 struct xrep_adoption *adopt, 397 struct xfs_name *xname) 398 { 399 struct xfs_scrub *sc = adopt->sc; 400 char *namebuf = (void *)xname->name; 401 xfs_ino_t ino; 402 unsigned int incr = 0; 403 int error = 0; 404 405 adopt->xname = xname; 406 xname->len = snprintf(namebuf, MAXNAMELEN, "%llu", sc->ip->i_ino); 407 xname->type = xfs_mode_to_ftype(VFS_I(sc->ip)->i_mode); 408 409 /* Make sure the filename is unique in the lost+found. */ 410 error = xchk_dir_lookup(sc, sc->orphanage, xname, &ino); 411 while (error == 0 && incr < 10000) { 412 xname->len = snprintf(namebuf, MAXNAMELEN, "%llu.%u", 413 sc->ip->i_ino, ++incr); 414 error = xchk_dir_lookup(sc, sc->orphanage, xname, &ino); 415 } 416 if (error == 0) { 417 /* We already have 10,000 entries in the orphanage? */ 418 return -EFSCORRUPTED; 419 } 420 421 if (error != -ENOENT) 422 return error; 423 return 0; 424 } 425 426 /* 427 * Make sure the dcache does not have a positive dentry for the name we've 428 * chosen. The caller should have checked with the ondisk directory, so any 429 * discrepancy is a sign that something is seriously wrong. 
430 */ 431 static int 432 xrep_adoption_check_dcache( 433 struct xrep_adoption *adopt) 434 { 435 struct qstr qname = QSTR_INIT(adopt->xname->name, 436 adopt->xname->len); 437 struct dentry *d_orphanage, *d_child; 438 int error = 0; 439 440 d_orphanage = d_find_alias(VFS_I(adopt->sc->orphanage)); 441 if (!d_orphanage) 442 return 0; 443 444 d_child = d_hash_and_lookup(d_orphanage, &qname); 445 if (d_child) { 446 trace_xrep_adoption_check_child(adopt->sc->mp, d_child); 447 448 if (d_is_positive(d_child)) { 449 ASSERT(d_is_negative(d_child)); 450 error = -EFSCORRUPTED; 451 } 452 453 dput(d_child); 454 } 455 456 dput(d_orphanage); 457 if (error) 458 return error; 459 460 /* 461 * Do we need to update d_parent of the dentry for the file being 462 * repaired? There shouldn't be a hashed dentry with a parent since 463 * the file had nonzero nlink but wasn't connected to any parent dir. 464 */ 465 d_child = d_find_alias(VFS_I(adopt->sc->ip)); 466 if (!d_child) 467 return 0; 468 469 trace_xrep_adoption_check_alias(adopt->sc->mp, d_child); 470 471 if (d_child->d_parent && !d_unhashed(d_child)) { 472 ASSERT(d_child->d_parent == NULL || d_unhashed(d_child)); 473 error = -EFSCORRUPTED; 474 } 475 476 dput(d_child); 477 return error; 478 } 479 480 /* 481 * Remove all negative dentries from the dcache. There should not be any 482 * positive entries, since we've maintained our lock on the orphanage 483 * directory. 
484 */ 485 static void 486 xrep_adoption_zap_dcache( 487 struct xrep_adoption *adopt) 488 { 489 struct qstr qname = QSTR_INIT(adopt->xname->name, 490 adopt->xname->len); 491 struct dentry *d_orphanage, *d_child; 492 493 d_orphanage = d_find_alias(VFS_I(adopt->sc->orphanage)); 494 if (!d_orphanage) 495 return; 496 497 d_child = d_hash_and_lookup(d_orphanage, &qname); 498 while (d_child != NULL) { 499 trace_xrep_adoption_invalidate_child(adopt->sc->mp, d_child); 500 501 ASSERT(d_is_negative(d_child)); 502 d_invalidate(d_child); 503 dput(d_child); 504 d_child = d_lookup(d_orphanage, &qname); 505 } 506 507 dput(d_orphanage); 508 } 509 510 /* 511 * If we have to add an attr fork ahead of a parent pointer update, how much 512 * space should we ask for? 513 */ 514 static inline int 515 xrep_adoption_attr_sizeof( 516 const struct xrep_adoption *adopt) 517 { 518 return sizeof(struct xfs_attr_sf_hdr) + 519 xfs_attr_sf_entsize_byname(sizeof(struct xfs_parent_rec), 520 adopt->xname->len); 521 } 522 523 /* 524 * Move the current file to the orphanage under the computed name. 525 * 526 * Returns with a dirty transaction so that the caller can handle any other 527 * work, such as fixing up unlinked lists or resetting link counts. 528 */ 529 int 530 xrep_adoption_move( 531 struct xrep_adoption *adopt) 532 { 533 struct xfs_scrub *sc = adopt->sc; 534 bool isdir = S_ISDIR(VFS_I(sc->ip)->i_mode); 535 int error; 536 537 trace_xrep_adoption_reparent(sc->orphanage, adopt->xname, 538 sc->ip->i_ino); 539 540 error = xrep_adoption_check_dcache(adopt); 541 if (error) 542 return error; 543 544 /* 545 * If this filesystem has parent pointers, ensure that the file being 546 * moved to the orphanage has an attribute fork. This is required 547 * because the parent pointer code does not itself add attr forks. 
548 */ 549 if (!xfs_inode_has_attr_fork(sc->ip) && xfs_has_parent(sc->mp)) { 550 int sf_size = xrep_adoption_attr_sizeof(adopt); 551 552 error = xfs_bmap_add_attrfork(sc->tp, sc->ip, sf_size, true); 553 if (error) 554 return error; 555 } 556 557 /* Create the new name in the orphanage. */ 558 error = xfs_dir_createname(sc->tp, sc->orphanage, adopt->xname, 559 sc->ip->i_ino, adopt->orphanage_blkres); 560 if (error) 561 return error; 562 563 /* 564 * Bump the link count of the orphanage if we just added a 565 * subdirectory, and update its timestamps. 566 */ 567 xfs_trans_ichgtime(sc->tp, sc->orphanage, 568 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 569 if (isdir) 570 xfs_bumplink(sc->tp, sc->orphanage); 571 xfs_trans_log_inode(sc->tp, sc->orphanage, XFS_ILOG_CORE); 572 573 /* Bump the link count of the child. */ 574 if (adopt->bump_child_nlink) { 575 xfs_bumplink(sc->tp, sc->ip); 576 xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE); 577 } 578 579 /* Replace the dotdot entry if the child is a subdirectory. */ 580 if (isdir) { 581 error = xfs_dir_replace(sc->tp, sc->ip, &xfs_name_dotdot, 582 sc->orphanage->i_ino, adopt->child_blkres); 583 if (error) 584 return error; 585 } 586 587 /* Add a parent pointer from the file back to the lost+found. */ 588 if (xfs_has_parent(sc->mp)) { 589 error = xfs_parent_addname(sc->tp, &adopt->ppargs, 590 sc->orphanage, adopt->xname, sc->ip); 591 if (error) 592 return error; 593 } 594 595 /* 596 * Notify dirent hooks that we moved the file to /lost+found, and 597 * finish all the deferred work so that we know the adoption is fully 598 * recorded in the log. 599 */ 600 xfs_dir_update_hook(sc->orphanage, sc->ip, 1, adopt->xname); 601 602 /* Remove negative dentries from the lost+found's dcache */ 603 xrep_adoption_zap_dcache(adopt); 604 return 0; 605 } 606 607 /* 608 * Roll to a clean scrub transaction so that we can release the orphanage, 609 * even if xrep_adoption_move was not called. 
610 * 611 * Commits all the work and deferred ops attached to an adoption request and 612 * rolls to a clean scrub transaction. On success, returns 0 with the scrub 613 * context holding a clean transaction with no inodes joined. On failure, 614 * returns negative errno with no scrub transaction. All inode locks are 615 * still held after this function returns. 616 */ 617 int 618 xrep_adoption_trans_roll( 619 struct xrep_adoption *adopt) 620 { 621 struct xfs_scrub *sc = adopt->sc; 622 int error; 623 624 trace_xrep_adoption_trans_roll(sc->orphanage, sc->ip, 625 !!(sc->tp->t_flags & XFS_TRANS_DIRTY)); 626 627 /* Finish all the deferred ops to commit all repairs. */ 628 error = xrep_defer_finish(sc); 629 if (error) 630 return error; 631 632 /* Roll the transaction once more to detach the inodes. */ 633 return xfs_trans_roll(&sc->tp); 634 } 635