// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2021-2024 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_inode.h"
#include "xfs_ialloc.h"
#include "xfs_quota.h"
#include "xfs_trans_space.h"
#include "xfs_dir2.h"
#include "xfs_icache.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/repair.h"
#include "scrub/trace.h"
#include "scrub/orphanage.h"
#include "scrub/readdir.h"

#include <linux/namei.h>

/*
 * The Orphanage
 * =============
 *
 * If the directory tree is damaged, children of that directory become
 * inaccessible via that file path. If a child has no other parents, the file
 * is said to be orphaned. xfs_repair fixes this situation by creating an
 * orphanage directory (specifically, /lost+found) and creating a directory
 * entry pointing to the orphaned file.
 *
 * Online repair follows this tactic by creating a root-owned /lost+found
 * directory if one does not exist. If an orphan is found, it will move that
 * file into the orphanage.
 */

/* Make the orphanage owned by root.
*/ 47 STATIC int 48 xrep_chown_orphanage( 49 struct xfs_scrub *sc, 50 struct xfs_inode *dp) 51 { 52 struct xfs_trans *tp; 53 struct xfs_mount *mp = sc->mp; 54 struct xfs_dquot *udqp = NULL, *gdqp = NULL, *pdqp = NULL; 55 struct xfs_dquot *oldu = NULL, *oldg = NULL, *oldp = NULL; 56 struct inode *inode = VFS_I(dp); 57 int error; 58 59 error = xfs_qm_vop_dqalloc(dp, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0, 60 XFS_QMOPT_QUOTALL, &udqp, &gdqp, &pdqp); 61 if (error) 62 return error; 63 64 error = xfs_trans_alloc_ichange(dp, udqp, gdqp, pdqp, true, &tp); 65 if (error) 66 goto out_dqrele; 67 68 /* 69 * Always clear setuid/setgid/sticky on the orphanage since we don't 70 * normally want that functionality on this directory and xfs_repair 71 * doesn't create it this way either. Leave the other access bits 72 * unchanged. 73 */ 74 inode->i_mode &= ~(S_ISUID | S_ISGID | S_ISVTX); 75 76 /* 77 * Change the ownerships and register quota modifications 78 * in the transaction. 79 */ 80 if (!uid_eq(inode->i_uid, GLOBAL_ROOT_UID)) { 81 if (XFS_IS_UQUOTA_ON(mp)) 82 oldu = xfs_qm_vop_chown(tp, dp, &dp->i_udquot, udqp); 83 inode->i_uid = GLOBAL_ROOT_UID; 84 } 85 if (!gid_eq(inode->i_gid, GLOBAL_ROOT_GID)) { 86 if (XFS_IS_GQUOTA_ON(mp)) 87 oldg = xfs_qm_vop_chown(tp, dp, &dp->i_gdquot, gdqp); 88 inode->i_gid = GLOBAL_ROOT_GID; 89 } 90 if (dp->i_projid != 0) { 91 if (XFS_IS_PQUOTA_ON(mp)) 92 oldp = xfs_qm_vop_chown(tp, dp, &dp->i_pdquot, pdqp); 93 dp->i_projid = 0; 94 } 95 96 dp->i_diflags &= ~(XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT); 97 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 98 99 XFS_STATS_INC(mp, xs_ig_attrchg); 100 101 if (xfs_has_wsync(mp)) 102 xfs_trans_set_sync(tp); 103 error = xfs_trans_commit(tp); 104 105 xfs_qm_dqrele(oldu); 106 xfs_qm_dqrele(oldg); 107 xfs_qm_dqrele(oldp); 108 109 out_dqrele: 110 xfs_qm_dqrele(udqp); 111 xfs_qm_dqrele(gdqp); 112 xfs_qm_dqrele(pdqp); 113 return error; 114 } 115 116 #define ORPHANAGE "lost+found" 117 118 /* Create the orphanage directory, 
and set sc->orphanage to it. */ 119 int 120 xrep_orphanage_create( 121 struct xfs_scrub *sc) 122 { 123 struct xfs_mount *mp = sc->mp; 124 struct dentry *root_dentry, *orphanage_dentry; 125 struct inode *root_inode = VFS_I(sc->mp->m_rootip); 126 struct inode *orphanage_inode; 127 int error; 128 129 if (xfs_is_shutdown(mp)) 130 return -EIO; 131 if (xfs_is_readonly(mp)) { 132 sc->orphanage = NULL; 133 return 0; 134 } 135 136 ASSERT(sc->tp == NULL); 137 ASSERT(sc->orphanage == NULL); 138 139 /* Find the dentry for the root directory... */ 140 root_dentry = d_find_alias(root_inode); 141 if (!root_dentry) { 142 error = -EFSCORRUPTED; 143 goto out; 144 } 145 146 /* ...which is a directory, right? */ 147 if (!d_is_dir(root_dentry)) { 148 error = -EFSCORRUPTED; 149 goto out_dput_root; 150 } 151 152 /* Try to find the orphanage directory. */ 153 inode_lock_nested(root_inode, I_MUTEX_PARENT); 154 orphanage_dentry = lookup_one_len(ORPHANAGE, root_dentry, 155 strlen(ORPHANAGE)); 156 if (IS_ERR(orphanage_dentry)) { 157 error = PTR_ERR(orphanage_dentry); 158 goto out_unlock_root; 159 } 160 161 /* 162 * Nothing found? Call mkdir to create the orphanage. Create the 163 * directory without other-user access because we're live and someone 164 * could have been relying partly on minimal access to a parent 165 * directory to control access to a file we put in here. 166 */ 167 if (d_really_is_negative(orphanage_dentry)) { 168 error = vfs_mkdir(&nop_mnt_idmap, root_inode, orphanage_dentry, 169 0750); 170 if (error) 171 goto out_dput_orphanage; 172 } 173 174 /* Not a directory? Bail out. */ 175 if (!d_is_dir(orphanage_dentry)) { 176 error = -ENOTDIR; 177 goto out_dput_orphanage; 178 } 179 180 /* 181 * Grab a reference to the orphanage. This /should/ succeed since 182 * we hold the root directory locked and therefore nobody can delete 183 * the orphanage. 
184 */ 185 orphanage_inode = igrab(d_inode(orphanage_dentry)); 186 if (!orphanage_inode) { 187 error = -ENOENT; 188 goto out_dput_orphanage; 189 } 190 191 /* Make sure the orphanage is owned by root. */ 192 error = xrep_chown_orphanage(sc, XFS_I(orphanage_inode)); 193 if (error) 194 goto out_dput_orphanage; 195 196 /* Stash the reference for later and bail out. */ 197 sc->orphanage = XFS_I(orphanage_inode); 198 sc->orphanage_ilock_flags = 0; 199 200 out_dput_orphanage: 201 dput(orphanage_dentry); 202 out_unlock_root: 203 inode_unlock(VFS_I(sc->mp->m_rootip)); 204 out_dput_root: 205 dput(root_dentry); 206 out: 207 return error; 208 } 209 210 void 211 xrep_orphanage_ilock( 212 struct xfs_scrub *sc, 213 unsigned int ilock_flags) 214 { 215 sc->orphanage_ilock_flags |= ilock_flags; 216 xfs_ilock(sc->orphanage, ilock_flags); 217 } 218 219 bool 220 xrep_orphanage_ilock_nowait( 221 struct xfs_scrub *sc, 222 unsigned int ilock_flags) 223 { 224 if (xfs_ilock_nowait(sc->orphanage, ilock_flags)) { 225 sc->orphanage_ilock_flags |= ilock_flags; 226 return true; 227 } 228 229 return false; 230 } 231 232 void 233 xrep_orphanage_iunlock( 234 struct xfs_scrub *sc, 235 unsigned int ilock_flags) 236 { 237 xfs_iunlock(sc->orphanage, ilock_flags); 238 sc->orphanage_ilock_flags &= ~ilock_flags; 239 } 240 241 /* Grab the IOLOCK of the orphanage and sc->ip. */ 242 int 243 xrep_orphanage_iolock_two( 244 struct xfs_scrub *sc) 245 { 246 int error = 0; 247 248 while (true) { 249 if (xchk_should_terminate(sc, &error)) 250 return error; 251 252 /* 253 * Normal XFS takes the IOLOCK before grabbing a transaction. 254 * Scrub holds a transaction, which means that we can't block 255 * on either IOLOCK. 256 */ 257 if (xrep_orphanage_ilock_nowait(sc, XFS_IOLOCK_EXCL)) { 258 if (xchk_ilock_nowait(sc, XFS_IOLOCK_EXCL)) 259 break; 260 xrep_orphanage_iunlock(sc, XFS_IOLOCK_EXCL); 261 } 262 delay(1); 263 } 264 265 return 0; 266 } 267 268 /* Release the orphanage. 
*/ 269 void 270 xrep_orphanage_rele( 271 struct xfs_scrub *sc) 272 { 273 if (!sc->orphanage) 274 return; 275 276 if (sc->orphanage_ilock_flags) 277 xfs_iunlock(sc->orphanage, sc->orphanage_ilock_flags); 278 279 xchk_irele(sc, sc->orphanage); 280 sc->orphanage = NULL; 281 } 282 283 /* Adoption moves a file into /lost+found */ 284 285 /* Can the orphanage adopt @sc->ip? */ 286 bool 287 xrep_orphanage_can_adopt( 288 struct xfs_scrub *sc) 289 { 290 ASSERT(sc->ip != NULL); 291 292 if (!sc->orphanage) 293 return false; 294 if (sc->ip == sc->orphanage) 295 return false; 296 if (xfs_internal_inum(sc->mp, sc->ip->i_ino)) 297 return false; 298 return true; 299 } 300 301 /* 302 * Create a new transaction to send a child to the orphanage. 303 * 304 * Allocate a new transaction with sufficient disk space to handle the 305 * adoption, take ILOCK_EXCL of the orphanage and sc->ip, joins them to the 306 * transaction, and reserve quota to reparent the latter. Caller must hold the 307 * IOLOCK of the orphanage and sc->ip. 308 */ 309 int 310 xrep_adoption_trans_alloc( 311 struct xfs_scrub *sc, 312 struct xrep_adoption *adopt) 313 { 314 struct xfs_mount *mp = sc->mp; 315 unsigned int child_blkres = 0; 316 int error; 317 318 ASSERT(sc->tp == NULL); 319 ASSERT(sc->ip != NULL); 320 ASSERT(sc->orphanage != NULL); 321 ASSERT(sc->ilock_flags & XFS_IOLOCK_EXCL); 322 ASSERT(sc->orphanage_ilock_flags & XFS_IOLOCK_EXCL); 323 ASSERT(!(sc->ilock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL))); 324 ASSERT(!(sc->orphanage_ilock_flags & 325 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL))); 326 327 /* Compute the worst case space reservation that we need. 
*/ 328 adopt->sc = sc; 329 adopt->orphanage_blkres = XFS_LINK_SPACE_RES(mp, MAXNAMELEN); 330 if (S_ISDIR(VFS_I(sc->ip)->i_mode)) 331 child_blkres = XFS_RENAME_SPACE_RES(mp, xfs_name_dotdot.len); 332 adopt->child_blkres = child_blkres; 333 334 /* 335 * Allocate a transaction to link the child into the parent, along with 336 * enough disk space to handle expansion of both the orphanage and the 337 * dotdot entry of a child directory. 338 */ 339 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, 340 adopt->orphanage_blkres + adopt->child_blkres, 0, 0, 341 &sc->tp); 342 if (error) 343 return error; 344 345 xfs_lock_two_inodes(sc->orphanage, XFS_ILOCK_EXCL, 346 sc->ip, XFS_ILOCK_EXCL); 347 sc->ilock_flags |= XFS_ILOCK_EXCL; 348 sc->orphanage_ilock_flags |= XFS_ILOCK_EXCL; 349 350 xfs_trans_ijoin(sc->tp, sc->orphanage, 0); 351 xfs_trans_ijoin(sc->tp, sc->ip, 0); 352 353 /* 354 * Reserve enough quota in the orphan directory to add the new name. 355 * Normally the orphanage should have user/group/project ids of zero 356 * and hence is not subject to quota enforcement, but we're allowed to 357 * exceed quota to reattach disconnected parts of the directory tree. 358 */ 359 error = xfs_trans_reserve_quota_nblks(sc->tp, sc->orphanage, 360 adopt->orphanage_blkres, 0, true); 361 if (error) 362 goto out_cancel; 363 364 /* 365 * Reserve enough quota in the child directory to change dotdot. 366 * Here we're also allowed to exceed file quota to repair inconsistent 367 * metadata. 368 */ 369 if (adopt->child_blkres) { 370 error = xfs_trans_reserve_quota_nblks(sc->tp, sc->ip, 371 adopt->child_blkres, 0, true); 372 if (error) 373 goto out_cancel; 374 } 375 376 return 0; 377 out_cancel: 378 xchk_trans_cancel(sc); 379 xrep_orphanage_iunlock(sc, XFS_ILOCK_EXCL); 380 xrep_orphanage_iunlock(sc, XFS_IOLOCK_EXCL); 381 return error; 382 } 383 384 /* 385 * Compute the xfs_name for the directory entry that we're adding to the 386 * orphanage. 
Caller must hold ILOCKs of sc->ip and the orphanage and must not
 * reuse namebuf until the adoption completes or is dissolved.
 */
/*
 * The name is the child's inode number in decimal.  If that name is already
 * present in the orphanage, ".1", ".2", ... up to ".10000" are tried in turn
 * until a lookup fails.
 *
 * Returns 0 once a lookup reports -ENOENT, -EFSCORRUPTED if every candidate
 * name is taken, or any other lookup error unchanged.
 */
int
xrep_adoption_compute_name(
	struct xrep_adoption	*adopt,
	struct xfs_name		*xname)
{
	struct xfs_scrub	*sc = adopt->sc;
	char			*buf = (void *)xname->name;
	xfs_ino_t		found_ino;
	unsigned int		suffix;
	int			error;

	adopt->xname = xname;
	xname->len = snprintf(buf, MAXNAMELEN, "%llu", sc->ip->i_ino);
	xname->type = xfs_mode_to_ftype(VFS_I(sc->ip)->i_mode);

	/* First choice: the bare inode number. */
	error = xchk_dir_lookup(sc, sc->orphanage, xname, &found_ino);

	/* A successful lookup means the name is taken; try the next suffix. */
	for (suffix = 1; error == 0 && suffix <= 10000; suffix++) {
		xname->len = snprintf(buf, MAXNAMELEN, "%llu.%u",
				sc->ip->i_ino, suffix);
		error = xchk_dir_lookup(sc, sc->orphanage, xname, &found_ino);
	}

	switch (error) {
	case 0:
		/* Every candidate exists -- 10,000 entries already? */
		return -EFSCORRUPTED;
	case -ENOENT:
		/* Nothing by this name; it is ours to use. */
		return 0;
	default:
		return error;
	}
}

/*
 * Make sure the dcache does not have a positive dentry for the name we've
 * chosen. The caller should have checked with the ondisk directory, so any
 * discrepancy is a sign that something is seriously wrong.
425 */ 426 static int 427 xrep_adoption_check_dcache( 428 struct xrep_adoption *adopt) 429 { 430 struct qstr qname = QSTR_INIT(adopt->xname->name, 431 adopt->xname->len); 432 struct dentry *d_orphanage, *d_child; 433 int error = 0; 434 435 d_orphanage = d_find_alias(VFS_I(adopt->sc->orphanage)); 436 if (!d_orphanage) 437 return 0; 438 439 d_child = d_hash_and_lookup(d_orphanage, &qname); 440 if (d_child) { 441 trace_xrep_adoption_check_child(adopt->sc->mp, d_child); 442 443 if (d_is_positive(d_child)) { 444 ASSERT(d_is_negative(d_child)); 445 error = -EFSCORRUPTED; 446 } 447 448 dput(d_child); 449 } 450 451 dput(d_orphanage); 452 if (error) 453 return error; 454 455 /* 456 * Do we need to update d_parent of the dentry for the file being 457 * repaired? There shouldn't be a hashed dentry with a parent since 458 * the file had nonzero nlink but wasn't connected to any parent dir. 459 */ 460 d_child = d_find_alias(VFS_I(adopt->sc->ip)); 461 if (!d_child) 462 return 0; 463 464 trace_xrep_adoption_check_alias(adopt->sc->mp, d_child); 465 466 if (d_child->d_parent && !d_unhashed(d_child)) { 467 ASSERT(d_child->d_parent == NULL || d_unhashed(d_child)); 468 error = -EFSCORRUPTED; 469 } 470 471 dput(d_child); 472 return error; 473 } 474 475 /* 476 * Remove all negative dentries from the dcache. There should not be any 477 * positive entries, since we've maintained our lock on the orphanage 478 * directory. 
479 */ 480 static void 481 xrep_adoption_zap_dcache( 482 struct xrep_adoption *adopt) 483 { 484 struct qstr qname = QSTR_INIT(adopt->xname->name, 485 adopt->xname->len); 486 struct dentry *d_orphanage, *d_child; 487 488 d_orphanage = d_find_alias(VFS_I(adopt->sc->orphanage)); 489 if (!d_orphanage) 490 return; 491 492 d_child = d_hash_and_lookup(d_orphanage, &qname); 493 while (d_child != NULL) { 494 trace_xrep_adoption_invalidate_child(adopt->sc->mp, d_child); 495 496 ASSERT(d_is_negative(d_child)); 497 d_invalidate(d_child); 498 dput(d_child); 499 d_child = d_lookup(d_orphanage, &qname); 500 } 501 502 dput(d_orphanage); 503 } 504 505 /* 506 * Move the current file to the orphanage under the computed name. 507 * 508 * Returns with a dirty transaction so that the caller can handle any other 509 * work, such as fixing up unlinked lists or resetting link counts. 510 */ 511 int 512 xrep_adoption_move( 513 struct xrep_adoption *adopt) 514 { 515 struct xfs_scrub *sc = adopt->sc; 516 bool isdir = S_ISDIR(VFS_I(sc->ip)->i_mode); 517 int error; 518 519 trace_xrep_adoption_reparent(sc->orphanage, adopt->xname, 520 sc->ip->i_ino); 521 522 error = xrep_adoption_check_dcache(adopt); 523 if (error) 524 return error; 525 526 /* Create the new name in the orphanage. */ 527 error = xfs_dir_createname(sc->tp, sc->orphanage, adopt->xname, 528 sc->ip->i_ino, adopt->orphanage_blkres); 529 if (error) 530 return error; 531 532 /* 533 * Bump the link count of the orphanage if we just added a 534 * subdirectory, and update its timestamps. 535 */ 536 xfs_trans_ichgtime(sc->tp, sc->orphanage, 537 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 538 if (isdir) 539 xfs_bumplink(sc->tp, sc->orphanage); 540 xfs_trans_log_inode(sc->tp, sc->orphanage, XFS_ILOG_CORE); 541 542 /* Replace the dotdot entry if the child is a subdirectory. 
*/ 543 if (isdir) { 544 error = xfs_dir_replace(sc->tp, sc->ip, &xfs_name_dotdot, 545 sc->orphanage->i_ino, adopt->child_blkres); 546 if (error) 547 return error; 548 } 549 550 /* 551 * Notify dirent hooks that we moved the file to /lost+found, and 552 * finish all the deferred work so that we know the adoption is fully 553 * recorded in the log. 554 */ 555 xfs_dir_update_hook(sc->orphanage, sc->ip, 1, adopt->xname); 556 557 /* Remove negative dentries from the lost+found's dcache */ 558 xrep_adoption_zap_dcache(adopt); 559 return 0; 560 } 561 562 /* 563 * Roll to a clean scrub transaction so that we can release the orphanage, 564 * even if xrep_adoption_move was not called. 565 * 566 * Commits all the work and deferred ops attached to an adoption request and 567 * rolls to a clean scrub transaction. On success, returns 0 with the scrub 568 * context holding a clean transaction with no inodes joined. On failure, 569 * returns negative errno with no scrub transaction. All inode locks are 570 * still held after this function returns. 571 */ 572 int 573 xrep_adoption_trans_roll( 574 struct xrep_adoption *adopt) 575 { 576 struct xfs_scrub *sc = adopt->sc; 577 int error; 578 579 trace_xrep_adoption_trans_roll(sc->orphanage, sc->ip, 580 !!(sc->tp->t_flags & XFS_TRANS_DIRTY)); 581 582 /* Finish all the deferred ops to commit all repairs. */ 583 error = xrep_defer_finish(sc); 584 if (error) 585 return error; 586 587 /* Roll the transaction once more to detach the inodes. */ 588 return xfs_trans_roll(&sc->tp); 589 } 590