// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2021-2024 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_inode.h"
#include "xfs_ialloc.h"
#include "xfs_quota.h"
#include "xfs_trans_space.h"
#include "xfs_dir2.h"
#include "xfs_icache.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/repair.h"
#include "scrub/trace.h"
#include "scrub/orphanage.h"
#include "scrub/readdir.h"

#include <linux/namei.h>

/*
 * The Orphanage
 * =============
 *
 * If the directory tree is damaged, children of that directory become
 * inaccessible via that file path.  If a child has no other parents, the file
 * is said to be orphaned.  xfs_repair fixes this situation by creating an
 * orphanage directory (specifically, /lost+found) and creating a directory
 * entry pointing to the orphaned file.
 *
 * Online repair follows this tactic by creating a root-owned /lost+found
 * directory if one does not exist.  If an orphan is found, it will move that
 * file into the orphanage.
 */

/* Make the orphanage owned by root.
*/ 47 STATIC int 48 xrep_chown_orphanage( 49 struct xfs_scrub *sc, 50 struct xfs_inode *dp) 51 { 52 struct xfs_trans *tp; 53 struct xfs_mount *mp = sc->mp; 54 struct xfs_dquot *udqp = NULL, *gdqp = NULL, *pdqp = NULL; 55 struct xfs_dquot *oldu = NULL, *oldg = NULL, *oldp = NULL; 56 struct inode *inode = VFS_I(dp); 57 int error; 58 59 error = xfs_qm_vop_dqalloc(dp, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0, 60 XFS_QMOPT_QUOTALL, &udqp, &gdqp, &pdqp); 61 if (error) 62 return error; 63 64 error = xfs_trans_alloc_ichange(dp, udqp, gdqp, pdqp, true, &tp); 65 if (error) 66 goto out_dqrele; 67 68 /* 69 * Always clear setuid/setgid/sticky on the orphanage since we don't 70 * normally want that functionality on this directory and xfs_repair 71 * doesn't create it this way either. Leave the other access bits 72 * unchanged. 73 */ 74 inode->i_mode &= ~(S_ISUID | S_ISGID | S_ISVTX); 75 76 /* 77 * Change the ownerships and register quota modifications 78 * in the transaction. 79 */ 80 if (!uid_eq(inode->i_uid, GLOBAL_ROOT_UID)) { 81 if (XFS_IS_UQUOTA_ON(mp)) 82 oldu = xfs_qm_vop_chown(tp, dp, &dp->i_udquot, udqp); 83 inode->i_uid = GLOBAL_ROOT_UID; 84 } 85 if (!gid_eq(inode->i_gid, GLOBAL_ROOT_GID)) { 86 if (XFS_IS_GQUOTA_ON(mp)) 87 oldg = xfs_qm_vop_chown(tp, dp, &dp->i_gdquot, gdqp); 88 inode->i_gid = GLOBAL_ROOT_GID; 89 } 90 if (dp->i_projid != 0) { 91 if (XFS_IS_PQUOTA_ON(mp)) 92 oldp = xfs_qm_vop_chown(tp, dp, &dp->i_pdquot, pdqp); 93 dp->i_projid = 0; 94 } 95 96 dp->i_diflags &= ~(XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT); 97 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 98 99 XFS_STATS_INC(mp, xs_ig_attrchg); 100 101 if (xfs_has_wsync(mp)) 102 xfs_trans_set_sync(tp); 103 error = xfs_trans_commit(tp); 104 105 xfs_qm_dqrele(oldu); 106 xfs_qm_dqrele(oldg); 107 xfs_qm_dqrele(oldp); 108 109 out_dqrele: 110 xfs_qm_dqrele(udqp); 111 xfs_qm_dqrele(gdqp); 112 xfs_qm_dqrele(pdqp); 113 return error; 114 } 115 116 #define ORPHANAGE "lost+found" 117 118 /* Create the orphanage directory, 
and set sc->orphanage to it. */ 119 int 120 xrep_orphanage_create( 121 struct xfs_scrub *sc) 122 { 123 struct xfs_mount *mp = sc->mp; 124 struct dentry *root_dentry, *orphanage_dentry; 125 struct inode *root_inode = VFS_I(sc->mp->m_rootip); 126 struct inode *orphanage_inode; 127 int error; 128 129 if (xfs_is_shutdown(mp)) 130 return -EIO; 131 if (xfs_is_readonly(mp)) { 132 sc->orphanage = NULL; 133 return 0; 134 } 135 136 ASSERT(sc->tp == NULL); 137 ASSERT(sc->orphanage == NULL); 138 139 /* Find the dentry for the root directory... */ 140 root_dentry = d_find_alias(root_inode); 141 if (!root_dentry) { 142 error = -EFSCORRUPTED; 143 goto out; 144 } 145 146 /* ...which is a directory, right? */ 147 if (!d_is_dir(root_dentry)) { 148 error = -EFSCORRUPTED; 149 goto out_dput_root; 150 } 151 152 /* Try to find the orphanage directory. */ 153 inode_lock_nested(root_inode, I_MUTEX_PARENT); 154 orphanage_dentry = lookup_one_len(ORPHANAGE, root_dentry, 155 strlen(ORPHANAGE)); 156 if (IS_ERR(orphanage_dentry)) { 157 error = PTR_ERR(orphanage_dentry); 158 goto out_unlock_root; 159 } 160 161 /* 162 * Nothing found? Call mkdir to create the orphanage. Create the 163 * directory without other-user access because we're live and someone 164 * could have been relying partly on minimal access to a parent 165 * directory to control access to a file we put in here. 166 */ 167 if (d_really_is_negative(orphanage_dentry)) { 168 error = vfs_mkdir(&nop_mnt_idmap, root_inode, orphanage_dentry, 169 0750); 170 if (error) 171 goto out_dput_orphanage; 172 } 173 174 /* Not a directory? Bail out. */ 175 if (!d_is_dir(orphanage_dentry)) { 176 error = -ENOTDIR; 177 goto out_dput_orphanage; 178 } 179 180 /* 181 * Grab a reference to the orphanage. This /should/ succeed since 182 * we hold the root directory locked and therefore nobody can delete 183 * the orphanage. 
184 */ 185 orphanage_inode = igrab(d_inode(orphanage_dentry)); 186 if (!orphanage_inode) { 187 error = -ENOENT; 188 goto out_dput_orphanage; 189 } 190 191 /* Make sure the orphanage is owned by root. */ 192 error = xrep_chown_orphanage(sc, XFS_I(orphanage_inode)); 193 if (error) 194 goto out_dput_orphanage; 195 196 /* Stash the reference for later and bail out. */ 197 sc->orphanage = XFS_I(orphanage_inode); 198 sc->orphanage_ilock_flags = 0; 199 200 out_dput_orphanage: 201 dput(orphanage_dentry); 202 out_unlock_root: 203 inode_unlock(VFS_I(sc->mp->m_rootip)); 204 out_dput_root: 205 dput(root_dentry); 206 out: 207 return error; 208 } 209 210 void 211 xrep_orphanage_ilock( 212 struct xfs_scrub *sc, 213 unsigned int ilock_flags) 214 { 215 sc->orphanage_ilock_flags |= ilock_flags; 216 xfs_ilock(sc->orphanage, ilock_flags); 217 } 218 219 bool 220 xrep_orphanage_ilock_nowait( 221 struct xfs_scrub *sc, 222 unsigned int ilock_flags) 223 { 224 if (xfs_ilock_nowait(sc->orphanage, ilock_flags)) { 225 sc->orphanage_ilock_flags |= ilock_flags; 226 return true; 227 } 228 229 return false; 230 } 231 232 void 233 xrep_orphanage_iunlock( 234 struct xfs_scrub *sc, 235 unsigned int ilock_flags) 236 { 237 xfs_iunlock(sc->orphanage, ilock_flags); 238 sc->orphanage_ilock_flags &= ~ilock_flags; 239 } 240 241 /* Grab the IOLOCK of the orphanage and sc->ip. */ 242 int 243 xrep_orphanage_iolock_two( 244 struct xfs_scrub *sc) 245 { 246 int error = 0; 247 248 while (true) { 249 if (xchk_should_terminate(sc, &error)) 250 return error; 251 252 /* 253 * Normal XFS takes the IOLOCK before grabbing a transaction. 254 * Scrub holds a transaction, which means that we can't block 255 * on either IOLOCK. 256 */ 257 if (xrep_orphanage_ilock_nowait(sc, XFS_IOLOCK_EXCL)) { 258 if (xchk_ilock_nowait(sc, XFS_IOLOCK_EXCL)) 259 break; 260 xrep_orphanage_iunlock(sc, XFS_IOLOCK_EXCL); 261 } 262 delay(1); 263 } 264 265 return 0; 266 } 267 268 /* Release the orphanage. 
*/ 269 void 270 xrep_orphanage_rele( 271 struct xfs_scrub *sc) 272 { 273 if (!sc->orphanage) 274 return; 275 276 if (sc->orphanage_ilock_flags) 277 xfs_iunlock(sc->orphanage, sc->orphanage_ilock_flags); 278 279 xchk_irele(sc, sc->orphanage); 280 sc->orphanage = NULL; 281 } 282 283 /* Adoption moves a file into /lost+found */ 284 285 /* Can the orphanage adopt @sc->ip? */ 286 bool 287 xrep_orphanage_can_adopt( 288 struct xfs_scrub *sc) 289 { 290 ASSERT(sc->ip != NULL); 291 292 if (!sc->orphanage) 293 return false; 294 if (sc->ip == sc->orphanage) 295 return false; 296 if (xfs_internal_inum(sc->mp, sc->ip->i_ino)) 297 return false; 298 return true; 299 } 300 301 /* 302 * Create a new transaction to send a child to the orphanage. 303 * 304 * Allocate a new transaction with sufficient disk space to handle the 305 * adoption, take ILOCK_EXCL of the orphanage and sc->ip, joins them to the 306 * transaction, and reserve quota to reparent the latter. Caller must hold the 307 * IOLOCK of the orphanage and sc->ip. 308 */ 309 int 310 xrep_adoption_trans_alloc( 311 struct xfs_scrub *sc, 312 struct xrep_adoption *adopt) 313 { 314 struct xfs_mount *mp = sc->mp; 315 unsigned int child_blkres = 0; 316 int error; 317 318 ASSERT(sc->tp == NULL); 319 ASSERT(sc->ip != NULL); 320 ASSERT(sc->orphanage != NULL); 321 ASSERT(sc->ilock_flags & XFS_IOLOCK_EXCL); 322 ASSERT(sc->orphanage_ilock_flags & XFS_IOLOCK_EXCL); 323 ASSERT(!(sc->ilock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL))); 324 ASSERT(!(sc->orphanage_ilock_flags & 325 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL))); 326 327 /* Compute the worst case space reservation that we need. 
*/ 328 adopt->sc = sc; 329 adopt->orphanage_blkres = xfs_link_space_res(mp, MAXNAMELEN); 330 if (S_ISDIR(VFS_I(sc->ip)->i_mode)) 331 child_blkres = xfs_rename_space_res(mp, 0, false, 332 xfs_name_dotdot.len, false); 333 adopt->child_blkres = child_blkres; 334 335 /* 336 * Allocate a transaction to link the child into the parent, along with 337 * enough disk space to handle expansion of both the orphanage and the 338 * dotdot entry of a child directory. 339 */ 340 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, 341 adopt->orphanage_blkres + adopt->child_blkres, 0, 0, 342 &sc->tp); 343 if (error) 344 return error; 345 346 xfs_lock_two_inodes(sc->orphanage, XFS_ILOCK_EXCL, 347 sc->ip, XFS_ILOCK_EXCL); 348 sc->ilock_flags |= XFS_ILOCK_EXCL; 349 sc->orphanage_ilock_flags |= XFS_ILOCK_EXCL; 350 351 xfs_trans_ijoin(sc->tp, sc->orphanage, 0); 352 xfs_trans_ijoin(sc->tp, sc->ip, 0); 353 354 /* 355 * Reserve enough quota in the orphan directory to add the new name. 356 * Normally the orphanage should have user/group/project ids of zero 357 * and hence is not subject to quota enforcement, but we're allowed to 358 * exceed quota to reattach disconnected parts of the directory tree. 359 */ 360 error = xfs_trans_reserve_quota_nblks(sc->tp, sc->orphanage, 361 adopt->orphanage_blkres, 0, true); 362 if (error) 363 goto out_cancel; 364 365 /* 366 * Reserve enough quota in the child directory to change dotdot. 367 * Here we're also allowed to exceed file quota to repair inconsistent 368 * metadata. 369 */ 370 if (adopt->child_blkres) { 371 error = xfs_trans_reserve_quota_nblks(sc->tp, sc->ip, 372 adopt->child_blkres, 0, true); 373 if (error) 374 goto out_cancel; 375 } 376 377 return 0; 378 out_cancel: 379 xchk_trans_cancel(sc); 380 xrep_orphanage_iunlock(sc, XFS_ILOCK_EXCL); 381 xrep_orphanage_iunlock(sc, XFS_IOLOCK_EXCL); 382 return error; 383 } 384 385 /* 386 * Compute the xfs_name for the directory entry that we're adding to the 387 * orphanage. 
Caller must hold ILOCKs of sc->ip and the orphanage and must not 388 * reuse namebuf until the adoption completes or is dissolved. 389 */ 390 int 391 xrep_adoption_compute_name( 392 struct xrep_adoption *adopt, 393 struct xfs_name *xname) 394 { 395 struct xfs_scrub *sc = adopt->sc; 396 char *namebuf = (void *)xname->name; 397 xfs_ino_t ino; 398 unsigned int incr = 0; 399 int error = 0; 400 401 adopt->xname = xname; 402 xname->len = snprintf(namebuf, MAXNAMELEN, "%llu", sc->ip->i_ino); 403 xname->type = xfs_mode_to_ftype(VFS_I(sc->ip)->i_mode); 404 405 /* Make sure the filename is unique in the lost+found. */ 406 error = xchk_dir_lookup(sc, sc->orphanage, xname, &ino); 407 while (error == 0 && incr < 10000) { 408 xname->len = snprintf(namebuf, MAXNAMELEN, "%llu.%u", 409 sc->ip->i_ino, ++incr); 410 error = xchk_dir_lookup(sc, sc->orphanage, xname, &ino); 411 } 412 if (error == 0) { 413 /* We already have 10,000 entries in the orphanage? */ 414 return -EFSCORRUPTED; 415 } 416 417 if (error != -ENOENT) 418 return error; 419 return 0; 420 } 421 422 /* 423 * Make sure the dcache does not have a positive dentry for the name we've 424 * chosen. The caller should have checked with the ondisk directory, so any 425 * discrepancy is a sign that something is seriously wrong. 
426 */ 427 static int 428 xrep_adoption_check_dcache( 429 struct xrep_adoption *adopt) 430 { 431 struct qstr qname = QSTR_INIT(adopt->xname->name, 432 adopt->xname->len); 433 struct dentry *d_orphanage, *d_child; 434 int error = 0; 435 436 d_orphanage = d_find_alias(VFS_I(adopt->sc->orphanage)); 437 if (!d_orphanage) 438 return 0; 439 440 d_child = d_hash_and_lookup(d_orphanage, &qname); 441 if (d_child) { 442 trace_xrep_adoption_check_child(adopt->sc->mp, d_child); 443 444 if (d_is_positive(d_child)) { 445 ASSERT(d_is_negative(d_child)); 446 error = -EFSCORRUPTED; 447 } 448 449 dput(d_child); 450 } 451 452 dput(d_orphanage); 453 if (error) 454 return error; 455 456 /* 457 * Do we need to update d_parent of the dentry for the file being 458 * repaired? There shouldn't be a hashed dentry with a parent since 459 * the file had nonzero nlink but wasn't connected to any parent dir. 460 */ 461 d_child = d_find_alias(VFS_I(adopt->sc->ip)); 462 if (!d_child) 463 return 0; 464 465 trace_xrep_adoption_check_alias(adopt->sc->mp, d_child); 466 467 if (d_child->d_parent && !d_unhashed(d_child)) { 468 ASSERT(d_child->d_parent == NULL || d_unhashed(d_child)); 469 error = -EFSCORRUPTED; 470 } 471 472 dput(d_child); 473 return error; 474 } 475 476 /* 477 * Remove all negative dentries from the dcache. There should not be any 478 * positive entries, since we've maintained our lock on the orphanage 479 * directory. 
480 */ 481 static void 482 xrep_adoption_zap_dcache( 483 struct xrep_adoption *adopt) 484 { 485 struct qstr qname = QSTR_INIT(adopt->xname->name, 486 adopt->xname->len); 487 struct dentry *d_orphanage, *d_child; 488 489 d_orphanage = d_find_alias(VFS_I(adopt->sc->orphanage)); 490 if (!d_orphanage) 491 return; 492 493 d_child = d_hash_and_lookup(d_orphanage, &qname); 494 while (d_child != NULL) { 495 trace_xrep_adoption_invalidate_child(adopt->sc->mp, d_child); 496 497 ASSERT(d_is_negative(d_child)); 498 d_invalidate(d_child); 499 dput(d_child); 500 d_child = d_lookup(d_orphanage, &qname); 501 } 502 503 dput(d_orphanage); 504 } 505 506 /* 507 * Move the current file to the orphanage under the computed name. 508 * 509 * Returns with a dirty transaction so that the caller can handle any other 510 * work, such as fixing up unlinked lists or resetting link counts. 511 */ 512 int 513 xrep_adoption_move( 514 struct xrep_adoption *adopt) 515 { 516 struct xfs_scrub *sc = adopt->sc; 517 bool isdir = S_ISDIR(VFS_I(sc->ip)->i_mode); 518 int error; 519 520 trace_xrep_adoption_reparent(sc->orphanage, adopt->xname, 521 sc->ip->i_ino); 522 523 error = xrep_adoption_check_dcache(adopt); 524 if (error) 525 return error; 526 527 /* Create the new name in the orphanage. */ 528 error = xfs_dir_createname(sc->tp, sc->orphanage, adopt->xname, 529 sc->ip->i_ino, adopt->orphanage_blkres); 530 if (error) 531 return error; 532 533 /* 534 * Bump the link count of the orphanage if we just added a 535 * subdirectory, and update its timestamps. 536 */ 537 xfs_trans_ichgtime(sc->tp, sc->orphanage, 538 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 539 if (isdir) 540 xfs_bumplink(sc->tp, sc->orphanage); 541 xfs_trans_log_inode(sc->tp, sc->orphanage, XFS_ILOG_CORE); 542 543 /* Replace the dotdot entry if the child is a subdirectory. 
*/ 544 if (isdir) { 545 error = xfs_dir_replace(sc->tp, sc->ip, &xfs_name_dotdot, 546 sc->orphanage->i_ino, adopt->child_blkres); 547 if (error) 548 return error; 549 } 550 551 /* 552 * Notify dirent hooks that we moved the file to /lost+found, and 553 * finish all the deferred work so that we know the adoption is fully 554 * recorded in the log. 555 */ 556 xfs_dir_update_hook(sc->orphanage, sc->ip, 1, adopt->xname); 557 558 /* Remove negative dentries from the lost+found's dcache */ 559 xrep_adoption_zap_dcache(adopt); 560 return 0; 561 } 562 563 /* 564 * Roll to a clean scrub transaction so that we can release the orphanage, 565 * even if xrep_adoption_move was not called. 566 * 567 * Commits all the work and deferred ops attached to an adoption request and 568 * rolls to a clean scrub transaction. On success, returns 0 with the scrub 569 * context holding a clean transaction with no inodes joined. On failure, 570 * returns negative errno with no scrub transaction. All inode locks are 571 * still held after this function returns. 572 */ 573 int 574 xrep_adoption_trans_roll( 575 struct xrep_adoption *adopt) 576 { 577 struct xfs_scrub *sc = adopt->sc; 578 int error; 579 580 trace_xrep_adoption_trans_roll(sc->orphanage, sc->ip, 581 !!(sc->tp->t_flags & XFS_TRANS_DIRTY)); 582 583 /* Finish all the deferred ops to commit all repairs. */ 584 error = xrep_defer_finish(sc); 585 if (error) 586 return error; 587 588 /* Roll the transaction once more to detach the inodes. */ 589 return xfs_trans_roll(&sc->tp); 590 } 591