1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (c) 2018-2024 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <djwong@kernel.org> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_mount.h" 12 #include "xfs_defer.h" 13 #include "xfs_btree.h" 14 #include "xfs_bit.h" 15 #include "xfs_log_format.h" 16 #include "xfs_trans.h" 17 #include "xfs_sb.h" 18 #include "xfs_inode.h" 19 #include "xfs_da_format.h" 20 #include "xfs_da_btree.h" 21 #include "xfs_dir2.h" 22 #include "xfs_attr.h" 23 #include "xfs_attr_leaf.h" 24 #include "xfs_attr_sf.h" 25 #include "xfs_attr_remote.h" 26 #include "xfs_bmap.h" 27 #include "xfs_bmap_util.h" 28 #include "xfs_exchmaps.h" 29 #include "xfs_exchrange.h" 30 #include "xfs_acl.h" 31 #include "scrub/xfs_scrub.h" 32 #include "scrub/scrub.h" 33 #include "scrub/common.h" 34 #include "scrub/trace.h" 35 #include "scrub/repair.h" 36 #include "scrub/tempfile.h" 37 #include "scrub/tempexch.h" 38 #include "scrub/xfile.h" 39 #include "scrub/xfarray.h" 40 #include "scrub/xfblob.h" 41 #include "scrub/attr.h" 42 #include "scrub/reap.h" 43 #include "scrub/attr_repair.h" 44 45 /* 46 * Extended Attribute Repair 47 * ========================= 48 * 49 * We repair extended attributes by reading the attr leaf blocks looking for 50 * attributes entries that look salvageable (name passes verifiers, value can 51 * be retrieved, etc). Each extended attribute worth salvaging is stashed in 52 * memory, and the stashed entries are periodically replayed into a temporary 53 * file to constrain memory use. Batching the construction of the temporary 54 * extended attribute structure in this fashion reduces lock cycling of the 55 * file being repaired and the temporary file. 56 * 57 * When salvaging completes, the remaining stashed attributes are replayed to 58 * the temporary file. An atomic file contents exchange is used to commit the 59 * new xattr blocks to the file being repaired. This will disrupt attrmulti 60 * cursors. 61 */ 62 63 struct xrep_xattr_key { 64 /* Cookie for retrieval of the xattr name. */ 65 xfblob_cookie name_cookie; 66 67 /* Cookie for retrieval of the xattr value. */ 68 xfblob_cookie value_cookie; 69 70 /* XFS_ATTR_* flags */ 71 int flags; 72 73 /* Length of the value and name. */ 74 uint32_t valuelen; 75 uint16_t namelen; 76 }; 77 78 /* 79 * Stash up to 8 pages of attrs in xattr_records/xattr_blobs before we write 80 * them to the temp file. 81 */ 82 #define XREP_XATTR_MAX_STASH_BYTES (PAGE_SIZE * 8) 83 84 struct xrep_xattr { 85 struct xfs_scrub *sc; 86 87 /* Information for exchanging attr fork mappings at the end. */ 88 struct xrep_tempexch tx; 89 90 /* xattr keys */ 91 struct xfarray *xattr_records; 92 93 /* xattr values */ 94 struct xfblob *xattr_blobs; 95 96 /* Number of attributes that we are salvaging. */ 97 unsigned long long attrs_found; 98 }; 99 100 /* Set up to recreate the extended attributes. */ 101 int 102 xrep_setup_xattr( 103 struct xfs_scrub *sc) 104 { 105 return xrep_tempfile_create(sc, S_IFREG); 106 } 107 108 /* 109 * Decide if we want to salvage this attribute. We don't bother with 110 * incomplete or oversized keys or values. The @value parameter can be null 111 * for remote attrs. 112 */ 113 STATIC int 114 xrep_xattr_want_salvage( 115 struct xrep_xattr *rx, 116 unsigned int attr_flags, 117 const void *name, 118 int namelen, 119 const void *value, 120 int valuelen) 121 { 122 if (attr_flags & XFS_ATTR_INCOMPLETE) 123 return false; 124 if (namelen > XATTR_NAME_MAX || namelen <= 0) 125 return false; 126 if (!xfs_attr_namecheck(name, namelen)) 127 return false; 128 if (valuelen > XATTR_SIZE_MAX || valuelen < 0) 129 return false; 130 if (hweight32(attr_flags & XFS_ATTR_NSP_ONDISK_MASK) > 1) 131 return false; 132 return true; 133 } 134 135 /* Allocate an in-core record to hold xattrs while we rebuild the xattr data. */ 136 STATIC int 137 xrep_xattr_salvage_key( 138 struct xrep_xattr *rx, 139 int flags, 140 unsigned char *name, 141 int namelen, 142 unsigned char *value, 143 int valuelen) 144 { 145 struct xrep_xattr_key key = { 146 .valuelen = valuelen, 147 .flags = flags & XFS_ATTR_NSP_ONDISK_MASK, 148 }; 149 unsigned int i = 0; 150 int error = 0; 151 152 if (xchk_should_terminate(rx->sc, &error)) 153 return error; 154 155 /* 156 * Truncate the name to the first character that would trip namecheck. 157 * If we no longer have a name after that, ignore this attribute. 158 */ 159 while (i < namelen && name[i] != 0) 160 i++; 161 if (i == 0) 162 return 0; 163 key.namelen = i; 164 165 trace_xrep_xattr_salvage_rec(rx->sc->ip, flags, name, key.namelen, 166 valuelen); 167 168 error = xfblob_store(rx->xattr_blobs, &key.name_cookie, name, 169 key.namelen); 170 if (error) 171 return error; 172 173 error = xfblob_store(rx->xattr_blobs, &key.value_cookie, value, 174 key.valuelen); 175 if (error) 176 return error; 177 178 error = xfarray_append(rx->xattr_records, &key); 179 if (error) 180 return error; 181 182 rx->attrs_found++; 183 return 0; 184 } 185 186 /* 187 * Record a shortform extended attribute key & value for later reinsertion 188 * into the inode. 189 */ 190 STATIC int 191 xrep_xattr_salvage_sf_attr( 192 struct xrep_xattr *rx, 193 struct xfs_attr_sf_hdr *hdr, 194 struct xfs_attr_sf_entry *sfe) 195 { 196 struct xfs_scrub *sc = rx->sc; 197 struct xchk_xattr_buf *ab = sc->buf; 198 unsigned char *name = sfe->nameval; 199 unsigned char *value = &sfe->nameval[sfe->namelen]; 200 201 if (!xchk_xattr_set_map(sc, ab->usedmap, (char *)name - (char *)hdr, 202 sfe->namelen)) 203 return 0; 204 205 if (!xchk_xattr_set_map(sc, ab->usedmap, (char *)value - (char *)hdr, 206 sfe->valuelen)) 207 return 0; 208 209 if (!xrep_xattr_want_salvage(rx, sfe->flags, sfe->nameval, 210 sfe->namelen, value, sfe->valuelen)) 211 return 0; 212 213 return xrep_xattr_salvage_key(rx, sfe->flags, sfe->nameval, 214 sfe->namelen, value, sfe->valuelen); 215 } 216 217 /* 218 * Record a local format extended attribute key & value for later reinsertion 219 * into the inode. 220 */ 221 STATIC int 222 xrep_xattr_salvage_local_attr( 223 struct xrep_xattr *rx, 224 struct xfs_attr_leaf_entry *ent, 225 unsigned int nameidx, 226 const char *buf_end, 227 struct xfs_attr_leaf_name_local *lentry) 228 { 229 struct xchk_xattr_buf *ab = rx->sc->buf; 230 unsigned char *value; 231 unsigned int valuelen; 232 unsigned int namesize; 233 234 /* 235 * Decode the leaf local entry format. If something seems wrong, we 236 * junk the attribute. 237 */ 238 value = &lentry->nameval[lentry->namelen]; 239 valuelen = be16_to_cpu(lentry->valuelen); 240 namesize = xfs_attr_leaf_entsize_local(lentry->namelen, valuelen); 241 if ((char *)lentry + namesize > buf_end) 242 return 0; 243 if (!xrep_xattr_want_salvage(rx, ent->flags, lentry->nameval, 244 lentry->namelen, value, valuelen)) 245 return 0; 246 if (!xchk_xattr_set_map(rx->sc, ab->usedmap, nameidx, namesize)) 247 return 0; 248 249 /* Try to save this attribute. */ 250 return xrep_xattr_salvage_key(rx, ent->flags, lentry->nameval, 251 lentry->namelen, value, valuelen); 252 } 253 254 /* 255 * Record a remote format extended attribute key & value for later reinsertion 256 * into the inode. 257 */ 258 STATIC int 259 xrep_xattr_salvage_remote_attr( 260 struct xrep_xattr *rx, 261 struct xfs_attr_leaf_entry *ent, 262 unsigned int nameidx, 263 const char *buf_end, 264 struct xfs_attr_leaf_name_remote *rentry, 265 unsigned int ent_idx, 266 struct xfs_buf *leaf_bp) 267 { 268 struct xchk_xattr_buf *ab = rx->sc->buf; 269 struct xfs_da_args args = { 270 .trans = rx->sc->tp, 271 .dp = rx->sc->ip, 272 .index = ent_idx, 273 .geo = rx->sc->mp->m_attr_geo, 274 .owner = rx->sc->ip->i_ino, 275 .attr_filter = ent->flags & XFS_ATTR_NSP_ONDISK_MASK, 276 .namelen = rentry->namelen, 277 .name = rentry->name, 278 .value = ab->value, 279 .valuelen = be32_to_cpu(rentry->valuelen), 280 }; 281 unsigned int namesize; 282 int error; 283 284 /* 285 * Decode the leaf remote entry format. If something seems wrong, we 286 * junk the attribute. Note that we should never find a zero-length 287 * remote attribute value. 288 */ 289 namesize = xfs_attr_leaf_entsize_remote(rentry->namelen); 290 if ((char *)rentry + namesize > buf_end) 291 return 0; 292 if (args.valuelen == 0 || 293 !xrep_xattr_want_salvage(rx, ent->flags, rentry->name, 294 rentry->namelen, NULL, args.valuelen)) 295 return 0; 296 if (!xchk_xattr_set_map(rx->sc, ab->usedmap, nameidx, namesize)) 297 return 0; 298 299 /* 300 * Enlarge the buffer (if needed) to hold the value that we're trying 301 * to salvage from the old extended attribute data. 302 */ 303 error = xchk_setup_xattr_buf(rx->sc, args.valuelen); 304 if (error == -ENOMEM) 305 error = -EDEADLOCK; 306 if (error) 307 return error; 308 309 /* Look up the remote value and stash it for reconstruction. */ 310 error = xfs_attr3_leaf_getvalue(leaf_bp, &args); 311 if (error || args.rmtblkno == 0) 312 goto err_free; 313 314 error = xfs_attr_rmtval_get(&args); 315 if (error) 316 goto err_free; 317 318 /* Try to save this attribute. */ 319 error = xrep_xattr_salvage_key(rx, ent->flags, rentry->name, 320 rentry->namelen, ab->value, args.valuelen); 321 err_free: 322 /* remote value was garbage, junk it */ 323 if (error == -EFSBADCRC || error == -EFSCORRUPTED) 324 error = 0; 325 return error; 326 } 327 328 /* Extract every xattr key that we can from this attr fork block. */ 329 STATIC int 330 xrep_xattr_recover_leaf( 331 struct xrep_xattr *rx, 332 struct xfs_buf *bp) 333 { 334 struct xfs_attr3_icleaf_hdr leafhdr; 335 struct xfs_scrub *sc = rx->sc; 336 struct xfs_mount *mp = sc->mp; 337 struct xfs_attr_leafblock *leaf; 338 struct xfs_attr_leaf_name_local *lentry; 339 struct xfs_attr_leaf_name_remote *rentry; 340 struct xfs_attr_leaf_entry *ent; 341 struct xfs_attr_leaf_entry *entries; 342 struct xchk_xattr_buf *ab = rx->sc->buf; 343 char *buf_end; 344 size_t off; 345 unsigned int nameidx; 346 unsigned int hdrsize; 347 int i; 348 int error = 0; 349 350 bitmap_zero(ab->usedmap, mp->m_attr_geo->blksize); 351 352 /* Check the leaf header */ 353 leaf = bp->b_addr; 354 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf); 355 hdrsize = xfs_attr3_leaf_hdr_size(leaf); 356 xchk_xattr_set_map(sc, ab->usedmap, 0, hdrsize); 357 entries = xfs_attr3_leaf_entryp(leaf); 358 359 buf_end = (char *)bp->b_addr + mp->m_attr_geo->blksize; 360 for (i = 0, ent = entries; i < leafhdr.count; ent++, i++) { 361 if (xchk_should_terminate(sc, &error)) 362 return error; 363 364 /* Skip key if it conflicts with something else? */ 365 off = (char *)ent - (char *)leaf; 366 if (!xchk_xattr_set_map(sc, ab->usedmap, off, 367 sizeof(xfs_attr_leaf_entry_t))) 368 continue; 369 370 /* Check the name information. */ 371 nameidx = be16_to_cpu(ent->nameidx); 372 if (nameidx < leafhdr.firstused || 373 nameidx >= mp->m_attr_geo->blksize) 374 continue; 375 376 if (ent->flags & XFS_ATTR_LOCAL) { 377 lentry = xfs_attr3_leaf_name_local(leaf, i); 378 error = xrep_xattr_salvage_local_attr(rx, ent, nameidx, 379 buf_end, lentry); 380 } else { 381 rentry = xfs_attr3_leaf_name_remote(leaf, i); 382 error = xrep_xattr_salvage_remote_attr(rx, ent, nameidx, 383 buf_end, rentry, i, bp); 384 } 385 if (error) 386 return error; 387 } 388 389 return 0; 390 } 391 392 /* Try to recover shortform attrs. */ 393 STATIC int 394 xrep_xattr_recover_sf( 395 struct xrep_xattr *rx) 396 { 397 struct xfs_scrub *sc = rx->sc; 398 struct xchk_xattr_buf *ab = sc->buf; 399 struct xfs_attr_sf_hdr *hdr; 400 struct xfs_attr_sf_entry *sfe; 401 struct xfs_attr_sf_entry *next; 402 struct xfs_ifork *ifp; 403 unsigned char *end; 404 int i; 405 int error = 0; 406 407 ifp = xfs_ifork_ptr(rx->sc->ip, XFS_ATTR_FORK); 408 hdr = ifp->if_data; 409 410 bitmap_zero(ab->usedmap, ifp->if_bytes); 411 end = (unsigned char *)ifp->if_data + ifp->if_bytes; 412 xchk_xattr_set_map(sc, ab->usedmap, 0, sizeof(*hdr)); 413 414 sfe = xfs_attr_sf_firstentry(hdr); 415 if ((unsigned char *)sfe > end) 416 return 0; 417 418 for (i = 0; i < hdr->count; i++) { 419 if (xchk_should_terminate(sc, &error)) 420 return error; 421 422 next = xfs_attr_sf_nextentry(sfe); 423 if ((unsigned char *)next > end) 424 break; 425 426 if (xchk_xattr_set_map(sc, ab->usedmap, 427 (char *)sfe - (char *)hdr, 428 sizeof(struct xfs_attr_sf_entry))) { 429 /* 430 * No conflicts with the sf entry; let's save this 431 * attribute. 432 */ 433 error = xrep_xattr_salvage_sf_attr(rx, hdr, sfe); 434 if (error) 435 return error; 436 } 437 438 sfe = next; 439 } 440 441 return 0; 442 } 443 444 /* 445 * Try to return a buffer of xattr data for a given physical extent. 446 * 447 * Because the buffer cache get function complains if it finds a buffer 448 * matching the block number but not matching the length, we must be careful to 449 * look for incore buffers (up to the maximum length of a remote value) that 450 * could be hiding anywhere in the physical range. If we find an incore 451 * buffer, we can pass that to the caller. Optionally, read a single block and 452 * pass that back. 453 * 454 * Note the subtlety that remote attr value blocks for which there is no incore 455 * buffer will be passed to the callback one block at a time. These buffers 456 * will not have any ops attached and must be staled to prevent aliasing with 457 * multiblock buffers once we drop the ILOCK. 458 */ 459 STATIC int 460 xrep_xattr_find_buf( 461 struct xfs_mount *mp, 462 xfs_fsblock_t fsbno, 463 xfs_extlen_t max_len, 464 bool can_read, 465 struct xfs_buf **bpp) 466 { 467 struct xrep_bufscan scan = { 468 .daddr = XFS_FSB_TO_DADDR(mp, fsbno), 469 .max_sectors = xrep_bufscan_max_sectors(mp, max_len), 470 .daddr_step = XFS_FSB_TO_BB(mp, 1), 471 }; 472 struct xfs_buf *bp; 473 474 while ((bp = xrep_bufscan_advance(mp, &scan)) != NULL) { 475 *bpp = bp; 476 return 0; 477 } 478 479 if (!can_read) { 480 *bpp = NULL; 481 return 0; 482 } 483 484 return xfs_buf_read(mp->m_ddev_targp, scan.daddr, XFS_FSB_TO_BB(mp, 1), 485 XBF_TRYLOCK, bpp, NULL); 486 } 487 488 /* 489 * Deal with a buffer that we found during our walk of the attr fork. 490 * 491 * Attribute leaf and node blocks are simple -- they're a single block, so we 492 * can walk them one at a time and we never have to worry about discontiguous 493 * multiblock buffers like we do for directories. 494 * 495 * Unfortunately, remote attr blocks add a lot of complexity here. Each disk 496 * block is totally self contained, in the sense that the v5 header provides no 497 * indication that there could be more data in the next block. The incore 498 * buffers can span multiple blocks, though they never cross extent records. 499 * However, they don't necessarily start or end on an extent record boundary. 500 * Therefore, we need a special buffer find function to walk the buffer cache 501 * for us. 502 * 503 * The caller must hold the ILOCK on the file being repaired. We use 504 * XBF_TRYLOCK here to skip any locked buffer on the assumption that we don't 505 * own the block and don't want to hang the system on a potentially garbage 506 * buffer. 507 */ 508 STATIC int 509 xrep_xattr_recover_block( 510 struct xrep_xattr *rx, 511 xfs_dablk_t dabno, 512 xfs_fsblock_t fsbno, 513 xfs_extlen_t max_len, 514 xfs_extlen_t *actual_len) 515 { 516 struct xfs_da_blkinfo *info; 517 struct xfs_buf *bp; 518 int error; 519 520 error = xrep_xattr_find_buf(rx->sc->mp, fsbno, max_len, true, &bp); 521 if (error) 522 return error; 523 info = bp->b_addr; 524 *actual_len = XFS_BB_TO_FSB(rx->sc->mp, bp->b_length); 525 526 trace_xrep_xattr_recover_leafblock(rx->sc->ip, dabno, 527 be16_to_cpu(info->magic)); 528 529 /* 530 * If the buffer has the right magic number for an attr leaf block and 531 * passes a structure check (we don't care about checksums), salvage 532 * as much as we can from the block. */ 533 if (info->magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC) && 534 xrep_buf_verify_struct(bp, &xfs_attr3_leaf_buf_ops) && 535 xfs_attr3_leaf_header_check(bp, rx->sc->ip->i_ino) == NULL) 536 error = xrep_xattr_recover_leaf(rx, bp); 537 538 /* 539 * If the buffer didn't already have buffer ops set, it was read in by 540 * the _find_buf function and could very well be /part/ of a multiblock 541 * remote block. Mark it stale so that it doesn't hang around in 542 * memory to cause problems. 543 */ 544 if (bp->b_ops == NULL) 545 xfs_buf_stale(bp); 546 547 xfs_buf_relse(bp); 548 return error; 549 } 550 551 /* Insert one xattr key/value. */ 552 STATIC int 553 xrep_xattr_insert_rec( 554 struct xrep_xattr *rx, 555 const struct xrep_xattr_key *key) 556 { 557 struct xfs_da_args args = { 558 .dp = rx->sc->tempip, 559 .attr_filter = key->flags, 560 .attr_flags = XATTR_CREATE, 561 .namelen = key->namelen, 562 .valuelen = key->valuelen, 563 .owner = rx->sc->ip->i_ino, 564 }; 565 struct xchk_xattr_buf *ab = rx->sc->buf; 566 int error; 567 568 /* 569 * Grab pointers to the scrub buffer so that we can use them to insert 570 * attrs into the temp file. 571 */ 572 args.name = ab->name; 573 args.value = ab->value; 574 575 /* 576 * The attribute name is stored near the end of the in-core buffer, 577 * though we reserve one more byte to ensure null termination. 578 */ 579 ab->name[XATTR_NAME_MAX] = 0; 580 581 error = xfblob_load(rx->xattr_blobs, key->name_cookie, ab->name, 582 key->namelen); 583 if (error) 584 return error; 585 586 error = xfblob_free(rx->xattr_blobs, key->name_cookie); 587 if (error) 588 return error; 589 590 error = xfblob_load(rx->xattr_blobs, key->value_cookie, args.value, 591 key->valuelen); 592 if (error) 593 return error; 594 595 error = xfblob_free(rx->xattr_blobs, key->value_cookie); 596 if (error) 597 return error; 598 599 ab->name[key->namelen] = 0; 600 601 trace_xrep_xattr_insert_rec(rx->sc->tempip, key->flags, ab->name, 602 key->namelen, key->valuelen); 603 604 /* 605 * xfs_attr_set creates and commits its own transaction. If the attr 606 * already exists, we'll just drop it during the rebuild. 607 */ 608 error = xfs_attr_set(&args); 609 if (error == -EEXIST) 610 error = 0; 611 612 return error; 613 } 614 615 /* 616 * Periodically flush salvaged attributes to the temporary file. This is done 617 * to reduce the memory requirements of the xattr rebuild because files can 618 * contain millions of attributes. 619 */ 620 STATIC int 621 xrep_xattr_flush_stashed( 622 struct xrep_xattr *rx) 623 { 624 xfarray_idx_t array_cur; 625 int error; 626 627 /* 628 * Entering this function, the scrub context has a reference to the 629 * inode being repaired, the temporary file, and a scrub transaction 630 * that we use during xattr salvaging to avoid livelocking if there 631 * are cycles in the xattr structures. We hold ILOCK_EXCL on both 632 * the inode being repaired, though it is not ijoined to the scrub 633 * transaction. 634 * 635 * To constrain kernel memory use, we occasionally flush salvaged 636 * xattrs from the xfarray and xfblob structures into the temporary 637 * file in preparation for exchanging the xattr structures at the end. 638 * Updating the temporary file requires a transaction, so we commit the 639 * scrub transaction and drop the two ILOCKs so that xfs_attr_set can 640 * allocate whatever transaction it wants. 641 * 642 * We still hold IOLOCK_EXCL on the inode being repaired, which 643 * prevents anyone from modifying the damaged xattr data while we 644 * repair it. 645 */ 646 error = xrep_trans_commit(rx->sc); 647 if (error) 648 return error; 649 xchk_iunlock(rx->sc, XFS_ILOCK_EXCL); 650 651 /* 652 * Take the IOLOCK of the temporary file while we modify xattrs. This 653 * isn't strictly required because the temporary file is never revealed 654 * to userspace, but we follow the same locking rules. We still hold 655 * sc->ip's IOLOCK. 656 */ 657 error = xrep_tempfile_iolock_polled(rx->sc); 658 if (error) 659 return error; 660 661 /* Add all the salvaged attrs to the temporary file. */ 662 foreach_xfarray_idx(rx->xattr_records, array_cur) { 663 struct xrep_xattr_key key; 664 665 error = xfarray_load(rx->xattr_records, array_cur, &key); 666 if (error) 667 return error; 668 669 error = xrep_xattr_insert_rec(rx, &key); 670 if (error) 671 return error; 672 } 673 674 /* Empty out both arrays now that we've added the entries. */ 675 xfarray_truncate(rx->xattr_records); 676 xfblob_truncate(rx->xattr_blobs); 677 678 xrep_tempfile_iounlock(rx->sc); 679 680 /* Recreate the salvage transaction and relock the inode. */ 681 error = xchk_trans_alloc(rx->sc, 0); 682 if (error) 683 return error; 684 xchk_ilock(rx->sc, XFS_ILOCK_EXCL); 685 return 0; 686 } 687 688 /* Decide if we've stashed too much xattr data in memory. */ 689 static inline bool 690 xrep_xattr_want_flush_stashed( 691 struct xrep_xattr *rx) 692 { 693 unsigned long long bytes; 694 695 bytes = xfarray_bytes(rx->xattr_records) + 696 xfblob_bytes(rx->xattr_blobs); 697 return bytes > XREP_XATTR_MAX_STASH_BYTES; 698 } 699 700 /* Extract as many attribute keys and values as we can. */ 701 STATIC int 702 xrep_xattr_recover( 703 struct xrep_xattr *rx) 704 { 705 struct xfs_bmbt_irec got; 706 struct xfs_scrub *sc = rx->sc; 707 struct xfs_da_geometry *geo = sc->mp->m_attr_geo; 708 xfs_fileoff_t offset; 709 xfs_extlen_t len; 710 xfs_dablk_t dabno; 711 int nmap; 712 int error; 713 714 /* 715 * Iterate each xattr leaf block in the attr fork to scan them for any 716 * attributes that we might salvage. 717 */ 718 for (offset = 0; 719 offset < XFS_MAX_FILEOFF; 720 offset = got.br_startoff + got.br_blockcount) { 721 nmap = 1; 722 error = xfs_bmapi_read(sc->ip, offset, XFS_MAX_FILEOFF - offset, 723 &got, &nmap, XFS_BMAPI_ATTRFORK); 724 if (error) 725 return error; 726 if (nmap != 1) 727 return -EFSCORRUPTED; 728 if (!xfs_bmap_is_written_extent(&got)) 729 continue; 730 731 for (dabno = round_up(got.br_startoff, geo->fsbcount); 732 dabno < got.br_startoff + got.br_blockcount; 733 dabno += len) { 734 xfs_fileoff_t curr_offset = dabno - got.br_startoff; 735 xfs_extlen_t maxlen; 736 737 if (xchk_should_terminate(rx->sc, &error)) 738 return error; 739 740 maxlen = min_t(xfs_filblks_t, INT_MAX, 741 got.br_blockcount - curr_offset); 742 error = xrep_xattr_recover_block(rx, dabno, 743 curr_offset + got.br_startblock, 744 maxlen, &len); 745 if (error) 746 return error; 747 748 if (xrep_xattr_want_flush_stashed(rx)) { 749 error = xrep_xattr_flush_stashed(rx); 750 if (error) 751 return error; 752 } 753 } 754 } 755 756 return 0; 757 } 758 759 /* 760 * Reset the extended attribute fork to a state where we can start re-adding 761 * the salvaged attributes. 762 */ 763 STATIC int 764 xrep_xattr_fork_remove( 765 struct xfs_scrub *sc, 766 struct xfs_inode *ip) 767 { 768 struct xfs_attr_sf_hdr *hdr; 769 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_ATTR_FORK); 770 771 /* 772 * If the data fork is in btree format, we can't change di_forkoff 773 * because we could run afoul of the rule that the data fork isn't 774 * supposed to be in btree format if there's enough space in the fork 775 * that it could have used extents format. Instead, reinitialize the 776 * attr fork to have a shortform structure with zero attributes. 777 */ 778 if (ip->i_df.if_format == XFS_DINODE_FMT_BTREE) { 779 ifp->if_format = XFS_DINODE_FMT_LOCAL; 780 hdr = xfs_idata_realloc(ip, (int)sizeof(*hdr) - ifp->if_bytes, 781 XFS_ATTR_FORK); 782 hdr->count = 0; 783 hdr->totsize = cpu_to_be16(sizeof(*hdr)); 784 xfs_trans_log_inode(sc->tp, ip, 785 XFS_ILOG_CORE | XFS_ILOG_ADATA); 786 return 0; 787 } 788 789 /* If we still have attr fork extents, something's wrong. */ 790 if (ifp->if_nextents != 0) { 791 struct xfs_iext_cursor icur; 792 struct xfs_bmbt_irec irec; 793 unsigned int i = 0; 794 795 xfs_emerg(sc->mp, 796 "inode 0x%llx attr fork still has %llu attr extents, format %d?!", 797 ip->i_ino, ifp->if_nextents, ifp->if_format); 798 for_each_xfs_iext(ifp, &icur, &irec) { 799 xfs_err(sc->mp, 800 "[%u]: startoff %llu startblock %llu blockcount %llu state %u", 801 i++, irec.br_startoff, 802 irec.br_startblock, irec.br_blockcount, 803 irec.br_state); 804 } 805 ASSERT(0); 806 return -EFSCORRUPTED; 807 } 808 809 xfs_attr_fork_remove(ip, sc->tp); 810 return 0; 811 } 812 813 /* 814 * Free all the attribute fork blocks of the file being repaired and delete the 815 * fork. The caller must ILOCK the scrub file and join it to the transaction. 816 * This function returns with the inode joined to a clean transaction. 817 */ 818 int 819 xrep_xattr_reset_fork( 820 struct xfs_scrub *sc) 821 { 822 int error; 823 824 trace_xrep_xattr_reset_fork(sc->ip, sc->ip); 825 826 /* Unmap all the attr blocks. */ 827 if (xfs_ifork_has_extents(&sc->ip->i_af)) { 828 error = xrep_reap_ifork(sc, sc->ip, XFS_ATTR_FORK); 829 if (error) 830 return error; 831 } 832 833 error = xrep_xattr_fork_remove(sc, sc->ip); 834 if (error) 835 return error; 836 837 return xfs_trans_roll_inode(&sc->tp, sc->ip); 838 } 839 840 /* 841 * Free all the attribute fork blocks of the temporary file and delete the attr 842 * fork. The caller must ILOCK the tempfile and join it to the transaction. 843 * This function returns with the inode joined to a clean scrub transaction. 844 */ 845 STATIC int 846 xrep_xattr_reset_tempfile_fork( 847 struct xfs_scrub *sc) 848 { 849 int error; 850 851 trace_xrep_xattr_reset_fork(sc->ip, sc->tempip); 852 853 /* 854 * Wipe out the attr fork of the temp file so that regular inode 855 * inactivation won't trip over the corrupt attr fork. 856 */ 857 if (xfs_ifork_has_extents(&sc->tempip->i_af)) { 858 error = xrep_reap_ifork(sc, sc->tempip, XFS_ATTR_FORK); 859 if (error) 860 return error; 861 } 862 863 return xrep_xattr_fork_remove(sc, sc->tempip); 864 } 865 866 /* 867 * Find all the extended attributes for this inode by scraping them out of the 868 * attribute key blocks by hand, and flushing them into the temp file. 869 * When we're done, free the staging memory before exchanging the xattr 870 * structures to reduce memory usage. 871 */ 872 STATIC int 873 xrep_xattr_salvage_attributes( 874 struct xrep_xattr *rx) 875 { 876 struct xfs_inode *ip = rx->sc->ip; 877 int error; 878 879 /* Short format xattrs are easy! */ 880 if (rx->sc->ip->i_af.if_format == XFS_DINODE_FMT_LOCAL) { 881 error = xrep_xattr_recover_sf(rx); 882 if (error) 883 return error; 884 885 return xrep_xattr_flush_stashed(rx); 886 } 887 888 /* 889 * For non-inline xattr structures, the salvage function scans the 890 * buffer cache looking for potential attr leaf blocks. The scan 891 * requires the ability to lock any buffer found and runs independently 892 * of any transaction <-> buffer item <-> buffer linkage. Therefore, 893 * roll the transaction to ensure there are no buffers joined. We hold 894 * the ILOCK independently of the transaction. 895 */ 896 error = xfs_trans_roll(&rx->sc->tp); 897 if (error) 898 return error; 899 900 error = xfs_iread_extents(rx->sc->tp, ip, XFS_ATTR_FORK); 901 if (error) 902 return error; 903 904 error = xrep_xattr_recover(rx); 905 if (error) 906 return error; 907 908 return xrep_xattr_flush_stashed(rx); 909 } 910 911 /* 912 * Prepare both inodes' attribute forks for an exchange. Promote the tempfile 913 * from short format to leaf format, and if the file being repaired has a short 914 * format attr fork, turn it into an empty extent list. 915 */ 916 STATIC int 917 xrep_xattr_swap_prep( 918 struct xfs_scrub *sc, 919 bool temp_local, 920 bool ip_local) 921 { 922 int error; 923 924 /* 925 * If the tempfile's attributes are in shortform format, convert that 926 * to a single leaf extent so that we can use the atomic mapping 927 * exchange. 928 */ 929 if (temp_local) { 930 struct xfs_da_args args = { 931 .dp = sc->tempip, 932 .geo = sc->mp->m_attr_geo, 933 .whichfork = XFS_ATTR_FORK, 934 .trans = sc->tp, 935 .total = 1, 936 .owner = sc->ip->i_ino, 937 }; 938 939 error = xfs_attr_shortform_to_leaf(&args); 940 if (error) 941 return error; 942 943 /* 944 * Roll the deferred log items to get us back to a clean 945 * transaction. 946 */ 947 error = xfs_defer_finish(&sc->tp); 948 if (error) 949 return error; 950 } 951 952 /* 953 * If the file being repaired had a shortform attribute fork, convert 954 * that to an empty extent list in preparation for the atomic mapping 955 * exchange. 956 */ 957 if (ip_local) { 958 struct xfs_ifork *ifp; 959 960 ifp = xfs_ifork_ptr(sc->ip, XFS_ATTR_FORK); 961 962 xfs_idestroy_fork(ifp); 963 ifp->if_format = XFS_DINODE_FMT_EXTENTS; 964 ifp->if_nextents = 0; 965 ifp->if_bytes = 0; 966 ifp->if_data = NULL; 967 ifp->if_height = 0; 968 969 xfs_trans_log_inode(sc->tp, sc->ip, 970 XFS_ILOG_CORE | XFS_ILOG_ADATA); 971 } 972 973 return 0; 974 } 975 976 /* Exchange the temporary file's attribute fork with the one being repaired. */ 977 STATIC int 978 xrep_xattr_swap( 979 struct xfs_scrub *sc, 980 struct xrep_tempexch *tx) 981 { 982 bool ip_local, temp_local; 983 int error = 0; 984 985 ip_local = sc->ip->i_af.if_format == XFS_DINODE_FMT_LOCAL; 986 temp_local = sc->tempip->i_af.if_format == XFS_DINODE_FMT_LOCAL; 987 988 /* 989 * If the both files have a local format attr fork and the rebuilt 990 * xattr data would fit in the repaired file's attr fork, just copy 991 * the contents from the tempfile and declare ourselves done. 992 */ 993 if (ip_local && temp_local) { 994 int forkoff; 995 int newsize; 996 997 newsize = xfs_attr_sf_totsize(sc->tempip); 998 forkoff = xfs_attr_shortform_bytesfit(sc->ip, newsize); 999 if (forkoff > 0) { 1000 sc->ip->i_forkoff = forkoff; 1001 xrep_tempfile_copyout_local(sc, XFS_ATTR_FORK); 1002 return 0; 1003 } 1004 } 1005 1006 /* Otherwise, make sure both attr forks are in block-mapping mode. */ 1007 error = xrep_xattr_swap_prep(sc, temp_local, ip_local); 1008 if (error) 1009 return error; 1010 1011 return xrep_tempexch_contents(sc, tx); 1012 } 1013 1014 /* 1015 * Exchange the new extended attribute data (which we created in the tempfile) 1016 * with the file being repaired. 1017 */ 1018 STATIC int 1019 xrep_xattr_rebuild_tree( 1020 struct xrep_xattr *rx) 1021 { 1022 struct xfs_scrub *sc = rx->sc; 1023 int error; 1024 1025 /* 1026 * If we didn't find any attributes to salvage, repair the file by 1027 * zapping its attr fork. 1028 */ 1029 if (rx->attrs_found == 0) { 1030 xfs_trans_ijoin(sc->tp, sc->ip, 0); 1031 error = xrep_xattr_reset_fork(sc); 1032 if (error) 1033 return error; 1034 1035 goto forget_acls; 1036 } 1037 1038 trace_xrep_xattr_rebuild_tree(sc->ip, sc->tempip); 1039 1040 /* 1041 * Commit the repair transaction and drop the ILOCKs so that we can use 1042 * the atomic file content exchange helper functions to compute the 1043 * correct resource reservations. 1044 * 1045 * We still hold IOLOCK_EXCL (aka i_rwsem) which will prevent xattr 1046 * modifications, but there's nothing to prevent userspace from reading 1047 * the attributes until we're ready for the exchange operation. Reads 1048 * will return -EIO without shutting down the fs, so we're ok with 1049 * that. 1050 */ 1051 error = xrep_trans_commit(sc); 1052 if (error) 1053 return error; 1054 1055 xchk_iunlock(sc, XFS_ILOCK_EXCL); 1056 1057 /* 1058 * Take the IOLOCK on the temporary file so that we can run xattr 1059 * operations with the same locks held as we would for a normal file. 1060 * We still hold sc->ip's IOLOCK. 1061 */ 1062 error = xrep_tempfile_iolock_polled(rx->sc); 1063 if (error) 1064 return error; 1065 1066 /* Allocate exchange transaction and lock both inodes. */ 1067 error = xrep_tempexch_trans_alloc(rx->sc, XFS_ATTR_FORK, &rx->tx); 1068 if (error) 1069 return error; 1070 1071 /* 1072 * Exchange the blocks mapped by the tempfile's attr fork with the file 1073 * being repaired. The old attr blocks will then be attached to the 1074 * tempfile, so reap its attr fork. 1075 */ 1076 error = xrep_xattr_swap(sc, &rx->tx); 1077 if (error) 1078 return error; 1079 1080 error = xrep_xattr_reset_tempfile_fork(sc); 1081 if (error) 1082 return error; 1083 1084 /* 1085 * Roll to get a transaction without any inodes joined to it. Then we 1086 * can drop the tempfile's ILOCK and IOLOCK before doing more work on 1087 * the scrub target file. 1088 */ 1089 error = xfs_trans_roll(&sc->tp); 1090 if (error) 1091 return error; 1092 1093 xrep_tempfile_iunlock(sc); 1094 xrep_tempfile_iounlock(sc); 1095 1096 forget_acls: 1097 /* Invalidate cached ACLs now that we've reloaded all the xattrs. */ 1098 xfs_forget_acl(VFS_I(sc->ip), SGI_ACL_FILE); 1099 xfs_forget_acl(VFS_I(sc->ip), SGI_ACL_DEFAULT); 1100 return 0; 1101 } 1102 1103 /* Tear down all the incore scan stuff we created. */ 1104 STATIC void 1105 xrep_xattr_teardown( 1106 struct xrep_xattr *rx) 1107 { 1108 xfblob_destroy(rx->xattr_blobs); 1109 xfarray_destroy(rx->xattr_records); 1110 kfree(rx); 1111 } 1112 1113 /* Set up the filesystem scan so we can regenerate extended attributes. */ 1114 STATIC int 1115 xrep_xattr_setup_scan( 1116 struct xfs_scrub *sc, 1117 struct xrep_xattr **rxp) 1118 { 1119 struct xrep_xattr *rx; 1120 char *descr; 1121 int max_len; 1122 int error; 1123 1124 rx = kzalloc(sizeof(struct xrep_xattr), XCHK_GFP_FLAGS); 1125 if (!rx) 1126 return -ENOMEM; 1127 rx->sc = sc; 1128 1129 /* 1130 * Allocate enough memory to handle loading local attr values from the 1131 * xfblob data while flushing stashed attrs to the temporary file. 1132 * We only realloc the buffer when salvaging remote attr values. 1133 */ 1134 max_len = xfs_attr_leaf_entsize_local_max(sc->mp->m_attr_geo->blksize); 1135 error = xchk_setup_xattr_buf(rx->sc, max_len); 1136 if (error == -ENOMEM) 1137 error = -EDEADLOCK; 1138 if (error) 1139 goto out_rx; 1140 1141 /* Set up some staging for salvaged attribute keys and values */ 1142 descr = xchk_xfile_ino_descr(sc, "xattr keys"); 1143 error = xfarray_create(descr, 0, sizeof(struct xrep_xattr_key), 1144 &rx->xattr_records); 1145 kfree(descr); 1146 if (error) 1147 goto out_rx; 1148 1149 descr = xchk_xfile_ino_descr(sc, "xattr names"); 1150 error = xfblob_create(descr, &rx->xattr_blobs); 1151 kfree(descr); 1152 if (error) 1153 goto out_keys; 1154 1155 *rxp = rx; 1156 return 0; 1157 out_keys: 1158 xfarray_destroy(rx->xattr_records); 1159 out_rx: 1160 kfree(rx); 1161 return error; 1162 } 1163 1164 /* 1165 * Repair the extended attribute metadata. 1166 * 1167 * XXX: Remote attribute value buffers encompass the entire (up to 64k) buffer. 1168 * The buffer cache in XFS can't handle aliased multiblock buffers, so this 1169 * might misbehave if the attr fork is crosslinked with other filesystem 1170 * metadata. 1171 */ 1172 int 1173 xrep_xattr( 1174 struct xfs_scrub *sc) 1175 { 1176 struct xrep_xattr *rx = NULL; 1177 int error; 1178 1179 if (!xfs_inode_hasattr(sc->ip)) 1180 return -ENOENT; 1181 1182 /* The rmapbt is required to reap the old attr fork. */ 1183 if (!xfs_has_rmapbt(sc->mp)) 1184 return -EOPNOTSUPP; 1185 1186 error = xrep_xattr_setup_scan(sc, &rx); 1187 if (error) 1188 return error; 1189 1190 ASSERT(sc->ilock_flags & XFS_ILOCK_EXCL); 1191 1192 error = xrep_xattr_salvage_attributes(rx); 1193 if (error) 1194 goto out_scan; 1195 1196 /* Last chance to abort before we start committing fixes. */ 1197 if (xchk_should_terminate(sc, &error)) 1198 goto out_scan; 1199 1200 error = xrep_xattr_rebuild_tree(rx); 1201 if (error) 1202 goto out_scan; 1203 1204 out_scan: 1205 xrep_xattr_teardown(rx); 1206 return error; 1207 } 1208