1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 4 * Copyright (c) 2013 Red Hat, Inc. 5 * All Rights Reserved. 6 */ 7 #include "xfs.h" 8 #include "xfs_fs.h" 9 #include "xfs_shared.h" 10 #include "xfs_format.h" 11 #include "xfs_log_format.h" 12 #include "xfs_trans_resv.h" 13 #include "xfs_bit.h" 14 #include "xfs_mount.h" 15 #include "xfs_defer.h" 16 #include "xfs_da_format.h" 17 #include "xfs_da_btree.h" 18 #include "xfs_inode.h" 19 #include "xfs_trans.h" 20 #include "xfs_bmap.h" 21 #include "xfs_attr.h" 22 #include "xfs_attr_remote.h" 23 #include "xfs_trace.h" 24 #include "xfs_error.h" 25 26 #define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */ 27 28 /* 29 * Remote Attribute Values 30 * ======================= 31 * 32 * Remote extended attribute values are conceptually simple -- they're written 33 * to data blocks mapped by an inode's attribute fork, and they have an upper 34 * size limit of 64k. Setting a value does not involve the XFS log. 35 * 36 * However, on a v5 filesystem, maximally sized remote attr values require one 37 * block more than 64k worth of space to hold both the remote attribute value 38 * header (64 bytes). On a 4k block filesystem this results in a 68k buffer; 39 * on a 64k block filesystem, this would be a 128k buffer. Note that the log 40 * format can only handle a dirty buffer of XFS_MAX_BLOCKSIZE length (64k). 41 * Therefore, we /must/ ensure that remote attribute value buffers never touch 42 * the logging system and therefore never have a log item. 43 */ 44 45 /* 46 * Each contiguous block has a header, so it is not just a simple attribute 47 * length to FSB conversion. 48 */ 49 int 50 xfs_attr3_rmt_blocks( 51 struct xfs_mount *mp, 52 int attrlen) 53 { 54 if (xfs_sb_version_hascrc(&mp->m_sb)) { 55 int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize); 56 return (attrlen + buflen - 1) / buflen; 57 } 58 return XFS_B_TO_FSB(mp, attrlen); 59 } 60 61 /* 62 * Checking of the remote attribute header is split into two parts. The verifier 63 * does CRC, location and bounds checking, the unpacking function checks the 64 * attribute parameters and owner. 65 */ 66 static xfs_failaddr_t 67 xfs_attr3_rmt_hdr_ok( 68 void *ptr, 69 xfs_ino_t ino, 70 uint32_t offset, 71 uint32_t size, 72 xfs_daddr_t bno) 73 { 74 struct xfs_attr3_rmt_hdr *rmt = ptr; 75 76 if (bno != be64_to_cpu(rmt->rm_blkno)) 77 return __this_address; 78 if (offset != be32_to_cpu(rmt->rm_offset)) 79 return __this_address; 80 if (size != be32_to_cpu(rmt->rm_bytes)) 81 return __this_address; 82 if (ino != be64_to_cpu(rmt->rm_owner)) 83 return __this_address; 84 85 /* ok */ 86 return NULL; 87 } 88 89 static xfs_failaddr_t 90 xfs_attr3_rmt_verify( 91 struct xfs_mount *mp, 92 struct xfs_buf *bp, 93 void *ptr, 94 int fsbsize, 95 xfs_daddr_t bno) 96 { 97 struct xfs_attr3_rmt_hdr *rmt = ptr; 98 99 if (!xfs_sb_version_hascrc(&mp->m_sb)) 100 return __this_address; 101 if (!xfs_verify_magic(bp, rmt->rm_magic)) 102 return __this_address; 103 if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid)) 104 return __this_address; 105 if (be64_to_cpu(rmt->rm_blkno) != bno) 106 return __this_address; 107 if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt)) 108 return __this_address; 109 if (be32_to_cpu(rmt->rm_offset) + 110 be32_to_cpu(rmt->rm_bytes) > XFS_XATTR_SIZE_MAX) 111 return __this_address; 112 if (rmt->rm_owner == 0) 113 return __this_address; 114 115 return NULL; 116 } 117 118 static int 119 __xfs_attr3_rmt_read_verify( 120 struct xfs_buf *bp, 121 bool check_crc, 122 xfs_failaddr_t *failaddr) 123 { 124 struct xfs_mount *mp = bp->b_mount; 125 char *ptr; 126 int len; 127 xfs_daddr_t bno; 128 int blksize = mp->m_attr_geo->blksize; 129 130 /* no verification of non-crc buffers */ 131 if (!xfs_sb_version_hascrc(&mp->m_sb)) 132 return 0; 133 134 ptr = bp->b_addr; 135 bno = bp->b_bn; 136 len = BBTOB(bp->b_length); 137 ASSERT(len >= blksize); 138 139 while (len > 0) { 140 if (check_crc && 141 !xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) { 142 *failaddr = __this_address; 143 return -EFSBADCRC; 144 } 145 *failaddr = xfs_attr3_rmt_verify(mp, bp, ptr, blksize, bno); 146 if (*failaddr) 147 return -EFSCORRUPTED; 148 len -= blksize; 149 ptr += blksize; 150 bno += BTOBB(blksize); 151 } 152 153 if (len != 0) { 154 *failaddr = __this_address; 155 return -EFSCORRUPTED; 156 } 157 158 return 0; 159 } 160 161 static void 162 xfs_attr3_rmt_read_verify( 163 struct xfs_buf *bp) 164 { 165 xfs_failaddr_t fa; 166 int error; 167 168 error = __xfs_attr3_rmt_read_verify(bp, true, &fa); 169 if (error) 170 xfs_verifier_error(bp, error, fa); 171 } 172 173 static xfs_failaddr_t 174 xfs_attr3_rmt_verify_struct( 175 struct xfs_buf *bp) 176 { 177 xfs_failaddr_t fa; 178 int error; 179 180 error = __xfs_attr3_rmt_read_verify(bp, false, &fa); 181 return error ? fa : NULL; 182 } 183 184 static void 185 xfs_attr3_rmt_write_verify( 186 struct xfs_buf *bp) 187 { 188 struct xfs_mount *mp = bp->b_mount; 189 xfs_failaddr_t fa; 190 int blksize = mp->m_attr_geo->blksize; 191 char *ptr; 192 int len; 193 xfs_daddr_t bno; 194 195 /* no verification of non-crc buffers */ 196 if (!xfs_sb_version_hascrc(&mp->m_sb)) 197 return; 198 199 ptr = bp->b_addr; 200 bno = bp->b_bn; 201 len = BBTOB(bp->b_length); 202 ASSERT(len >= blksize); 203 204 while (len > 0) { 205 struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr; 206 207 fa = xfs_attr3_rmt_verify(mp, bp, ptr, blksize, bno); 208 if (fa) { 209 xfs_verifier_error(bp, -EFSCORRUPTED, fa); 210 return; 211 } 212 213 /* 214 * Ensure we aren't writing bogus LSNs to disk. See 215 * xfs_attr3_rmt_hdr_set() for the explanation. 216 */ 217 if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) { 218 xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); 219 return; 220 } 221 xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF); 222 223 len -= blksize; 224 ptr += blksize; 225 bno += BTOBB(blksize); 226 } 227 228 if (len != 0) 229 xfs_verifier_error(bp, -EFSCORRUPTED, __this_address); 230 } 231 232 const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = { 233 .name = "xfs_attr3_rmt", 234 .magic = { 0, cpu_to_be32(XFS_ATTR3_RMT_MAGIC) }, 235 .verify_read = xfs_attr3_rmt_read_verify, 236 .verify_write = xfs_attr3_rmt_write_verify, 237 .verify_struct = xfs_attr3_rmt_verify_struct, 238 }; 239 240 STATIC int 241 xfs_attr3_rmt_hdr_set( 242 struct xfs_mount *mp, 243 void *ptr, 244 xfs_ino_t ino, 245 uint32_t offset, 246 uint32_t size, 247 xfs_daddr_t bno) 248 { 249 struct xfs_attr3_rmt_hdr *rmt = ptr; 250 251 if (!xfs_sb_version_hascrc(&mp->m_sb)) 252 return 0; 253 254 rmt->rm_magic = cpu_to_be32(XFS_ATTR3_RMT_MAGIC); 255 rmt->rm_offset = cpu_to_be32(offset); 256 rmt->rm_bytes = cpu_to_be32(size); 257 uuid_copy(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid); 258 rmt->rm_owner = cpu_to_be64(ino); 259 rmt->rm_blkno = cpu_to_be64(bno); 260 261 /* 262 * Remote attribute blocks are written synchronously, so we don't 263 * have an LSN that we can stamp in them that makes any sense to log 264 * recovery. To ensure that log recovery handles overwrites of these 265 * blocks sanely (i.e. once they've been freed and reallocated as some 266 * other type of metadata) we need to ensure that the LSN has a value 267 * that tells log recovery to ignore the LSN and overwrite the buffer 268 * with whatever is in it's log. To do this, we use the magic 269 * NULLCOMMITLSN to indicate that the LSN is invalid. 270 */ 271 rmt->rm_lsn = cpu_to_be64(NULLCOMMITLSN); 272 273 return sizeof(struct xfs_attr3_rmt_hdr); 274 } 275 276 /* 277 * Helper functions to copy attribute data in and out of the one disk extents 278 */ 279 STATIC int 280 xfs_attr_rmtval_copyout( 281 struct xfs_mount *mp, 282 struct xfs_buf *bp, 283 xfs_ino_t ino, 284 int *offset, 285 int *valuelen, 286 uint8_t **dst) 287 { 288 char *src = bp->b_addr; 289 xfs_daddr_t bno = bp->b_bn; 290 int len = BBTOB(bp->b_length); 291 int blksize = mp->m_attr_geo->blksize; 292 293 ASSERT(len >= blksize); 294 295 while (len > 0 && *valuelen > 0) { 296 int hdr_size = 0; 297 int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize); 298 299 byte_cnt = min(*valuelen, byte_cnt); 300 301 if (xfs_sb_version_hascrc(&mp->m_sb)) { 302 if (xfs_attr3_rmt_hdr_ok(src, ino, *offset, 303 byte_cnt, bno)) { 304 xfs_alert(mp, 305 "remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)", 306 bno, *offset, byte_cnt, ino); 307 return -EFSCORRUPTED; 308 } 309 hdr_size = sizeof(struct xfs_attr3_rmt_hdr); 310 } 311 312 memcpy(*dst, src + hdr_size, byte_cnt); 313 314 /* roll buffer forwards */ 315 len -= blksize; 316 src += blksize; 317 bno += BTOBB(blksize); 318 319 /* roll attribute data forwards */ 320 *valuelen -= byte_cnt; 321 *dst += byte_cnt; 322 *offset += byte_cnt; 323 } 324 return 0; 325 } 326 327 STATIC void 328 xfs_attr_rmtval_copyin( 329 struct xfs_mount *mp, 330 struct xfs_buf *bp, 331 xfs_ino_t ino, 332 int *offset, 333 int *valuelen, 334 uint8_t **src) 335 { 336 char *dst = bp->b_addr; 337 xfs_daddr_t bno = bp->b_bn; 338 int len = BBTOB(bp->b_length); 339 int blksize = mp->m_attr_geo->blksize; 340 341 ASSERT(len >= blksize); 342 343 while (len > 0 && *valuelen > 0) { 344 int hdr_size; 345 int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize); 346 347 byte_cnt = min(*valuelen, byte_cnt); 348 hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset, 349 byte_cnt, bno); 350 351 memcpy(dst + hdr_size, *src, byte_cnt); 352 353 /* 354 * If this is the last block, zero the remainder of it. 355 * Check that we are actually the last block, too. 356 */ 357 if (byte_cnt + hdr_size < blksize) { 358 ASSERT(*valuelen - byte_cnt == 0); 359 ASSERT(len == blksize); 360 memset(dst + hdr_size + byte_cnt, 0, 361 blksize - hdr_size - byte_cnt); 362 } 363 364 /* roll buffer forwards */ 365 len -= blksize; 366 dst += blksize; 367 bno += BTOBB(blksize); 368 369 /* roll attribute data forwards */ 370 *valuelen -= byte_cnt; 371 *src += byte_cnt; 372 *offset += byte_cnt; 373 } 374 } 375 376 /* 377 * Read the value associated with an attribute from the out-of-line buffer 378 * that we stored it in. 379 * 380 * Returns 0 on successful retrieval, otherwise an error. 381 */ 382 int 383 xfs_attr_rmtval_get( 384 struct xfs_da_args *args) 385 { 386 struct xfs_bmbt_irec map[ATTR_RMTVALUE_MAPSIZE]; 387 struct xfs_mount *mp = args->dp->i_mount; 388 struct xfs_buf *bp; 389 xfs_dablk_t lblkno = args->rmtblkno; 390 uint8_t *dst = args->value; 391 int valuelen; 392 int nmap; 393 int error; 394 int blkcnt = args->rmtblkcnt; 395 int i; 396 int offset = 0; 397 398 trace_xfs_attr_rmtval_get(args); 399 400 ASSERT(!(args->flags & ATTR_KERNOVAL)); 401 ASSERT(args->rmtvaluelen == args->valuelen); 402 403 valuelen = args->rmtvaluelen; 404 while (valuelen > 0) { 405 nmap = ATTR_RMTVALUE_MAPSIZE; 406 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, 407 blkcnt, map, &nmap, 408 XFS_BMAPI_ATTRFORK); 409 if (error) 410 return error; 411 ASSERT(nmap >= 1); 412 413 for (i = 0; (i < nmap) && (valuelen > 0); i++) { 414 xfs_daddr_t dblkno; 415 int dblkcnt; 416 417 ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) && 418 (map[i].br_startblock != HOLESTARTBLOCK)); 419 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); 420 dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); 421 error = xfs_buf_read(mp->m_ddev_targp, dblkno, dblkcnt, 422 0, &bp, &xfs_attr3_rmt_buf_ops); 423 if (error) 424 return error; 425 426 error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino, 427 &offset, &valuelen, 428 &dst); 429 xfs_buf_relse(bp); 430 if (error) 431 return error; 432 433 /* roll attribute extent map forwards */ 434 lblkno += map[i].br_blockcount; 435 blkcnt -= map[i].br_blockcount; 436 } 437 } 438 ASSERT(valuelen == 0); 439 return 0; 440 } 441 442 /* 443 * Write the value associated with an attribute into the out-of-line buffer 444 * that we have defined for it. 445 */ 446 int 447 xfs_attr_rmtval_set( 448 struct xfs_da_args *args) 449 { 450 struct xfs_inode *dp = args->dp; 451 struct xfs_mount *mp = dp->i_mount; 452 struct xfs_bmbt_irec map; 453 xfs_dablk_t lblkno; 454 xfs_fileoff_t lfileoff = 0; 455 uint8_t *src = args->value; 456 int blkcnt; 457 int valuelen; 458 int nmap; 459 int error; 460 int offset = 0; 461 462 trace_xfs_attr_rmtval_set(args); 463 464 /* 465 * Find a "hole" in the attribute address space large enough for 466 * us to drop the new attribute's value into. Because CRC enable 467 * attributes have headers, we can't just do a straight byte to FSB 468 * conversion and have to take the header space into account. 469 */ 470 blkcnt = xfs_attr3_rmt_blocks(mp, args->rmtvaluelen); 471 error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff, 472 XFS_ATTR_FORK); 473 if (error) 474 return error; 475 476 args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff; 477 args->rmtblkcnt = blkcnt; 478 479 /* 480 * Roll through the "value", allocating blocks on disk as required. 481 */ 482 while (blkcnt > 0) { 483 /* 484 * Allocate a single extent, up to the size of the value. 485 * 486 * Note that we have to consider this a data allocation as we 487 * write the remote attribute without logging the contents. 488 * Hence we must ensure that we aren't using blocks that are on 489 * the busy list so that we don't overwrite blocks which have 490 * recently been freed but their transactions are not yet 491 * committed to disk. If we overwrite the contents of a busy 492 * extent and then crash then the block may not contain the 493 * correct metadata after log recovery occurs. 494 */ 495 nmap = 1; 496 error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno, 497 blkcnt, XFS_BMAPI_ATTRFORK, args->total, &map, 498 &nmap); 499 if (error) 500 return error; 501 error = xfs_defer_finish(&args->trans); 502 if (error) 503 return error; 504 505 ASSERT(nmap == 1); 506 ASSERT((map.br_startblock != DELAYSTARTBLOCK) && 507 (map.br_startblock != HOLESTARTBLOCK)); 508 lblkno += map.br_blockcount; 509 blkcnt -= map.br_blockcount; 510 511 /* 512 * Start the next trans in the chain. 513 */ 514 error = xfs_trans_roll_inode(&args->trans, dp); 515 if (error) 516 return error; 517 } 518 519 /* 520 * Roll through the "value", copying the attribute value to the 521 * already-allocated blocks. Blocks are written synchronously 522 * so that we can know they are all on disk before we turn off 523 * the INCOMPLETE flag. 524 */ 525 lblkno = args->rmtblkno; 526 blkcnt = args->rmtblkcnt; 527 valuelen = args->rmtvaluelen; 528 while (valuelen > 0) { 529 struct xfs_buf *bp; 530 xfs_daddr_t dblkno; 531 int dblkcnt; 532 533 ASSERT(blkcnt > 0); 534 535 nmap = 1; 536 error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno, 537 blkcnt, &map, &nmap, 538 XFS_BMAPI_ATTRFORK); 539 if (error) 540 return error; 541 ASSERT(nmap == 1); 542 ASSERT((map.br_startblock != DELAYSTARTBLOCK) && 543 (map.br_startblock != HOLESTARTBLOCK)); 544 545 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), 546 dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); 547 548 error = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, &bp); 549 if (error) 550 return error; 551 bp->b_ops = &xfs_attr3_rmt_buf_ops; 552 553 xfs_attr_rmtval_copyin(mp, bp, args->dp->i_ino, &offset, 554 &valuelen, &src); 555 556 error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */ 557 xfs_buf_relse(bp); 558 if (error) 559 return error; 560 561 562 /* roll attribute extent map forwards */ 563 lblkno += map.br_blockcount; 564 blkcnt -= map.br_blockcount; 565 } 566 ASSERT(valuelen == 0); 567 return 0; 568 } 569 570 /* Mark stale any incore buffers for the remote value. */ 571 int 572 xfs_attr_rmtval_stale( 573 struct xfs_inode *ip, 574 struct xfs_bmbt_irec *map, 575 xfs_buf_flags_t incore_flags) 576 { 577 struct xfs_mount *mp = ip->i_mount; 578 struct xfs_buf *bp; 579 580 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 581 582 if (XFS_IS_CORRUPT(mp, map->br_startblock == DELAYSTARTBLOCK) || 583 XFS_IS_CORRUPT(mp, map->br_startblock == HOLESTARTBLOCK)) 584 return -EFSCORRUPTED; 585 586 bp = xfs_buf_incore(mp->m_ddev_targp, 587 XFS_FSB_TO_DADDR(mp, map->br_startblock), 588 XFS_FSB_TO_BB(mp, map->br_blockcount), incore_flags); 589 if (bp) { 590 xfs_buf_stale(bp); 591 xfs_buf_relse(bp); 592 } 593 594 return 0; 595 } 596 597 /* 598 * Remove the value associated with an attribute by deleting the 599 * out-of-line buffer that it is stored on. 600 */ 601 int 602 xfs_attr_rmtval_remove( 603 struct xfs_da_args *args) 604 { 605 xfs_dablk_t lblkno; 606 int blkcnt; 607 int error; 608 int done; 609 610 trace_xfs_attr_rmtval_remove(args); 611 612 /* 613 * Roll through the "value", invalidating the attribute value's blocks. 614 */ 615 lblkno = args->rmtblkno; 616 blkcnt = args->rmtblkcnt; 617 while (blkcnt > 0) { 618 struct xfs_bmbt_irec map; 619 int nmap; 620 621 /* 622 * Try to remember where we decided to put the value. 623 */ 624 nmap = 1; 625 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, 626 blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK); 627 if (error) 628 return error; 629 if (XFS_IS_CORRUPT(args->dp->i_mount, nmap != 1)) 630 return -EFSCORRUPTED; 631 error = xfs_attr_rmtval_stale(args->dp, &map, XBF_TRYLOCK); 632 if (error) 633 return error; 634 635 lblkno += map.br_blockcount; 636 blkcnt -= map.br_blockcount; 637 } 638 639 /* 640 * Keep de-allocating extents until the remote-value region is gone. 641 */ 642 lblkno = args->rmtblkno; 643 blkcnt = args->rmtblkcnt; 644 done = 0; 645 while (!done) { 646 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt, 647 XFS_BMAPI_ATTRFORK, 1, &done); 648 if (error) 649 return error; 650 error = xfs_defer_finish(&args->trans); 651 if (error) 652 return error; 653 654 /* 655 * Close out trans and start the next one in the chain. 656 */ 657 error = xfs_trans_roll_inode(&args->trans, args->dp); 658 if (error) 659 return error; 660 } 661 return 0; 662 } 663