1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 4 * All Rights Reserved. 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_log_format.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_mount.h" 13 #include "xfs_ag.h" 14 #include "xfs_inode.h" 15 #include "xfs_errortag.h" 16 #include "xfs_error.h" 17 #include "xfs_icache.h" 18 #include "xfs_trans.h" 19 #include "xfs_ialloc.h" 20 #include "xfs_dir2.h" 21 #include "xfs_health.h" 22 23 #include <linux/iversion.h> 24 25 /* 26 * If we are doing readahead on an inode buffer, we might be in log recovery 27 * reading an inode allocation buffer that hasn't yet been replayed, and hence 28 * has not had the inode cores stamped into it. Hence for readahead, the buffer 29 * may be potentially invalid. 30 * 31 * If the readahead buffer is invalid, we need to mark it with an error and 32 * clear the DONE status of the buffer so that a followup read will re-read it 33 * from disk. We don't report the error otherwise to avoid warnings during log 34 * recovery and we don't get unnecessary panics on debug kernels. We use EIO here 35 * because all we want to do is say readahead failed; there is no-one to report 36 * the error to, so this will distinguish it from a non-ra verifier failure. 37 * Changes to this readahead error behaviour also need to be reflected in 38 * xfs_dquot_buf_readahead_verify(). 39 */ 40 static void 41 xfs_inode_buf_verify( 42 struct xfs_buf *bp, 43 bool readahead) 44 { 45 struct xfs_mount *mp = bp->b_mount; 46 int i; 47 int ni; 48 49 /* 50 * Validate the magic number and version of every inode in the buffer 51 */ 52 ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock; 53 for (i = 0; i < ni; i++) { 54 struct xfs_dinode *dip; 55 xfs_agino_t unlinked_ino; 56 int di_ok; 57 58 dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog)); 59 unlinked_ino = be32_to_cpu(dip->di_next_unlinked); 60 di_ok = xfs_verify_magic16(bp, dip->di_magic) && 61 xfs_dinode_good_version(mp, dip->di_version) && 62 xfs_verify_agino_or_null(bp->b_pag, unlinked_ino); 63 if (unlikely(XFS_TEST_ERROR(!di_ok, mp, 64 XFS_ERRTAG_ITOBP_INOTOBP))) { 65 if (readahead) { 66 bp->b_flags &= ~XBF_DONE; 67 xfs_buf_ioerror(bp, -EIO); 68 return; 69 } 70 71 #ifdef DEBUG 72 xfs_alert(mp, 73 "bad inode magic/vsn daddr %lld #%d (magic=%x)", 74 (unsigned long long)xfs_buf_daddr(bp), i, 75 be16_to_cpu(dip->di_magic)); 76 #endif 77 xfs_buf_verifier_error(bp, -EFSCORRUPTED, 78 __func__, dip, sizeof(*dip), 79 NULL); 80 return; 81 } 82 } 83 } 84 85 86 static void 87 xfs_inode_buf_read_verify( 88 struct xfs_buf *bp) 89 { 90 xfs_inode_buf_verify(bp, false); 91 } 92 93 static void 94 xfs_inode_buf_readahead_verify( 95 struct xfs_buf *bp) 96 { 97 xfs_inode_buf_verify(bp, true); 98 } 99 100 static void 101 xfs_inode_buf_write_verify( 102 struct xfs_buf *bp) 103 { 104 xfs_inode_buf_verify(bp, false); 105 } 106 107 const struct xfs_buf_ops xfs_inode_buf_ops = { 108 .name = "xfs_inode", 109 .magic16 = { cpu_to_be16(XFS_DINODE_MAGIC), 110 cpu_to_be16(XFS_DINODE_MAGIC) }, 111 .verify_read = xfs_inode_buf_read_verify, 112 .verify_write = xfs_inode_buf_write_verify, 113 }; 114 115 const struct xfs_buf_ops xfs_inode_buf_ra_ops = { 116 .name = "xfs_inode_ra", 117 .magic16 = { cpu_to_be16(XFS_DINODE_MAGIC), 118 cpu_to_be16(XFS_DINODE_MAGIC) }, 119 .verify_read = xfs_inode_buf_readahead_verify, 120 .verify_write = xfs_inode_buf_write_verify, 121 }; 122 123 124 /* 125 * This routine is called to map an inode to the buffer containing the on-disk 126 * version of the inode. It returns a pointer to the buffer containing the 127 * on-disk inode in the bpp parameter. 128 */ 129 int 130 xfs_imap_to_bp( 131 struct xfs_mount *mp, 132 struct xfs_trans *tp, 133 struct xfs_imap *imap, 134 struct xfs_buf **bpp) 135 { 136 int error; 137 138 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, 139 imap->im_len, XBF_UNMAPPED, bpp, &xfs_inode_buf_ops); 140 if (xfs_metadata_is_sick(error)) 141 xfs_agno_mark_sick(mp, xfs_daddr_to_agno(mp, imap->im_blkno), 142 XFS_SICK_AG_INODES); 143 return error; 144 } 145 146 static inline struct timespec64 xfs_inode_decode_bigtime(uint64_t ts) 147 { 148 struct timespec64 tv; 149 uint32_t n; 150 151 tv.tv_sec = xfs_bigtime_to_unix(div_u64_rem(ts, NSEC_PER_SEC, &n)); 152 tv.tv_nsec = n; 153 154 return tv; 155 } 156 157 /* Convert an ondisk timestamp to an incore timestamp. */ 158 struct timespec64 159 xfs_inode_from_disk_ts( 160 struct xfs_dinode *dip, 161 const xfs_timestamp_t ts) 162 { 163 struct timespec64 tv; 164 struct xfs_legacy_timestamp *lts; 165 166 if (xfs_dinode_has_bigtime(dip)) 167 return xfs_inode_decode_bigtime(be64_to_cpu(ts)); 168 169 lts = (struct xfs_legacy_timestamp *)&ts; 170 tv.tv_sec = (int)be32_to_cpu(lts->t_sec); 171 tv.tv_nsec = (int)be32_to_cpu(lts->t_nsec); 172 173 return tv; 174 } 175 176 int 177 xfs_inode_from_disk( 178 struct xfs_inode *ip, 179 struct xfs_dinode *from) 180 { 181 struct inode *inode = VFS_I(ip); 182 int error; 183 xfs_failaddr_t fa; 184 185 ASSERT(ip->i_cowfp == NULL); 186 187 fa = xfs_dinode_verify(ip->i_mount, ip->i_ino, from); 188 if (fa) { 189 xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", from, 190 sizeof(*from), fa); 191 return -EFSCORRUPTED; 192 } 193 194 /* 195 * First get the permanent information that is needed to allocate an 196 * inode. If the inode is unused, mode is zero and we shouldn't mess 197 * with the uninitialized part of it. 198 */ 199 if (!xfs_has_v3inodes(ip->i_mount)) 200 ip->i_flushiter = be16_to_cpu(from->di_flushiter); 201 inode->i_generation = be32_to_cpu(from->di_gen); 202 inode->i_mode = be16_to_cpu(from->di_mode); 203 if (!inode->i_mode) 204 return 0; 205 206 /* 207 * Convert v1 inodes immediately to v2 inode format as this is the 208 * minimum inode version format we support in the rest of the code. 209 * They will also be unconditionally written back to disk as v2 inodes. 210 */ 211 if (unlikely(from->di_version == 1)) { 212 set_nlink(inode, be16_to_cpu(from->di_onlink)); 213 ip->i_projid = 0; 214 } else { 215 set_nlink(inode, be32_to_cpu(from->di_nlink)); 216 ip->i_projid = (prid_t)be16_to_cpu(from->di_projid_hi) << 16 | 217 be16_to_cpu(from->di_projid_lo); 218 } 219 220 i_uid_write(inode, be32_to_cpu(from->di_uid)); 221 i_gid_write(inode, be32_to_cpu(from->di_gid)); 222 223 /* 224 * Time is signed, so need to convert to signed 32 bit before 225 * storing in inode timestamp which may be 64 bit. Otherwise 226 * a time before epoch is converted to a time long after epoch 227 * on 64 bit systems. 228 */ 229 inode_set_atime_to_ts(inode, 230 xfs_inode_from_disk_ts(from, from->di_atime)); 231 inode_set_mtime_to_ts(inode, 232 xfs_inode_from_disk_ts(from, from->di_mtime)); 233 inode_set_ctime_to_ts(inode, 234 xfs_inode_from_disk_ts(from, from->di_ctime)); 235 236 ip->i_disk_size = be64_to_cpu(from->di_size); 237 ip->i_nblocks = be64_to_cpu(from->di_nblocks); 238 ip->i_extsize = be32_to_cpu(from->di_extsize); 239 ip->i_forkoff = from->di_forkoff; 240 ip->i_diflags = be16_to_cpu(from->di_flags); 241 ip->i_next_unlinked = be32_to_cpu(from->di_next_unlinked); 242 243 if (from->di_dmevmask || from->di_dmstate) 244 xfs_iflags_set(ip, XFS_IPRESERVE_DM_FIELDS); 245 246 if (xfs_has_v3inodes(ip->i_mount)) { 247 inode_set_iversion_queried(inode, 248 be64_to_cpu(from->di_changecount)); 249 ip->i_crtime = xfs_inode_from_disk_ts(from, from->di_crtime); 250 ip->i_diflags2 = be64_to_cpu(from->di_flags2); 251 ip->i_cowextsize = be32_to_cpu(from->di_cowextsize); 252 } 253 254 error = xfs_iformat_data_fork(ip, from); 255 if (error) 256 return error; 257 if (from->di_forkoff) { 258 error = xfs_iformat_attr_fork(ip, from); 259 if (error) 260 goto out_destroy_data_fork; 261 } 262 if (xfs_is_reflink_inode(ip)) 263 xfs_ifork_init_cow(ip); 264 return 0; 265 266 out_destroy_data_fork: 267 xfs_idestroy_fork(&ip->i_df); 268 return error; 269 } 270 271 /* Convert an incore timestamp to an ondisk timestamp. */ 272 static inline xfs_timestamp_t 273 xfs_inode_to_disk_ts( 274 struct xfs_inode *ip, 275 const struct timespec64 tv) 276 { 277 struct xfs_legacy_timestamp *lts; 278 xfs_timestamp_t ts; 279 280 if (xfs_inode_has_bigtime(ip)) 281 return cpu_to_be64(xfs_inode_encode_bigtime(tv)); 282 283 lts = (struct xfs_legacy_timestamp *)&ts; 284 lts->t_sec = cpu_to_be32(tv.tv_sec); 285 lts->t_nsec = cpu_to_be32(tv.tv_nsec); 286 287 return ts; 288 } 289 290 static inline void 291 xfs_inode_to_disk_iext_counters( 292 struct xfs_inode *ip, 293 struct xfs_dinode *to) 294 { 295 if (xfs_inode_has_large_extent_counts(ip)) { 296 to->di_big_nextents = cpu_to_be64(xfs_ifork_nextents(&ip->i_df)); 297 to->di_big_anextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_af)); 298 /* 299 * We might be upgrading the inode to use larger extent counters 300 * than was previously used. Hence zero the unused field. 301 */ 302 to->di_nrext64_pad = cpu_to_be16(0); 303 } else { 304 to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df)); 305 to->di_anextents = cpu_to_be16(xfs_ifork_nextents(&ip->i_af)); 306 } 307 } 308 309 void 310 xfs_inode_to_disk( 311 struct xfs_inode *ip, 312 struct xfs_dinode *to, 313 xfs_lsn_t lsn) 314 { 315 struct inode *inode = VFS_I(ip); 316 317 to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); 318 to->di_onlink = 0; 319 320 to->di_format = xfs_ifork_format(&ip->i_df); 321 to->di_uid = cpu_to_be32(i_uid_read(inode)); 322 to->di_gid = cpu_to_be32(i_gid_read(inode)); 323 to->di_projid_lo = cpu_to_be16(ip->i_projid & 0xffff); 324 to->di_projid_hi = cpu_to_be16(ip->i_projid >> 16); 325 326 to->di_atime = xfs_inode_to_disk_ts(ip, inode_get_atime(inode)); 327 to->di_mtime = xfs_inode_to_disk_ts(ip, inode_get_mtime(inode)); 328 to->di_ctime = xfs_inode_to_disk_ts(ip, inode_get_ctime(inode)); 329 to->di_nlink = cpu_to_be32(inode->i_nlink); 330 to->di_gen = cpu_to_be32(inode->i_generation); 331 to->di_mode = cpu_to_be16(inode->i_mode); 332 333 to->di_size = cpu_to_be64(ip->i_disk_size); 334 to->di_nblocks = cpu_to_be64(ip->i_nblocks); 335 to->di_extsize = cpu_to_be32(ip->i_extsize); 336 to->di_forkoff = ip->i_forkoff; 337 to->di_aformat = xfs_ifork_format(&ip->i_af); 338 to->di_flags = cpu_to_be16(ip->i_diflags); 339 340 if (xfs_has_v3inodes(ip->i_mount)) { 341 to->di_version = 3; 342 to->di_changecount = cpu_to_be64(inode_peek_iversion(inode)); 343 to->di_crtime = xfs_inode_to_disk_ts(ip, ip->i_crtime); 344 to->di_flags2 = cpu_to_be64(ip->i_diflags2); 345 to->di_cowextsize = cpu_to_be32(ip->i_cowextsize); 346 to->di_ino = cpu_to_be64(ip->i_ino); 347 to->di_lsn = cpu_to_be64(lsn); 348 memset(to->di_pad2, 0, sizeof(to->di_pad2)); 349 uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid); 350 to->di_v3_pad = 0; 351 } else { 352 to->di_version = 2; 353 to->di_flushiter = cpu_to_be16(ip->i_flushiter); 354 memset(to->di_v2_pad, 0, sizeof(to->di_v2_pad)); 355 } 356 357 xfs_inode_to_disk_iext_counters(ip, to); 358 } 359 360 static xfs_failaddr_t 361 xfs_dinode_verify_fork( 362 struct xfs_dinode *dip, 363 struct xfs_mount *mp, 364 int whichfork) 365 { 366 xfs_extnum_t di_nextents; 367 xfs_extnum_t max_extents; 368 mode_t mode = be16_to_cpu(dip->di_mode); 369 uint32_t fork_size = XFS_DFORK_SIZE(dip, mp, whichfork); 370 uint32_t fork_format = XFS_DFORK_FORMAT(dip, whichfork); 371 372 di_nextents = xfs_dfork_nextents(dip, whichfork); 373 374 /* 375 * For fork types that can contain local data, check that the fork 376 * format matches the size of local data contained within the fork. 377 */ 378 if (whichfork == XFS_DATA_FORK) { 379 /* 380 * A directory small enough to fit in the inode must be stored 381 * in local format. The directory sf <-> extents conversion 382 * code updates the directory size accordingly. 383 */ 384 if (S_ISDIR(mode)) { 385 if (be64_to_cpu(dip->di_size) <= fork_size && 386 fork_format != XFS_DINODE_FMT_LOCAL) 387 return __this_address; 388 } 389 390 /* 391 * A symlink with a target small enough to fit in the inode can 392 * be stored in extents format if xattrs were added (thus 393 * converting the data fork from shortform to remote format) 394 * and then removed. 395 */ 396 if (S_ISLNK(mode)) { 397 if (be64_to_cpu(dip->di_size) <= fork_size && 398 fork_format != XFS_DINODE_FMT_EXTENTS && 399 fork_format != XFS_DINODE_FMT_LOCAL) 400 return __this_address; 401 } 402 403 /* 404 * For all types, check that when the size says the fork should 405 * be in extent or btree format, the inode isn't claiming to be 406 * in local format. 407 */ 408 if (be64_to_cpu(dip->di_size) > fork_size && 409 fork_format == XFS_DINODE_FMT_LOCAL) 410 return __this_address; 411 } 412 413 switch (fork_format) { 414 case XFS_DINODE_FMT_LOCAL: 415 /* 416 * No local regular files yet. 417 */ 418 if (S_ISREG(mode) && whichfork == XFS_DATA_FORK) 419 return __this_address; 420 if (di_nextents) 421 return __this_address; 422 break; 423 case XFS_DINODE_FMT_EXTENTS: 424 if (di_nextents > XFS_DFORK_MAXEXT(dip, mp, whichfork)) 425 return __this_address; 426 break; 427 case XFS_DINODE_FMT_BTREE: 428 max_extents = xfs_iext_max_nextents( 429 xfs_dinode_has_large_extent_counts(dip), 430 whichfork); 431 if (di_nextents > max_extents) 432 return __this_address; 433 break; 434 default: 435 return __this_address; 436 } 437 return NULL; 438 } 439 440 static xfs_failaddr_t 441 xfs_dinode_verify_forkoff( 442 struct xfs_dinode *dip, 443 struct xfs_mount *mp) 444 { 445 if (!dip->di_forkoff) 446 return NULL; 447 448 switch (dip->di_format) { 449 case XFS_DINODE_FMT_DEV: 450 if (dip->di_forkoff != (roundup(sizeof(xfs_dev_t), 8) >> 3)) 451 return __this_address; 452 break; 453 case XFS_DINODE_FMT_LOCAL: /* fall through ... */ 454 case XFS_DINODE_FMT_EXTENTS: /* fall through ... */ 455 case XFS_DINODE_FMT_BTREE: 456 if (dip->di_forkoff >= (XFS_LITINO(mp) >> 3)) 457 return __this_address; 458 break; 459 default: 460 return __this_address; 461 } 462 return NULL; 463 } 464 465 static xfs_failaddr_t 466 xfs_dinode_verify_nrext64( 467 struct xfs_mount *mp, 468 struct xfs_dinode *dip) 469 { 470 if (xfs_dinode_has_large_extent_counts(dip)) { 471 if (!xfs_has_large_extent_counts(mp)) 472 return __this_address; 473 if (dip->di_nrext64_pad != 0) 474 return __this_address; 475 } else if (dip->di_version >= 3) { 476 if (dip->di_v3_pad != 0) 477 return __this_address; 478 } 479 480 return NULL; 481 } 482 483 xfs_failaddr_t 484 xfs_dinode_verify( 485 struct xfs_mount *mp, 486 xfs_ino_t ino, 487 struct xfs_dinode *dip) 488 { 489 xfs_failaddr_t fa; 490 uint16_t mode; 491 uint16_t flags; 492 uint64_t flags2; 493 uint64_t di_size; 494 xfs_extnum_t nextents; 495 xfs_extnum_t naextents; 496 xfs_filblks_t nblocks; 497 498 if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) 499 return __this_address; 500 501 /* Verify v3 integrity information first */ 502 if (dip->di_version >= 3) { 503 if (!xfs_has_v3inodes(mp)) 504 return __this_address; 505 if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, 506 XFS_DINODE_CRC_OFF)) 507 return __this_address; 508 if (be64_to_cpu(dip->di_ino) != ino) 509 return __this_address; 510 if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid)) 511 return __this_address; 512 } 513 514 if (dip->di_version > 1) { 515 if (dip->di_onlink) 516 return __this_address; 517 } else { 518 if (dip->di_nlink) 519 return __this_address; 520 } 521 522 /* don't allow invalid i_size */ 523 di_size = be64_to_cpu(dip->di_size); 524 if (di_size & (1ULL << 63)) 525 return __this_address; 526 527 mode = be16_to_cpu(dip->di_mode); 528 if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN) 529 return __this_address; 530 531 /* No zero-length symlinks/dirs. */ 532 if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) 533 return __this_address; 534 535 fa = xfs_dinode_verify_nrext64(mp, dip); 536 if (fa) 537 return fa; 538 539 nextents = xfs_dfork_data_extents(dip); 540 naextents = xfs_dfork_attr_extents(dip); 541 nblocks = be64_to_cpu(dip->di_nblocks); 542 543 /* Fork checks carried over from xfs_iformat_fork */ 544 if (mode && nextents + naextents > nblocks) 545 return __this_address; 546 547 if (nextents + naextents == 0 && nblocks != 0) 548 return __this_address; 549 550 if (S_ISDIR(mode) && nextents > mp->m_dir_geo->max_extents) 551 return __this_address; 552 553 if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize) 554 return __this_address; 555 556 flags = be16_to_cpu(dip->di_flags); 557 558 if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp) 559 return __this_address; 560 561 /* check for illegal values of forkoff */ 562 fa = xfs_dinode_verify_forkoff(dip, mp); 563 if (fa) 564 return fa; 565 566 /* Do we have appropriate data fork formats for the mode? */ 567 switch (mode & S_IFMT) { 568 case S_IFIFO: 569 case S_IFCHR: 570 case S_IFBLK: 571 case S_IFSOCK: 572 if (dip->di_format != XFS_DINODE_FMT_DEV) 573 return __this_address; 574 break; 575 case S_IFREG: 576 case S_IFLNK: 577 case S_IFDIR: 578 fa = xfs_dinode_verify_fork(dip, mp, XFS_DATA_FORK); 579 if (fa) 580 return fa; 581 break; 582 case 0: 583 /* Uninitialized inode ok. */ 584 break; 585 default: 586 return __this_address; 587 } 588 589 if (dip->di_forkoff) { 590 fa = xfs_dinode_verify_fork(dip, mp, XFS_ATTR_FORK); 591 if (fa) 592 return fa; 593 } else { 594 /* 595 * If there is no fork offset, this may be a freshly-made inode 596 * in a new disk cluster, in which case di_aformat is zeroed. 597 * Otherwise, such an inode must be in EXTENTS format; this goes 598 * for freed inodes as well. 599 */ 600 switch (dip->di_aformat) { 601 case 0: 602 case XFS_DINODE_FMT_EXTENTS: 603 break; 604 default: 605 return __this_address; 606 } 607 if (naextents) 608 return __this_address; 609 } 610 611 /* extent size hint validation */ 612 fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize), 613 mode, flags); 614 if (fa) 615 return fa; 616 617 /* only version 3 or greater inodes are extensively verified here */ 618 if (dip->di_version < 3) 619 return NULL; 620 621 flags2 = be64_to_cpu(dip->di_flags2); 622 623 /* don't allow reflink/cowextsize if we don't have reflink */ 624 if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) && 625 !xfs_has_reflink(mp)) 626 return __this_address; 627 628 /* only regular files get reflink */ 629 if ((flags2 & XFS_DIFLAG2_REFLINK) && (mode & S_IFMT) != S_IFREG) 630 return __this_address; 631 632 /* don't let reflink and realtime mix */ 633 if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME)) 634 return __this_address; 635 636 /* COW extent size hint validation */ 637 fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize), 638 mode, flags, flags2); 639 if (fa) 640 return fa; 641 642 /* bigtime iflag can only happen on bigtime filesystems */ 643 if (xfs_dinode_has_bigtime(dip) && 644 !xfs_has_bigtime(mp)) 645 return __this_address; 646 647 return NULL; 648 } 649 650 void 651 xfs_dinode_calc_crc( 652 struct xfs_mount *mp, 653 struct xfs_dinode *dip) 654 { 655 uint32_t crc; 656 657 if (dip->di_version < 3) 658 return; 659 660 ASSERT(xfs_has_crc(mp)); 661 crc = xfs_start_cksum_update((char *)dip, mp->m_sb.sb_inodesize, 662 XFS_DINODE_CRC_OFF); 663 dip->di_crc = xfs_end_cksum(crc); 664 } 665 666 /* 667 * Validate di_extsize hint. 668 * 669 * 1. Extent size hint is only valid for directories and regular files. 670 * 2. FS_XFLAG_EXTSIZE is only valid for regular files. 671 * 3. FS_XFLAG_EXTSZINHERIT is only valid for directories. 672 * 4. Hint cannot be larger than MAXTEXTLEN. 673 * 5. Can be changed on directories at any time. 674 * 6. Hint value of 0 turns off hints, clears inode flags. 675 * 7. Extent size must be a multiple of the appropriate block size. 676 * For realtime files, this is the rt extent size. 677 * 8. For non-realtime files, the extent size hint must be limited 678 * to half the AG size to avoid alignment extending the extent beyond the 679 * limits of the AG. 680 */ 681 xfs_failaddr_t 682 xfs_inode_validate_extsize( 683 struct xfs_mount *mp, 684 uint32_t extsize, 685 uint16_t mode, 686 uint16_t flags) 687 { 688 bool rt_flag; 689 bool hint_flag; 690 bool inherit_flag; 691 uint32_t extsize_bytes; 692 uint32_t blocksize_bytes; 693 694 rt_flag = (flags & XFS_DIFLAG_REALTIME); 695 hint_flag = (flags & XFS_DIFLAG_EXTSIZE); 696 inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT); 697 extsize_bytes = XFS_FSB_TO_B(mp, extsize); 698 699 /* 700 * This comment describes a historic gap in this verifier function. 701 * 702 * For a directory with both RTINHERIT and EXTSZINHERIT flags set, this 703 * function has never checked that the extent size hint is an integer 704 * multiple of the realtime extent size. Since we allow users to set 705 * this combination on non-rt filesystems /and/ to change the rt 706 * extent size when adding a rt device to a filesystem, the net effect 707 * is that users can configure a filesystem anticipating one rt 708 * geometry and change their minds later. Directories do not use the 709 * extent size hint, so this is harmless for them. 710 * 711 * If a directory with a misaligned extent size hint is allowed to 712 * propagate that hint into a new regular realtime file, the result 713 * is that the inode cluster buffer verifier will trigger a corruption 714 * shutdown the next time it is run, because the verifier has always 715 * enforced the alignment rule for regular files. 716 * 717 * Because we allow administrators to set a new rt extent size when 718 * adding a rt section, we cannot add a check to this verifier because 719 * that will result a new source of directory corruption errors when 720 * reading an existing filesystem. Instead, we rely on callers to 721 * decide when alignment checks are appropriate, and fix things up as 722 * needed. 723 */ 724 725 if (rt_flag) 726 blocksize_bytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize); 727 else 728 blocksize_bytes = mp->m_sb.sb_blocksize; 729 730 if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode))) 731 return __this_address; 732 733 if (hint_flag && !S_ISREG(mode)) 734 return __this_address; 735 736 if (inherit_flag && !S_ISDIR(mode)) 737 return __this_address; 738 739 if ((hint_flag || inherit_flag) && extsize == 0) 740 return __this_address; 741 742 /* free inodes get flags set to zero but extsize remains */ 743 if (mode && !(hint_flag || inherit_flag) && extsize != 0) 744 return __this_address; 745 746 if (extsize_bytes % blocksize_bytes) 747 return __this_address; 748 749 if (extsize > XFS_MAX_BMBT_EXTLEN) 750 return __this_address; 751 752 if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2) 753 return __this_address; 754 755 return NULL; 756 } 757 758 /* 759 * Validate di_cowextsize hint. 760 * 761 * 1. CoW extent size hint can only be set if reflink is enabled on the fs. 762 * The inode does not have to have any shared blocks, but it must be a v3. 763 * 2. FS_XFLAG_COWEXTSIZE is only valid for directories and regular files; 764 * for a directory, the hint is propagated to new files. 765 * 3. Can be changed on files & directories at any time. 766 * 4. Hint value of 0 turns off hints, clears inode flags. 767 * 5. Extent size must be a multiple of the appropriate block size. 768 * 6. The extent size hint must be limited to half the AG size to avoid 769 * alignment extending the extent beyond the limits of the AG. 770 */ 771 xfs_failaddr_t 772 xfs_inode_validate_cowextsize( 773 struct xfs_mount *mp, 774 uint32_t cowextsize, 775 uint16_t mode, 776 uint16_t flags, 777 uint64_t flags2) 778 { 779 bool rt_flag; 780 bool hint_flag; 781 uint32_t cowextsize_bytes; 782 783 rt_flag = (flags & XFS_DIFLAG_REALTIME); 784 hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE); 785 cowextsize_bytes = XFS_FSB_TO_B(mp, cowextsize); 786 787 if (hint_flag && !xfs_has_reflink(mp)) 788 return __this_address; 789 790 if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode))) 791 return __this_address; 792 793 if (hint_flag && cowextsize == 0) 794 return __this_address; 795 796 /* free inodes get flags set to zero but cowextsize remains */ 797 if (mode && !hint_flag && cowextsize != 0) 798 return __this_address; 799 800 if (hint_flag && rt_flag) 801 return __this_address; 802 803 if (cowextsize_bytes % mp->m_sb.sb_blocksize) 804 return __this_address; 805 806 if (cowextsize > XFS_MAX_BMBT_EXTLEN) 807 return __this_address; 808 809 if (cowextsize > mp->m_sb.sb_agblocks / 2) 810 return __this_address; 811 812 return NULL; 813 } 814