1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 4 * All Rights Reserved. 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_log_format.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_mount.h" 13 #include "xfs_ag.h" 14 #include "xfs_inode.h" 15 #include "xfs_errortag.h" 16 #include "xfs_error.h" 17 #include "xfs_icache.h" 18 #include "xfs_trans.h" 19 #include "xfs_ialloc.h" 20 #include "xfs_dir2.h" 21 #include "xfs_health.h" 22 23 #include <linux/iversion.h> 24 25 /* 26 * If we are doing readahead on an inode buffer, we might be in log recovery 27 * reading an inode allocation buffer that hasn't yet been replayed, and hence 28 * has not had the inode cores stamped into it. Hence for readahead, the buffer 29 * may be potentially invalid. 30 * 31 * If the readahead buffer is invalid, we need to mark it with an error and 32 * clear the DONE status of the buffer so that a followup read will re-read it 33 * from disk. We don't report the error otherwise to avoid warnings during log 34 * recovery and we don't get unnecessary panics on debug kernels. We use EIO here 35 * because all we want to do is say readahead failed; there is no-one to report 36 * the error to, so this will distinguish it from a non-ra verifier failure. 37 * Changes to this readahead error behaviour also need to be reflected in 38 * xfs_dquot_buf_readahead_verify(). 39 */ 40 static void 41 xfs_inode_buf_verify( 42 struct xfs_buf *bp, 43 bool readahead) 44 { 45 struct xfs_mount *mp = bp->b_mount; 46 int i; 47 int ni; 48 49 /* 50 * Validate the magic number and version of every inode in the buffer 51 */ 52 ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock; 53 for (i = 0; i < ni; i++) { 54 struct xfs_dinode *dip; 55 xfs_agino_t unlinked_ino; 56 int di_ok; 57 58 dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog)); 59 unlinked_ino = be32_to_cpu(dip->di_next_unlinked); 60 di_ok = xfs_verify_magic16(bp, dip->di_magic) && 61 xfs_dinode_good_version(mp, dip->di_version) && 62 xfs_verify_agino_or_null(bp->b_pag, unlinked_ino); 63 if (unlikely(XFS_TEST_ERROR(!di_ok, mp, 64 XFS_ERRTAG_ITOBP_INOTOBP))) { 65 if (readahead) { 66 bp->b_flags &= ~XBF_DONE; 67 xfs_buf_ioerror(bp, -EIO); 68 return; 69 } 70 71 #ifdef DEBUG 72 xfs_alert(mp, 73 "bad inode magic/vsn daddr %lld #%d (magic=%x)", 74 (unsigned long long)xfs_buf_daddr(bp), i, 75 be16_to_cpu(dip->di_magic)); 76 #endif 77 xfs_buf_verifier_error(bp, -EFSCORRUPTED, 78 __func__, dip, sizeof(*dip), 79 NULL); 80 return; 81 } 82 } 83 } 84 85 86 static void 87 xfs_inode_buf_read_verify( 88 struct xfs_buf *bp) 89 { 90 xfs_inode_buf_verify(bp, false); 91 } 92 93 static void 94 xfs_inode_buf_readahead_verify( 95 struct xfs_buf *bp) 96 { 97 xfs_inode_buf_verify(bp, true); 98 } 99 100 static void 101 xfs_inode_buf_write_verify( 102 struct xfs_buf *bp) 103 { 104 xfs_inode_buf_verify(bp, false); 105 } 106 107 const struct xfs_buf_ops xfs_inode_buf_ops = { 108 .name = "xfs_inode", 109 .magic16 = { cpu_to_be16(XFS_DINODE_MAGIC), 110 cpu_to_be16(XFS_DINODE_MAGIC) }, 111 .verify_read = xfs_inode_buf_read_verify, 112 .verify_write = xfs_inode_buf_write_verify, 113 }; 114 115 const struct xfs_buf_ops xfs_inode_buf_ra_ops = { 116 .name = "xfs_inode_ra", 117 .magic16 = { cpu_to_be16(XFS_DINODE_MAGIC), 118 cpu_to_be16(XFS_DINODE_MAGIC) }, 119 .verify_read = xfs_inode_buf_readahead_verify, 120 .verify_write = xfs_inode_buf_write_verify, 121 }; 122 123 124 /* 125 * This routine is called to map an inode to the buffer containing the on-disk 126 * version of the inode. It returns a pointer to the buffer containing the 127 * on-disk inode in the bpp parameter. 128 */ 129 int 130 xfs_imap_to_bp( 131 struct xfs_mount *mp, 132 struct xfs_trans *tp, 133 struct xfs_imap *imap, 134 struct xfs_buf **bpp) 135 { 136 int error; 137 138 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, 139 imap->im_len, XBF_UNMAPPED, bpp, &xfs_inode_buf_ops); 140 if (xfs_metadata_is_sick(error)) 141 xfs_agno_mark_sick(mp, xfs_daddr_to_agno(mp, imap->im_blkno), 142 XFS_SICK_AG_INODES); 143 return error; 144 } 145 146 static inline struct timespec64 xfs_inode_decode_bigtime(uint64_t ts) 147 { 148 struct timespec64 tv; 149 uint32_t n; 150 151 tv.tv_sec = xfs_bigtime_to_unix(div_u64_rem(ts, NSEC_PER_SEC, &n)); 152 tv.tv_nsec = n; 153 154 return tv; 155 } 156 157 /* Convert an ondisk timestamp to an incore timestamp. */ 158 struct timespec64 159 xfs_inode_from_disk_ts( 160 struct xfs_dinode *dip, 161 const xfs_timestamp_t ts) 162 { 163 struct timespec64 tv; 164 struct xfs_legacy_timestamp *lts; 165 166 if (xfs_dinode_has_bigtime(dip)) 167 return xfs_inode_decode_bigtime(be64_to_cpu(ts)); 168 169 lts = (struct xfs_legacy_timestamp *)&ts; 170 tv.tv_sec = (int)be32_to_cpu(lts->t_sec); 171 tv.tv_nsec = (int)be32_to_cpu(lts->t_nsec); 172 173 return tv; 174 } 175 176 int 177 xfs_inode_from_disk( 178 struct xfs_inode *ip, 179 struct xfs_dinode *from) 180 { 181 struct inode *inode = VFS_I(ip); 182 int error; 183 xfs_failaddr_t fa; 184 185 ASSERT(ip->i_cowfp == NULL); 186 187 fa = xfs_dinode_verify(ip->i_mount, ip->i_ino, from); 188 if (fa) { 189 xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", from, 190 sizeof(*from), fa); 191 return -EFSCORRUPTED; 192 } 193 194 /* 195 * First get the permanent information that is needed to allocate an 196 * inode. If the inode is unused, mode is zero and we shouldn't mess 197 * with the uninitialized part of it. 198 */ 199 if (!xfs_has_v3inodes(ip->i_mount)) 200 ip->i_flushiter = be16_to_cpu(from->di_flushiter); 201 inode->i_generation = be32_to_cpu(from->di_gen); 202 inode->i_mode = be16_to_cpu(from->di_mode); 203 if (!inode->i_mode) 204 return 0; 205 206 /* 207 * Convert v1 inodes immediately to v2 inode format as this is the 208 * minimum inode version format we support in the rest of the code. 209 * They will also be unconditionally written back to disk as v2 inodes. 210 */ 211 if (unlikely(from->di_version == 1)) { 212 set_nlink(inode, be16_to_cpu(from->di_onlink)); 213 ip->i_projid = 0; 214 } else { 215 set_nlink(inode, be32_to_cpu(from->di_nlink)); 216 ip->i_projid = (prid_t)be16_to_cpu(from->di_projid_hi) << 16 | 217 be16_to_cpu(from->di_projid_lo); 218 } 219 220 i_uid_write(inode, be32_to_cpu(from->di_uid)); 221 i_gid_write(inode, be32_to_cpu(from->di_gid)); 222 223 /* 224 * Time is signed, so need to convert to signed 32 bit before 225 * storing in inode timestamp which may be 64 bit. Otherwise 226 * a time before epoch is converted to a time long after epoch 227 * on 64 bit systems. 228 */ 229 inode_set_atime_to_ts(inode, 230 xfs_inode_from_disk_ts(from, from->di_atime)); 231 inode_set_mtime_to_ts(inode, 232 xfs_inode_from_disk_ts(from, from->di_mtime)); 233 inode_set_ctime_to_ts(inode, 234 xfs_inode_from_disk_ts(from, from->di_ctime)); 235 236 ip->i_disk_size = be64_to_cpu(from->di_size); 237 ip->i_nblocks = be64_to_cpu(from->di_nblocks); 238 ip->i_extsize = be32_to_cpu(from->di_extsize); 239 ip->i_forkoff = from->di_forkoff; 240 ip->i_diflags = be16_to_cpu(from->di_flags); 241 ip->i_next_unlinked = be32_to_cpu(from->di_next_unlinked); 242 243 if (from->di_dmevmask || from->di_dmstate) 244 xfs_iflags_set(ip, XFS_IPRESERVE_DM_FIELDS); 245 246 if (xfs_has_v3inodes(ip->i_mount)) { 247 inode_set_iversion_queried(inode, 248 be64_to_cpu(from->di_changecount)); 249 ip->i_crtime = xfs_inode_from_disk_ts(from, from->di_crtime); 250 ip->i_diflags2 = be64_to_cpu(from->di_flags2); 251 ip->i_cowextsize = be32_to_cpu(from->di_cowextsize); 252 } 253 254 error = xfs_iformat_data_fork(ip, from); 255 if (error) 256 return error; 257 if (from->di_forkoff) { 258 error = xfs_iformat_attr_fork(ip, from); 259 if (error) 260 goto out_destroy_data_fork; 261 } 262 if (xfs_is_reflink_inode(ip)) 263 xfs_ifork_init_cow(ip); 264 return 0; 265 266 out_destroy_data_fork: 267 xfs_idestroy_fork(&ip->i_df); 268 return error; 269 } 270 271 /* Convert an incore timestamp to an ondisk timestamp. */ 272 static inline xfs_timestamp_t 273 xfs_inode_to_disk_ts( 274 struct xfs_inode *ip, 275 const struct timespec64 tv) 276 { 277 struct xfs_legacy_timestamp *lts; 278 xfs_timestamp_t ts; 279 280 if (xfs_inode_has_bigtime(ip)) 281 return cpu_to_be64(xfs_inode_encode_bigtime(tv)); 282 283 lts = (struct xfs_legacy_timestamp *)&ts; 284 lts->t_sec = cpu_to_be32(tv.tv_sec); 285 lts->t_nsec = cpu_to_be32(tv.tv_nsec); 286 287 return ts; 288 } 289 290 static inline void 291 xfs_inode_to_disk_iext_counters( 292 struct xfs_inode *ip, 293 struct xfs_dinode *to) 294 { 295 if (xfs_inode_has_large_extent_counts(ip)) { 296 to->di_big_nextents = cpu_to_be64(xfs_ifork_nextents(&ip->i_df)); 297 to->di_big_anextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_af)); 298 /* 299 * We might be upgrading the inode to use larger extent counters 300 * than was previously used. Hence zero the unused field. 301 */ 302 to->di_nrext64_pad = cpu_to_be16(0); 303 } else { 304 to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df)); 305 to->di_anextents = cpu_to_be16(xfs_ifork_nextents(&ip->i_af)); 306 } 307 } 308 309 void 310 xfs_inode_to_disk( 311 struct xfs_inode *ip, 312 struct xfs_dinode *to, 313 xfs_lsn_t lsn) 314 { 315 struct inode *inode = VFS_I(ip); 316 317 to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); 318 to->di_onlink = 0; 319 320 to->di_format = xfs_ifork_format(&ip->i_df); 321 to->di_uid = cpu_to_be32(i_uid_read(inode)); 322 to->di_gid = cpu_to_be32(i_gid_read(inode)); 323 to->di_projid_lo = cpu_to_be16(ip->i_projid & 0xffff); 324 to->di_projid_hi = cpu_to_be16(ip->i_projid >> 16); 325 326 to->di_atime = xfs_inode_to_disk_ts(ip, inode_get_atime(inode)); 327 to->di_mtime = xfs_inode_to_disk_ts(ip, inode_get_mtime(inode)); 328 to->di_ctime = xfs_inode_to_disk_ts(ip, inode_get_ctime(inode)); 329 to->di_nlink = cpu_to_be32(inode->i_nlink); 330 to->di_gen = cpu_to_be32(inode->i_generation); 331 to->di_mode = cpu_to_be16(inode->i_mode); 332 333 to->di_size = cpu_to_be64(ip->i_disk_size); 334 to->di_nblocks = cpu_to_be64(ip->i_nblocks); 335 to->di_extsize = cpu_to_be32(ip->i_extsize); 336 to->di_forkoff = ip->i_forkoff; 337 to->di_aformat = xfs_ifork_format(&ip->i_af); 338 to->di_flags = cpu_to_be16(ip->i_diflags); 339 340 if (xfs_has_v3inodes(ip->i_mount)) { 341 to->di_version = 3; 342 to->di_changecount = cpu_to_be64(inode_peek_iversion(inode)); 343 to->di_crtime = xfs_inode_to_disk_ts(ip, ip->i_crtime); 344 to->di_flags2 = cpu_to_be64(ip->i_diflags2); 345 to->di_cowextsize = cpu_to_be32(ip->i_cowextsize); 346 to->di_ino = cpu_to_be64(ip->i_ino); 347 to->di_lsn = cpu_to_be64(lsn); 348 memset(to->di_pad2, 0, sizeof(to->di_pad2)); 349 uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid); 350 to->di_v3_pad = 0; 351 } else { 352 to->di_version = 2; 353 to->di_flushiter = cpu_to_be16(ip->i_flushiter); 354 memset(to->di_v2_pad, 0, sizeof(to->di_v2_pad)); 355 } 356 357 xfs_inode_to_disk_iext_counters(ip, to); 358 } 359 360 static xfs_failaddr_t 361 xfs_dinode_verify_fork( 362 struct xfs_dinode *dip, 363 struct xfs_mount *mp, 364 int whichfork) 365 { 366 xfs_extnum_t di_nextents; 367 xfs_extnum_t max_extents; 368 mode_t mode = be16_to_cpu(dip->di_mode); 369 uint32_t fork_size = XFS_DFORK_SIZE(dip, mp, whichfork); 370 uint32_t fork_format = XFS_DFORK_FORMAT(dip, whichfork); 371 372 di_nextents = xfs_dfork_nextents(dip, whichfork); 373 374 /* 375 * For fork types that can contain local data, check that the fork 376 * format matches the size of local data contained within the fork. 377 */ 378 if (whichfork == XFS_DATA_FORK) { 379 /* 380 * A directory small enough to fit in the inode must be stored 381 * in local format. The directory sf <-> extents conversion 382 * code updates the directory size accordingly. Directories 383 * being truncated have zero size and are not subject to this 384 * check. 385 */ 386 if (S_ISDIR(mode)) { 387 if (dip->di_size && 388 be64_to_cpu(dip->di_size) <= fork_size && 389 fork_format != XFS_DINODE_FMT_LOCAL) 390 return __this_address; 391 } 392 393 /* 394 * A symlink with a target small enough to fit in the inode can 395 * be stored in extents format if xattrs were added (thus 396 * converting the data fork from shortform to remote format) 397 * and then removed. 398 */ 399 if (S_ISLNK(mode)) { 400 if (be64_to_cpu(dip->di_size) <= fork_size && 401 fork_format != XFS_DINODE_FMT_EXTENTS && 402 fork_format != XFS_DINODE_FMT_LOCAL) 403 return __this_address; 404 } 405 406 /* 407 * For all types, check that when the size says the fork should 408 * be in extent or btree format, the inode isn't claiming to be 409 * in local format. 410 */ 411 if (be64_to_cpu(dip->di_size) > fork_size && 412 fork_format == XFS_DINODE_FMT_LOCAL) 413 return __this_address; 414 } 415 416 switch (fork_format) { 417 case XFS_DINODE_FMT_LOCAL: 418 /* 419 * No local regular files yet. 420 */ 421 if (S_ISREG(mode) && whichfork == XFS_DATA_FORK) 422 return __this_address; 423 if (di_nextents) 424 return __this_address; 425 break; 426 case XFS_DINODE_FMT_EXTENTS: 427 if (di_nextents > XFS_DFORK_MAXEXT(dip, mp, whichfork)) 428 return __this_address; 429 break; 430 case XFS_DINODE_FMT_BTREE: 431 max_extents = xfs_iext_max_nextents( 432 xfs_dinode_has_large_extent_counts(dip), 433 whichfork); 434 if (di_nextents > max_extents) 435 return __this_address; 436 break; 437 default: 438 return __this_address; 439 } 440 return NULL; 441 } 442 443 static xfs_failaddr_t 444 xfs_dinode_verify_forkoff( 445 struct xfs_dinode *dip, 446 struct xfs_mount *mp) 447 { 448 if (!dip->di_forkoff) 449 return NULL; 450 451 switch (dip->di_format) { 452 case XFS_DINODE_FMT_DEV: 453 if (dip->di_forkoff != (roundup(sizeof(xfs_dev_t), 8) >> 3)) 454 return __this_address; 455 break; 456 case XFS_DINODE_FMT_LOCAL: /* fall through ... */ 457 case XFS_DINODE_FMT_EXTENTS: /* fall through ... */ 458 case XFS_DINODE_FMT_BTREE: 459 if (dip->di_forkoff >= (XFS_LITINO(mp) >> 3)) 460 return __this_address; 461 break; 462 default: 463 return __this_address; 464 } 465 return NULL; 466 } 467 468 static xfs_failaddr_t 469 xfs_dinode_verify_nrext64( 470 struct xfs_mount *mp, 471 struct xfs_dinode *dip) 472 { 473 if (xfs_dinode_has_large_extent_counts(dip)) { 474 if (!xfs_has_large_extent_counts(mp)) 475 return __this_address; 476 if (dip->di_nrext64_pad != 0) 477 return __this_address; 478 } else if (dip->di_version >= 3) { 479 if (dip->di_v3_pad != 0) 480 return __this_address; 481 } 482 483 return NULL; 484 } 485 486 xfs_failaddr_t 487 xfs_dinode_verify( 488 struct xfs_mount *mp, 489 xfs_ino_t ino, 490 struct xfs_dinode *dip) 491 { 492 xfs_failaddr_t fa; 493 uint16_t mode; 494 uint16_t flags; 495 uint64_t flags2; 496 uint64_t di_size; 497 xfs_extnum_t nextents; 498 xfs_extnum_t naextents; 499 xfs_filblks_t nblocks; 500 501 if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) 502 return __this_address; 503 504 /* Verify v3 integrity information first */ 505 if (dip->di_version >= 3) { 506 if (!xfs_has_v3inodes(mp)) 507 return __this_address; 508 if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, 509 XFS_DINODE_CRC_OFF)) 510 return __this_address; 511 if (be64_to_cpu(dip->di_ino) != ino) 512 return __this_address; 513 if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid)) 514 return __this_address; 515 } 516 517 if (dip->di_version > 1) { 518 if (dip->di_onlink) 519 return __this_address; 520 } else { 521 if (dip->di_nlink) 522 return __this_address; 523 } 524 525 /* don't allow invalid i_size */ 526 di_size = be64_to_cpu(dip->di_size); 527 if (di_size & (1ULL << 63)) 528 return __this_address; 529 530 mode = be16_to_cpu(dip->di_mode); 531 if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN) 532 return __this_address; 533 534 /* 535 * No zero-length symlinks/dirs unless they're unlinked and hence being 536 * inactivated. 537 */ 538 if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) { 539 if (dip->di_version > 1) { 540 if (dip->di_nlink) 541 return __this_address; 542 } else { 543 if (dip->di_onlink) 544 return __this_address; 545 } 546 } 547 548 fa = xfs_dinode_verify_nrext64(mp, dip); 549 if (fa) 550 return fa; 551 552 nextents = xfs_dfork_data_extents(dip); 553 naextents = xfs_dfork_attr_extents(dip); 554 nblocks = be64_to_cpu(dip->di_nblocks); 555 556 /* Fork checks carried over from xfs_iformat_fork */ 557 if (mode && nextents + naextents > nblocks) 558 return __this_address; 559 560 if (nextents + naextents == 0 && nblocks != 0) 561 return __this_address; 562 563 if (S_ISDIR(mode) && nextents > mp->m_dir_geo->max_extents) 564 return __this_address; 565 566 if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize) 567 return __this_address; 568 569 flags = be16_to_cpu(dip->di_flags); 570 571 if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp) 572 return __this_address; 573 574 /* check for illegal values of forkoff */ 575 fa = xfs_dinode_verify_forkoff(dip, mp); 576 if (fa) 577 return fa; 578 579 /* Do we have appropriate data fork formats for the mode? */ 580 switch (mode & S_IFMT) { 581 case S_IFIFO: 582 case S_IFCHR: 583 case S_IFBLK: 584 case S_IFSOCK: 585 if (dip->di_format != XFS_DINODE_FMT_DEV) 586 return __this_address; 587 break; 588 case S_IFREG: 589 case S_IFLNK: 590 case S_IFDIR: 591 fa = xfs_dinode_verify_fork(dip, mp, XFS_DATA_FORK); 592 if (fa) 593 return fa; 594 break; 595 case 0: 596 /* Uninitialized inode ok. */ 597 break; 598 default: 599 return __this_address; 600 } 601 602 if (dip->di_forkoff) { 603 fa = xfs_dinode_verify_fork(dip, mp, XFS_ATTR_FORK); 604 if (fa) 605 return fa; 606 } else { 607 /* 608 * If there is no fork offset, this may be a freshly-made inode 609 * in a new disk cluster, in which case di_aformat is zeroed. 610 * Otherwise, such an inode must be in EXTENTS format; this goes 611 * for freed inodes as well. 612 */ 613 switch (dip->di_aformat) { 614 case 0: 615 case XFS_DINODE_FMT_EXTENTS: 616 break; 617 default: 618 return __this_address; 619 } 620 if (naextents) 621 return __this_address; 622 } 623 624 /* extent size hint validation */ 625 fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize), 626 mode, flags); 627 if (fa) 628 return fa; 629 630 /* only version 3 or greater inodes are extensively verified here */ 631 if (dip->di_version < 3) 632 return NULL; 633 634 flags2 = be64_to_cpu(dip->di_flags2); 635 636 /* don't allow reflink/cowextsize if we don't have reflink */ 637 if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) && 638 !xfs_has_reflink(mp)) 639 return __this_address; 640 641 /* only regular files get reflink */ 642 if ((flags2 & XFS_DIFLAG2_REFLINK) && (mode & S_IFMT) != S_IFREG) 643 return __this_address; 644 645 /* don't let reflink and realtime mix */ 646 if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME)) 647 return __this_address; 648 649 /* COW extent size hint validation */ 650 fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize), 651 mode, flags, flags2); 652 if (fa) 653 return fa; 654 655 /* bigtime iflag can only happen on bigtime filesystems */ 656 if (xfs_dinode_has_bigtime(dip) && 657 !xfs_has_bigtime(mp)) 658 return __this_address; 659 660 return NULL; 661 } 662 663 void 664 xfs_dinode_calc_crc( 665 struct xfs_mount *mp, 666 struct xfs_dinode *dip) 667 { 668 uint32_t crc; 669 670 if (dip->di_version < 3) 671 return; 672 673 ASSERT(xfs_has_crc(mp)); 674 crc = xfs_start_cksum_update((char *)dip, mp->m_sb.sb_inodesize, 675 XFS_DINODE_CRC_OFF); 676 dip->di_crc = xfs_end_cksum(crc); 677 } 678 679 /* 680 * Validate di_extsize hint. 681 * 682 * 1. Extent size hint is only valid for directories and regular files. 683 * 2. FS_XFLAG_EXTSIZE is only valid for regular files. 684 * 3. FS_XFLAG_EXTSZINHERIT is only valid for directories. 685 * 4. Hint cannot be larger than MAXTEXTLEN. 686 * 5. Can be changed on directories at any time. 687 * 6. Hint value of 0 turns off hints, clears inode flags. 688 * 7. Extent size must be a multiple of the appropriate block size. 689 * For realtime files, this is the rt extent size. 690 * 8. For non-realtime files, the extent size hint must be limited 691 * to half the AG size to avoid alignment extending the extent beyond the 692 * limits of the AG. 693 */ 694 xfs_failaddr_t 695 xfs_inode_validate_extsize( 696 struct xfs_mount *mp, 697 uint32_t extsize, 698 uint16_t mode, 699 uint16_t flags) 700 { 701 bool rt_flag; 702 bool hint_flag; 703 bool inherit_flag; 704 uint32_t extsize_bytes; 705 uint32_t blocksize_bytes; 706 707 rt_flag = (flags & XFS_DIFLAG_REALTIME); 708 hint_flag = (flags & XFS_DIFLAG_EXTSIZE); 709 inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT); 710 extsize_bytes = XFS_FSB_TO_B(mp, extsize); 711 712 /* 713 * This comment describes a historic gap in this verifier function. 714 * 715 * For a directory with both RTINHERIT and EXTSZINHERIT flags set, this 716 * function has never checked that the extent size hint is an integer 717 * multiple of the realtime extent size. Since we allow users to set 718 * this combination on non-rt filesystems /and/ to change the rt 719 * extent size when adding a rt device to a filesystem, the net effect 720 * is that users can configure a filesystem anticipating one rt 721 * geometry and change their minds later. Directories do not use the 722 * extent size hint, so this is harmless for them. 723 * 724 * If a directory with a misaligned extent size hint is allowed to 725 * propagate that hint into a new regular realtime file, the result 726 * is that the inode cluster buffer verifier will trigger a corruption 727 * shutdown the next time it is run, because the verifier has always 728 * enforced the alignment rule for regular files. 729 * 730 * Because we allow administrators to set a new rt extent size when 731 * adding a rt section, we cannot add a check to this verifier because 732 * that will result a new source of directory corruption errors when 733 * reading an existing filesystem. Instead, we rely on callers to 734 * decide when alignment checks are appropriate, and fix things up as 735 * needed. 736 */ 737 738 if (rt_flag) 739 blocksize_bytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize); 740 else 741 blocksize_bytes = mp->m_sb.sb_blocksize; 742 743 if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode))) 744 return __this_address; 745 746 if (hint_flag && !S_ISREG(mode)) 747 return __this_address; 748 749 if (inherit_flag && !S_ISDIR(mode)) 750 return __this_address; 751 752 if ((hint_flag || inherit_flag) && extsize == 0) 753 return __this_address; 754 755 /* free inodes get flags set to zero but extsize remains */ 756 if (mode && !(hint_flag || inherit_flag) && extsize != 0) 757 return __this_address; 758 759 if (extsize_bytes % blocksize_bytes) 760 return __this_address; 761 762 if (extsize > XFS_MAX_BMBT_EXTLEN) 763 return __this_address; 764 765 if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2) 766 return __this_address; 767 768 return NULL; 769 } 770 771 /* 772 * Validate di_cowextsize hint. 773 * 774 * 1. CoW extent size hint can only be set if reflink is enabled on the fs. 775 * The inode does not have to have any shared blocks, but it must be a v3. 776 * 2. FS_XFLAG_COWEXTSIZE is only valid for directories and regular files; 777 * for a directory, the hint is propagated to new files. 778 * 3. Can be changed on files & directories at any time. 779 * 4. Hint value of 0 turns off hints, clears inode flags. 780 * 5. Extent size must be a multiple of the appropriate block size. 781 * 6. The extent size hint must be limited to half the AG size to avoid 782 * alignment extending the extent beyond the limits of the AG. 783 */ 784 xfs_failaddr_t 785 xfs_inode_validate_cowextsize( 786 struct xfs_mount *mp, 787 uint32_t cowextsize, 788 uint16_t mode, 789 uint16_t flags, 790 uint64_t flags2) 791 { 792 bool rt_flag; 793 bool hint_flag; 794 uint32_t cowextsize_bytes; 795 796 rt_flag = (flags & XFS_DIFLAG_REALTIME); 797 hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE); 798 cowextsize_bytes = XFS_FSB_TO_B(mp, cowextsize); 799 800 if (hint_flag && !xfs_has_reflink(mp)) 801 return __this_address; 802 803 if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode))) 804 return __this_address; 805 806 if (hint_flag && cowextsize == 0) 807 return __this_address; 808 809 /* free inodes get flags set to zero but cowextsize remains */ 810 if (mode && !hint_flag && cowextsize != 0) 811 return __this_address; 812 813 if (hint_flag && rt_flag) 814 return __this_address; 815 816 if (cowextsize_bytes % mp->m_sb.sb_blocksize) 817 return __this_address; 818 819 if (cowextsize > XFS_MAX_BMBT_EXTLEN) 820 return __this_address; 821 822 if (cowextsize > mp->m_sb.sb_agblocks / 2) 823 return __this_address; 824 825 return NULL; 826 } 827