// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_ag.h"
#include "xfs_inode.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_icache.h"
#include "xfs_trans.h"
#include "xfs_ialloc.h"
#include "xfs_dir2.h"
#include "xfs_health.h"
#include "xfs_metafile.h"

#include <linux/iversion.h>

/*
 * If we are doing readahead on an inode buffer, we might be in log recovery
 * reading an inode allocation buffer that hasn't yet been replayed, and hence
 * has not had the inode cores stamped into it. Hence for readahead, the buffer
 * may be potentially invalid.
 *
 * If the readahead buffer is invalid, we need to mark it with an error and
 * clear the DONE status of the buffer so that a followup read will re-read it
 * from disk. We don't report the error otherwise to avoid warnings during log
 * recovery and we don't get unnecessary panics on debug kernels. We use EIO here
 * because all we want to do is say readahead failed; there is no-one to report
 * the error to, so this will distinguish it from a non-ra verifier failure.
 * Changes to this readahead error behaviour also need to be reflected in
 * xfs_dquot_buf_readahead_verify().
 *
 * @bp is the inode buffer to check.  @readahead selects the quiet failure
 * path described above (XBF_DONE cleared, -EIO set, nothing logged); when it
 * is false a failure is reported through xfs_buf_verifier_error() with
 * -EFSCORRUPTED.  Checking stops at the first inode that fails.
 */
static void
xfs_inode_buf_verify(
	struct xfs_buf	*bp,
	bool		readahead)
{
	struct xfs_mount *mp = bp->b_mount;
	int		i;
	int		ni;

	/*
	 * Validate the magic number, version and next-unlinked pointer of
	 * every inode in the buffer.
	 */
	ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
	for (i = 0; i < ni; i++) {
		struct xfs_dinode	*dip;
		xfs_agino_t		unlinked_ino;
		int			di_ok;

		/* Inode i sits at offset (i << sb_inodelog) in the buffer. */
		dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog));
		unlinked_ino = be32_to_cpu(dip->di_next_unlinked);
		di_ok = xfs_verify_magic16(bp, dip->di_magic) &&
			xfs_dinode_good_version(mp, dip->di_version) &&
			xfs_verify_agino_or_null(bp->b_pag, unlinked_ino);
		/*
		 * NOTE(review): XFS_TEST_ERROR() presumably also trips when the
		 * ITOBP_INOTOBP error tag is armed - confirm in xfs_error.h.
		 */
		if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
						XFS_ERRTAG_ITOBP_INOTOBP))) {
			if (readahead) {
				/* Quiet failure: force a later real read. */
				bp->b_flags &= ~XBF_DONE;
				xfs_buf_ioerror(bp, -EIO);
				return;
			}

#ifdef DEBUG
			xfs_alert(mp,
				"bad inode magic/vsn daddr %lld #%d (magic=%x)",
				(unsigned long long)xfs_buf_daddr(bp), i,
				be16_to_cpu(dip->di_magic));
#endif
			xfs_buf_verifier_error(bp, -EFSCORRUPTED,
					__func__, dip, sizeof(*dip),
					NULL);
			return;
		}
	}
}


/* Read verifier: run the inode buffer checks with readahead handling off. */
static void
xfs_inode_buf_read_verify(
	struct xfs_buf	*bp)
{
	xfs_inode_buf_verify(bp, false);
}

/* Readahead verifier: run the inode buffer checks on the quiet -EIO path. */
static void
xfs_inode_buf_readahead_verify(
	struct xfs_buf	*bp)
{
	xfs_inode_buf_verify(bp, true);
}

/* Write verifier: same checks as the read verifier. */
static void
xfs_inode_buf_write_verify(
	struct xfs_buf	*bp)
{
	xfs_inode_buf_verify(bp, false);
}

/* Buffer ops for inode buffers; both magic16 slots hold XFS_DINODE_MAGIC. */
const struct xfs_buf_ops xfs_inode_buf_ops = {
	.name = "xfs_inode",
	.magic16 = { cpu_to_be16(XFS_DINODE_MAGIC),
		     cpu_to_be16(XFS_DINODE_MAGIC) },
	.verify_read = xfs_inode_buf_read_verify,
	.verify_write = xfs_inode_buf_write_verify,
};

/*
 * Buffer ops for inode buffer readahead.  These differ from
 * xfs_inode_buf_ops only in the name and in using the readahead verifier for
 * reads.
 */
const struct xfs_buf_ops xfs_inode_buf_ra_ops = {
	.name = "xfs_inode_ra",
	.magic16 = { cpu_to_be16(XFS_DINODE_MAGIC),
		     cpu_to_be16(XFS_DINODE_MAGIC) },
	.verify_read = xfs_inode_buf_readahead_verify,
	.verify_write = xfs_inode_buf_write_verify,
};


/*
 * This routine is called to map an inode to the buffer containing the on-disk
 * version of the inode.  It returns a pointer to the buffer containing the
 * on-disk inode in the bpp parameter.
 *
 * The buffer is read with xfs_inode_buf_ops attached.  The return value is
 * whatever xfs_trans_read_buf() returned; if xfs_metadata_is_sick() accepts
 * that error, the AG containing imap->im_blkno is also marked
 * XFS_SICK_AG_INODES.
 */
int
xfs_imap_to_bp(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct xfs_imap		*imap,
	struct xfs_buf		**bpp)
{
	int			error;

	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
			imap->im_len, XBF_UNMAPPED, bpp, &xfs_inode_buf_ops);
	if (xfs_metadata_is_sick(error))
		xfs_agno_mark_sick(mp, xfs_daddr_to_agno(mp, imap->im_blkno),
				XFS_SICK_AG_INODES);
	return error;
}

/*
 * Decode a bigtime timestamp (already converted to CPU byte order).  The
 * value is divided by NSEC_PER_SEC: the quotient goes through
 * xfs_bigtime_to_unix() to become tv_sec, the remainder becomes tv_nsec.
 */
static inline struct timespec64 xfs_inode_decode_bigtime(uint64_t ts)
{
	struct timespec64	tv;
	uint32_t		n;

	tv.tv_sec = xfs_bigtime_to_unix(div_u64_rem(ts, NSEC_PER_SEC, &n));
	tv.tv_nsec = n;

	return tv;
}

/*
 * Convert an ondisk timestamp to an incore timestamp.
 *
 * @dip is consulted only to decide whether the inode uses the bigtime
 * encoding; @ts is the raw ondisk timestamp field to decode.
 */
struct timespec64
xfs_inode_from_disk_ts(
	struct xfs_dinode		*dip,
	const xfs_timestamp_t		ts)
{
	struct timespec64		tv;
	struct xfs_legacy_timestamp	*lts;

	if (xfs_dinode_has_bigtime(dip))
		return xfs_inode_decode_bigtime(be64_to_cpu(ts));

	/*
	 * Legacy layout: view the field as separate big-endian 32-bit seconds
	 * and nanoseconds.  The (int) casts make each value signed before it
	 * is widened into the timespec64 fields.
	 */
	lts = (struct xfs_legacy_timestamp *)&ts;
	tv.tv_sec = (int)be32_to_cpu(lts->t_sec);
	tv.tv_nsec = (int)be32_to_cpu(lts->t_nsec);

	return tv;
}

/*
 * Fill in the incore inode @ip (and its VFS inode) from the ondisk inode
 * @from.
 *
 * The ondisk inode is verified first.  Returns 0 on success, -EFSCORRUPTED if
 * xfs_dinode_verify() rejects @from, or the error returned while setting up
 * the data or attr fork.  If the attr fork setup fails, the data fork that
 * was already set up is destroyed again before returning.
 */
int
xfs_inode_from_disk(
	struct xfs_inode	*ip,
	struct xfs_dinode	*from)
{
	struct inode		*inode = VFS_I(ip);
	int			error;
	xfs_failaddr_t		fa;

	ASSERT(ip->i_cowfp == NULL);

	fa = xfs_dinode_verify(ip->i_mount, ip->i_ino, from);
	if (fa) {
		xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", from,
				sizeof(*from), fa);
		return -EFSCORRUPTED;
	}

	/*
	 * First get the permanent information that is needed to allocate an
	 * inode. If the inode is unused, mode is zero and we shouldn't mess
	 * with the uninitialized part of it.
	 */
	if (!xfs_has_v3inodes(ip->i_mount))
		ip->i_flushiter = be16_to_cpu(from->di_flushiter);
	inode->i_generation = be32_to_cpu(from->di_gen);
	inode->i_mode = be16_to_cpu(from->di_mode);
	if (!inode->i_mode)
		return 0;

	/*
	 * Convert v1 inodes immediately to v2 inode format as this is the
	 * minimum inode version format we support in the rest of the code.
	 * They will also be unconditionally written back to disk as v2 inodes.
	 */
	if (unlikely(from->di_version == 1)) {
		/* di_metatype used to be di_onlink */
		set_nlink(inode, be16_to_cpu(from->di_metatype));
		ip->i_projid = 0;
	} else {
		set_nlink(inode, be32_to_cpu(from->di_nlink));
		/* The project id is stored as two 16-bit halves on disk. */
		ip->i_projid = (prid_t)be16_to_cpu(from->di_projid_hi) << 16 |
					be16_to_cpu(from->di_projid_lo);
		if (xfs_dinode_is_metadir(from))
			ip->i_metatype = be16_to_cpu(from->di_metatype);
	}

	i_uid_write(inode, be32_to_cpu(from->di_uid));
	i_gid_write(inode, be32_to_cpu(from->di_gid));

	/*
	 * Time is signed, so need to convert to signed 32 bit before
	 * storing in inode timestamp which may be 64 bit. Otherwise
	 * a time before epoch is converted to a time long after epoch
	 * on 64 bit systems.
	 */
	inode_set_atime_to_ts(inode,
			      xfs_inode_from_disk_ts(from, from->di_atime));
	inode_set_mtime_to_ts(inode,
			      xfs_inode_from_disk_ts(from, from->di_mtime));
	inode_set_ctime_to_ts(inode,
			      xfs_inode_from_disk_ts(from, from->di_ctime));

	ip->i_disk_size = be64_to_cpu(from->di_size);
	ip->i_nblocks = be64_to_cpu(from->di_nblocks);
	ip->i_extsize = be32_to_cpu(from->di_extsize);
	ip->i_forkoff = from->di_forkoff;
	ip->i_diflags = be16_to_cpu(from->di_flags);
	ip->i_next_unlinked = be32_to_cpu(from->di_next_unlinked);

	/* Remember that the ondisk inode carried non-zero DMAPI fields. */
	if (from->di_dmevmask || from->di_dmstate)
		xfs_iflags_set(ip, XFS_IPRESERVE_DM_FIELDS);

	/* These fields are only read on filesystems with v3 inodes. */
	if (xfs_has_v3inodes(ip->i_mount)) {
		inode_set_iversion_queried(inode,
					   be64_to_cpu(from->di_changecount));
		ip->i_crtime = xfs_inode_from_disk_ts(from, from->di_crtime);
		ip->i_diflags2 = be64_to_cpu(from->di_flags2);
		ip->i_cowextsize = be32_to_cpu(from->di_cowextsize);
	}

	error = xfs_iformat_data_fork(ip, from);
	if (error)
		return error;
	/* A non-zero fork offset means the inode has an attr fork. */
	if (from->di_forkoff) {
		error = xfs_iformat_attr_fork(ip, from);
		if (error)
			goto out_destroy_data_fork;
	}
	if (xfs_is_reflink_inode(ip))
		xfs_ifork_init_cow(ip);
	return 0;

out_destroy_data_fork:
	xfs_idestroy_fork(&ip->i_df);
	return error;
}

/* Convert an incore timestamp to an ondisk timestamp.
*/
static inline xfs_timestamp_t
xfs_inode_to_disk_ts(
	struct xfs_inode		*ip,
	const struct timespec64		tv)
{
	struct xfs_legacy_timestamp	*lts;
	xfs_timestamp_t			ts;

	/* Bigtime inodes store a single encoded 64-bit big-endian value. */
	if (xfs_inode_has_bigtime(ip))
		return cpu_to_be64(xfs_inode_encode_bigtime(tv));

	/*
	 * Legacy layout: fill the return value in through a view that splits
	 * it into big-endian 32-bit seconds and nanoseconds.
	 */
	lts = (struct xfs_legacy_timestamp *)&ts;
	lts->t_sec = cpu_to_be32(tv.tv_sec);
	lts->t_nsec = cpu_to_be32(tv.tv_nsec);

	return ts;
}

/*
 * Store the data and attr fork extent counts of @ip in the ondisk inode @to,
 * using the large counter fields when the inode has large extent counts and
 * the original fields otherwise.
 */
static inline void
xfs_inode_to_disk_iext_counters(
	struct xfs_inode	*ip,
	struct xfs_dinode	*to)
{
	if (xfs_inode_has_large_extent_counts(ip)) {
		to->di_big_nextents = cpu_to_be64(xfs_ifork_nextents(&ip->i_df));
		to->di_big_anextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_af));
		/*
		 * We might be upgrading the inode to use larger extent counters
		 * than was previously used. Hence zero the unused field.
		 */
		to->di_nrext64_pad = cpu_to_be16(0);
	} else {
		to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df));
		to->di_anextents = cpu_to_be16(xfs_ifork_nextents(&ip->i_af));
	}
}

/*
 * Fill in the ondisk inode @to from the incore inode @ip.
 *
 * The inode is written as version 3 when the filesystem has v3 inodes and as
 * version 2 otherwise.  @lsn is stored in di_lsn and is only used in the v3
 * case.  This function does not compute the inode checksum.
 */
void
xfs_inode_to_disk(
	struct xfs_inode	*ip,
	struct xfs_dinode	*to,
	xfs_lsn_t		lsn)
{
	struct inode		*inode = VFS_I(ip);

	to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
	/* Only metadir inodes carry a metafile type; everyone else gets 0. */
	if (xfs_is_metadir_inode(ip))
		to->di_metatype = cpu_to_be16(ip->i_metatype);
	else
		to->di_metatype = 0;

	to->di_format = xfs_ifork_format(&ip->i_df);
	to->di_uid = cpu_to_be32(i_uid_read(inode));
	to->di_gid = cpu_to_be32(i_gid_read(inode));
	/* The project id is split into two 16-bit halves on disk. */
	to->di_projid_lo = cpu_to_be16(ip->i_projid & 0xffff);
	to->di_projid_hi = cpu_to_be16(ip->i_projid >> 16);

	to->di_atime = xfs_inode_to_disk_ts(ip, inode_get_atime(inode));
	to->di_mtime = xfs_inode_to_disk_ts(ip, inode_get_mtime(inode));
	to->di_ctime = xfs_inode_to_disk_ts(ip, inode_get_ctime(inode));
	to->di_nlink = cpu_to_be32(inode->i_nlink);
	to->di_gen = cpu_to_be32(inode->i_generation);
	to->di_mode = cpu_to_be16(inode->i_mode);

	to->di_size = cpu_to_be64(ip->i_disk_size);
	to->di_nblocks = cpu_to_be64(ip->i_nblocks);
	to->di_extsize = cpu_to_be32(ip->i_extsize);
	to->di_forkoff = ip->i_forkoff;
	to->di_aformat = xfs_ifork_format(&ip->i_af);
	to->di_flags = cpu_to_be16(ip->i_diflags);

	if (xfs_has_v3inodes(ip->i_mount)) {
		to->di_version = 3;
		to->di_changecount = cpu_to_be64(inode_peek_iversion(inode));
		to->di_crtime = xfs_inode_to_disk_ts(ip, ip->i_crtime);
		to->di_flags2 = cpu_to_be64(ip->i_diflags2);
		to->di_cowextsize = cpu_to_be32(ip->i_cowextsize);
		to->di_ino = cpu_to_be64(ip->i_ino);
		to->di_lsn = cpu_to_be64(lsn);
		memset(to->di_pad2, 0, sizeof(to->di_pad2));
		uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
		to->di_v3_pad = 0;
	} else {
		to->di_version = 2;
		to->di_flushiter = cpu_to_be16(ip->i_flushiter);
		memset(to->di_v2_pad, 0, sizeof(to->di_v2_pad));
	}

	xfs_inode_to_disk_iext_counters(ip, to);
}

/*
 * Check that the format and extent count of one fork of an ondisk inode are
 * consistent with each other and, for the data fork, with the file type and
 * size.  @whichfork selects the fork (XFS_DATA_FORK or XFS_ATTR_FORK).
 *
 * Returns NULL if the fork looks sane, or the failure address of the first
 * check that did not pass.
 */
static xfs_failaddr_t
xfs_dinode_verify_fork(
	struct xfs_dinode	*dip,
	struct xfs_mount	*mp,
	int			whichfork)
{
	xfs_extnum_t		di_nextents;
	xfs_extnum_t		max_extents;
	mode_t			mode = be16_to_cpu(dip->di_mode);
	uint32_t		fork_size = XFS_DFORK_SIZE(dip, mp, whichfork);
	uint32_t		fork_format = XFS_DFORK_FORMAT(dip, whichfork);

	di_nextents = xfs_dfork_nextents(dip, whichfork);

	/*
	 * For fork types that can contain local data, check that the fork
	 * format matches the size of local data contained within the fork.
	 */
	if (whichfork == XFS_DATA_FORK) {
		/*
		 * A directory small enough to fit in the inode must be stored
		 * in local format.  The directory sf <-> extents conversion
		 * code updates the directory size accordingly.  Directories
		 * being truncated have zero size and are not subject to this
		 * check.
		 */
		if (S_ISDIR(mode)) {
			if (dip->di_size &&
			    be64_to_cpu(dip->di_size) <= fork_size &&
			    fork_format != XFS_DINODE_FMT_LOCAL)
				return __this_address;
		}

		/*
		 * A symlink with a target small enough to fit in the inode can
		 * be stored in extents format if xattrs were added (thus
		 * converting the data fork from shortform to remote format)
		 * and then removed.
		 */
		if (S_ISLNK(mode)) {
			if (be64_to_cpu(dip->di_size) <= fork_size &&
			    fork_format != XFS_DINODE_FMT_EXTENTS &&
			    fork_format != XFS_DINODE_FMT_LOCAL)
				return __this_address;
		}

		/*
		 * For all types, check that when the size says the fork should
		 * be in extent or btree format, the inode isn't claiming to be
		 * in local format.
		 */
		if (be64_to_cpu(dip->di_size) > fork_size &&
		    fork_format == XFS_DINODE_FMT_LOCAL)
			return __this_address;
	}

	/* Per-format limits on the extent count; unknown formats fail. */
	switch (fork_format) {
	case XFS_DINODE_FMT_LOCAL:
		/*
		 * No local regular files yet.
		 */
		if (S_ISREG(mode) && whichfork == XFS_DATA_FORK)
			return __this_address;
		/* A local-format fork cannot also claim extents. */
		if (di_nextents)
			return __this_address;
		break;
	case XFS_DINODE_FMT_EXTENTS:
		if (di_nextents > XFS_DFORK_MAXEXT(dip, mp, whichfork))
			return __this_address;
		break;
	case XFS_DINODE_FMT_BTREE:
		max_extents = xfs_iext_max_nextents(
					xfs_dinode_has_large_extent_counts(dip),
					whichfork);
		if (di_nextents > max_extents)
			return __this_address;
		break;
	default:
		return __this_address;
	}
	return NULL;
}

/*
 * Check di_forkoff against the data fork format.  A zero fork offset is
 * always accepted.  The shifts by 3 show that di_forkoff is compared in units
 * of 8 bytes.
 *
 * Returns NULL if the fork offset is acceptable, otherwise a failure address.
 */
static xfs_failaddr_t
xfs_dinode_verify_forkoff(
	struct xfs_dinode	*dip,
	struct xfs_mount	*mp)
{
	if (!dip->di_forkoff)
		return NULL;

	switch (dip->di_format) {
	case XFS_DINODE_FMT_DEV:
		/* Must sit exactly after an 8-byte-rounded xfs_dev_t. */
		if (dip->di_forkoff != (roundup(sizeof(xfs_dev_t), 8) >> 3))
			return __this_address;
		break;
	case XFS_DINODE_FMT_LOCAL:	/* fall through ... */
	case XFS_DINODE_FMT_EXTENTS:    /* fall through ... */
	case XFS_DINODE_FMT_BTREE:
		if (dip->di_forkoff >= (XFS_LITINO(mp) >> 3))
			return __this_address;
		break;
	default:
		return __this_address;
	}
	return NULL;
}

/*
 * Check the large extent counter state of an ondisk inode.  An inode with
 * large extent counts needs the feature enabled on the filesystem and a zero
 * di_nrext64_pad; otherwise a v3 (or later) inode must have a zero di_v3_pad.
 *
 * Returns NULL if consistent, otherwise a failure address.
 */
static xfs_failaddr_t
xfs_dinode_verify_nrext64(
	struct xfs_mount	*mp,
	struct xfs_dinode	*dip)
{
	if (xfs_dinode_has_large_extent_counts(dip)) {
		if (!xfs_has_large_extent_counts(mp))
			return __this_address;
		if (dip->di_nrext64_pad != 0)
			return __this_address;
	} else if (dip->di_version >= 3) {
		if (dip->di_v3_pad != 0)
			return __this_address;
	}

	return NULL;
}

/*
 * Validate all the picky requirements we have for a file that claims to be
 * filesystem metadata.
 *
 * @mode, @flags and @flags2 are the CPU-endian values of the corresponding
 * fields of @dip.  Returns NULL if every requirement is met, otherwise the
 * failure address of the first requirement that was not.
 */
xfs_failaddr_t
xfs_dinode_verify_metadir(
	struct xfs_mount	*mp,
	struct xfs_dinode	*dip,
	uint16_t		mode,
	uint16_t		flags,
	uint64_t		flags2)
{
	if (!xfs_has_metadir(mp))
		return __this_address;

	/* V5 filesystem only */
	if (dip->di_version < 3)
		return __this_address;

	if (be16_to_cpu(dip->di_metatype) >= XFS_METAFILE_MAX)
		return __this_address;

	/* V3 inode fields that are always zero */
	if ((flags2 & XFS_DIFLAG2_NREXT64) && dip->di_nrext64_pad)
		return __this_address;
	if (!(flags2 & XFS_DIFLAG2_NREXT64) && dip->di_flushiter)
		return __this_address;

	/* Metadata files can only be directories or regular files */
	if (!S_ISDIR(mode) && !S_ISREG(mode))
		return __this_address;

	/* They must have zero access permissions */
	if (mode & 0777)
		return __this_address;

	/* DMAPI event and state masks are zero */
	if (dip->di_dmevmask || dip->di_dmstate)
		return __this_address;

	/*
	 * User and group IDs must be zero.  The project ID is used for
	 * grouping inodes.  Metadata inodes are never accounted to quotas.
	 */
	if (dip->di_uid || dip->di_gid)
		return __this_address;

	/* Mandatory inode flags must be set */
	if (S_ISDIR(mode)) {
		if ((flags & XFS_METADIR_DIFLAGS) != XFS_METADIR_DIFLAGS)
			return __this_address;
	} else {
		if ((flags & XFS_METAFILE_DIFLAGS) != XFS_METAFILE_DIFLAGS)
			return __this_address;
	}

	/* dax flags2 must not be set */
	if (flags2 & XFS_DIFLAG2_DAX)
		return __this_address;

	return NULL;
}

/*
 * Verify an ondisk inode.
 *
 * @ino is the inode number this inode is expected to have; it is compared
 * with di_ino on v3 inodes only.  The checks run in a fixed order and stop at
 * the first failure.
 *
 * Returns NULL if every check passes, otherwise the failure address of the
 * check that did not.
 */
xfs_failaddr_t
xfs_dinode_verify(
	struct xfs_mount	*mp,
	xfs_ino_t		ino,
	struct xfs_dinode	*dip)
{
	xfs_failaddr_t		fa;
	uint16_t		mode;
	uint16_t		flags;
	uint64_t		flags2;
	uint64_t		di_size;
	xfs_extnum_t		nextents;
	xfs_extnum_t		naextents;
	xfs_filblks_t		nblocks;

	if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
		return __this_address;

	/* Verify v3 integrity information first */
	if (dip->di_version >= 3) {
		if (!xfs_has_v3inodes(mp))
			return __this_address;
		if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
				      XFS_DINODE_CRC_OFF))
			return __this_address;
		if (be64_to_cpu(dip->di_ino) != ino)
			return __this_address;
		if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid))
			return __this_address;
	}

	/*
	 * Historical note: xfsprogs in the 3.2 era set up its incore inodes to
	 * have di_nlink track the link count, even if the actual filesystem
	 * only supported V1 inodes (i.e. di_onlink).  When writing out the
	 * ondisk inode, it would set both the ondisk di_nlink and di_onlink to
	 * the incore di_nlink value, which is why we cannot check for
	 * di_nlink==0 on a V1 inode.  V2/3 inodes would get written out with
	 * di_onlink==0, so we can check that.
	 */
	if (dip->di_version == 2) {
		if (dip->di_metatype)
			return __this_address;
	} else if (dip->di_version >= 3) {
		if (!xfs_dinode_is_metadir(dip) && dip->di_metatype)
			return __this_address;
	}

	/* don't allow invalid i_size */
	di_size = be64_to_cpu(dip->di_size);
	if (di_size & (1ULL << 63))
		return __this_address;

	/* A non-zero mode must map to a known directory entry file type. */
	mode = be16_to_cpu(dip->di_mode);
	if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN)
		return __this_address;

	/*
	 * No zero-length symlinks/dirs unless they're unlinked and hence being
	 * inactivated.
	 */
	if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) {
		if (dip->di_version > 1) {
			if (dip->di_nlink)
				return __this_address;
		} else {
			/* di_metatype used to be di_onlink */
			if (dip->di_metatype)
				return __this_address;
		}
	}

	fa = xfs_dinode_verify_nrext64(mp, dip);
	if (fa)
		return fa;

	nextents = xfs_dfork_data_extents(dip);
	naextents = xfs_dfork_attr_extents(dip);
	nblocks = be64_to_cpu(dip->di_nblocks);

	/* Fork checks carried over from xfs_iformat_fork */
	if (mode && nextents + naextents > nblocks)
		return __this_address;

	/* Blocks cannot be allocated to an inode that has no extents. */
	if (nextents + naextents == 0 && nblocks != 0)
		return __this_address;

	if (S_ISDIR(mode) && nextents > mp->m_dir_geo->max_extents)
		return __this_address;

	if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize)
		return __this_address;

	flags = be16_to_cpu(dip->di_flags);

	/* A realtime file needs a realtime device to live on. */
	if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp)
		return __this_address;

	/* check for illegal values of forkoff */
	fa = xfs_dinode_verify_forkoff(dip, mp);
	if (fa)
		return fa;

	/* Do we have appropriate data fork formats for the mode? */
	switch (mode & S_IFMT) {
	case S_IFIFO:
	case S_IFCHR:
	case S_IFBLK:
	case S_IFSOCK:
		if (dip->di_format != XFS_DINODE_FMT_DEV)
			return __this_address;
		break;
	case S_IFREG:
	case S_IFLNK:
	case S_IFDIR:
		fa = xfs_dinode_verify_fork(dip, mp, XFS_DATA_FORK);
		if (fa)
			return fa;
		break;
	case 0:
		/* Uninitialized inode ok. */
		break;
	default:
		return __this_address;
	}

	if (dip->di_forkoff) {
		fa = xfs_dinode_verify_fork(dip, mp, XFS_ATTR_FORK);
		if (fa)
			return fa;
	} else {
		/*
		 * If there is no fork offset, this may be a freshly-made inode
		 * in a new disk cluster, in which case di_aformat is zeroed.
		 * Otherwise, such an inode must be in EXTENTS format; this goes
		 * for freed inodes as well.
		 */
		switch (dip->di_aformat) {
		case 0:
		case XFS_DINODE_FMT_EXTENTS:
			break;
		default:
			return __this_address;
		}
		if (naextents)
			return __this_address;
	}

	/* extent size hint validation */
	fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize),
			mode, flags);
	if (fa)
		return fa;

	/* only version 3 or greater inodes are extensively verified here */
	if (dip->di_version < 3)
		return NULL;

	flags2 = be64_to_cpu(dip->di_flags2);

	/* don't allow reflink/cowextsize if we don't have reflink */
	if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) &&
	     !xfs_has_reflink(mp))
		return __this_address;

	/* only regular files get reflink */
	if ((flags2 & XFS_DIFLAG2_REFLINK) && (mode & S_IFMT) != S_IFREG)
		return __this_address;

	/* don't let reflink and realtime mix */
	if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME))
		return __this_address;

	/* COW extent size hint validation */
	fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
			mode, flags, flags2);
	if (fa)
		return fa;

	/* bigtime iflag can only happen on bigtime filesystems */
	if (xfs_dinode_has_bigtime(dip) &&
	    !xfs_has_bigtime(mp))
		return __this_address;

	if (flags2 & XFS_DIFLAG2_METADATA) {
		fa = xfs_dinode_verify_metadir(mp, dip, mode, flags, flags2);
		if (fa)
			return fa;
	}

	return NULL;
}

/*
 * Compute the checksum of an ondisk inode and store it in di_crc.  Inodes
 * older than version 3 are left untouched.  The checksum covers
 * mp->m_sb.sb_inodesize bytes starting at @dip.
 */
void
xfs_dinode_calc_crc(
	struct xfs_mount	*mp,
	struct xfs_dinode	*dip)
{
	uint32_t		crc;

	if (dip->di_version < 3)
		return;

	ASSERT(xfs_has_crc(mp));
	crc = xfs_start_cksum_update((char *)dip, mp->m_sb.sb_inodesize,
			      XFS_DINODE_CRC_OFF);
	dip->di_crc = xfs_end_cksum(crc);
}

/*
 * Validate di_extsize hint.
 *
 * 1. Extent size hint is only valid for directories and regular files.
 * 2. FS_XFLAG_EXTSIZE is only valid for regular files.
 * 3. FS_XFLAG_EXTSZINHERIT is only valid for directories.
 * 4. Hint cannot be larger than XFS_MAX_BMBT_EXTLEN.
 * 5. Can be changed on directories at any time.
 * 6. Hint value of 0 turns off hints, clears inode flags.
 * 7. Extent size must be a multiple of the appropriate block size.
 *    For realtime files, this is the rt extent size.
 * 8. For non-realtime files, the extent size hint must be limited
 *    to half the AG size to avoid alignment extending the extent beyond the
 *    limits of the AG.
 *
 * @extsize is the hint in filesystem blocks; @mode and @flags are the
 * CPU-endian di_mode and di_flags values.  Returns NULL if the hint is
 * acceptable, otherwise the failure address of the first rule it broke.
 */
xfs_failaddr_t
xfs_inode_validate_extsize(
	struct xfs_mount		*mp,
	uint32_t			extsize,
	uint16_t			mode,
	uint16_t			flags)
{
	bool				rt_flag;
	bool				hint_flag;
	bool				inherit_flag;
	uint32_t			extsize_bytes;
	uint32_t			blocksize_bytes;

	rt_flag = (flags & XFS_DIFLAG_REALTIME);
	hint_flag = (flags & XFS_DIFLAG_EXTSIZE);
	inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT);
	/*
	 * NOTE(review): extsize_bytes and blocksize_bytes are 32 bits wide.
	 * If XFS_FSB_TO_B() yields a wider byte count, large hints are
	 * truncated before the alignment test below - confirm whether that
	 * matters for non-power-of-two rt extent sizes.
	 */
	extsize_bytes = XFS_FSB_TO_B(mp, extsize);

	/*
	 * This comment describes a historic gap in this verifier function.
	 *
	 * For a directory with both RTINHERIT and EXTSZINHERIT flags set, this
	 * function has never checked that the extent size hint is an integer
	 * multiple of the realtime extent size.  Since we allow users to set
	 * this combination  on non-rt filesystems /and/ to change the rt
	 * extent size when adding a rt device to a filesystem, the net effect
	 * is that users can configure a filesystem anticipating one rt
	 * geometry and change their minds later.  Directories do not use the
	 * extent size hint, so this is harmless for them.
	 *
	 * If a directory with a misaligned extent size hint is allowed to
	 * propagate that hint into a new regular realtime file, the result
	 * is that the inode cluster buffer verifier will trigger a corruption
	 * shutdown the next time it is run, because the verifier has always
	 * enforced the alignment rule for regular files.
	 *
	 * Because we allow administrators to set a new rt extent size when
	 * adding a rt section, we cannot add a check to this verifier because
	 * that will result in a new source of directory corruption errors when
	 * reading an existing filesystem.  Instead, we rely on callers to
	 * decide when alignment checks are appropriate, and fix things up as
	 * needed.
	 */

	/* Alignment unit: the rt extent size for rt files, else one block. */
	if (rt_flag)
		blocksize_bytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize);
	else
		blocksize_bytes = mp->m_sb.sb_blocksize;

	if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode)))
		return __this_address;

	if (hint_flag && !S_ISREG(mode))
		return __this_address;

	if (inherit_flag && !S_ISDIR(mode))
		return __this_address;

	if ((hint_flag || inherit_flag) && extsize == 0)
		return __this_address;

	/* free inodes get flags set to zero but extsize remains */
	if (mode && !(hint_flag || inherit_flag) && extsize != 0)
		return __this_address;

	if (extsize_bytes % blocksize_bytes)
		return __this_address;

	if (extsize > XFS_MAX_BMBT_EXTLEN)
		return __this_address;

	if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2)
		return __this_address;

	return NULL;
}

/*
 * Validate di_cowextsize hint.
 *
 * 1. CoW extent size hint can only be set if reflink is enabled on the fs.
 *    The inode does not have to have any shared blocks, but it must be a v3.
 * 2. FS_XFLAG_COWEXTSIZE is only valid for directories and regular files;
 *    for a directory, the hint is propagated to new files.
 * 3. Can be changed on files & directories at any time.
 * 4. Hint value of 0 turns off hints, clears inode flags.
 * 5. Extent size must be a multiple of the appropriate block size.
 * 6. The extent size hint must be limited to half the AG size to avoid
 *    alignment extending the extent beyond the limits of the AG.
869 */ 870 xfs_failaddr_t 871 xfs_inode_validate_cowextsize( 872 struct xfs_mount *mp, 873 uint32_t cowextsize, 874 uint16_t mode, 875 uint16_t flags, 876 uint64_t flags2) 877 { 878 bool rt_flag; 879 bool hint_flag; 880 uint32_t cowextsize_bytes; 881 882 rt_flag = (flags & XFS_DIFLAG_REALTIME); 883 hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE); 884 cowextsize_bytes = XFS_FSB_TO_B(mp, cowextsize); 885 886 if (hint_flag && !xfs_has_reflink(mp)) 887 return __this_address; 888 889 if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode))) 890 return __this_address; 891 892 if (hint_flag && cowextsize == 0) 893 return __this_address; 894 895 /* free inodes get flags set to zero but cowextsize remains */ 896 if (mode && !hint_flag && cowextsize != 0) 897 return __this_address; 898 899 if (hint_flag && rt_flag) 900 return __this_address; 901 902 if (cowextsize_bytes % mp->m_sb.sb_blocksize) 903 return __this_address; 904 905 if (cowextsize > XFS_MAX_BMBT_EXTLEN) 906 return __this_address; 907 908 if (cowextsize > mp->m_sb.sb_agblocks / 2) 909 return __this_address; 910 911 return NULL; 912 } 913