1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 4 * All Rights Reserved. 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_log_format.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_mount.h" 13 #include "xfs_ag.h" 14 #include "xfs_inode.h" 15 #include "xfs_errortag.h" 16 #include "xfs_error.h" 17 #include "xfs_icache.h" 18 #include "xfs_trans.h" 19 #include "xfs_ialloc.h" 20 #include "xfs_dir2.h" 21 #include "xfs_health.h" 22 #include "xfs_metafile.h" 23 24 #include <linux/iversion.h> 25 26 /* 27 * If we are doing readahead on an inode buffer, we might be in log recovery 28 * reading an inode allocation buffer that hasn't yet been replayed, and hence 29 * has not had the inode cores stamped into it. Hence for readahead, the buffer 30 * may be potentially invalid. 31 * 32 * If the readahead buffer is invalid, we need to mark it with an error and 33 * clear the DONE status of the buffer so that a followup read will re-read it 34 * from disk. We don't report the error otherwise to avoid warnings during log 35 * recovery and we don't get unnecessary panics on debug kernels. We use EIO here 36 * because all we want to do is say readahead failed; there is no-one to report 37 * the error to, so this will distinguish it from a non-ra verifier failure. 38 * Changes to this readahead error behaviour also need to be reflected in 39 * xfs_dquot_buf_readahead_verify(). 
40 */ 41 static void 42 xfs_inode_buf_verify( 43 struct xfs_buf *bp, 44 bool readahead) 45 { 46 struct xfs_mount *mp = bp->b_mount; 47 int i; 48 int ni; 49 50 /* 51 * Validate the magic number and version of every inode in the buffer 52 */ 53 ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock; 54 for (i = 0; i < ni; i++) { 55 struct xfs_dinode *dip; 56 xfs_agino_t unlinked_ino; 57 int di_ok; 58 59 dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog)); 60 unlinked_ino = be32_to_cpu(dip->di_next_unlinked); 61 di_ok = xfs_verify_magic16(bp, dip->di_magic) && 62 xfs_dinode_good_version(mp, dip->di_version) && 63 xfs_verify_agino_or_null(bp->b_pag, unlinked_ino); 64 if (unlikely(XFS_TEST_ERROR(!di_ok, mp, 65 XFS_ERRTAG_ITOBP_INOTOBP))) { 66 if (readahead) { 67 bp->b_flags &= ~XBF_DONE; 68 xfs_buf_ioerror(bp, -EIO); 69 return; 70 } 71 72 #ifdef DEBUG 73 xfs_alert(mp, 74 "bad inode magic/vsn daddr %lld #%d (magic=%x)", 75 (unsigned long long)xfs_buf_daddr(bp), i, 76 be16_to_cpu(dip->di_magic)); 77 #endif 78 xfs_buf_verifier_error(bp, -EFSCORRUPTED, 79 __func__, dip, sizeof(*dip), 80 NULL); 81 return; 82 } 83 } 84 } 85 86 87 static void 88 xfs_inode_buf_read_verify( 89 struct xfs_buf *bp) 90 { 91 xfs_inode_buf_verify(bp, false); 92 } 93 94 static void 95 xfs_inode_buf_readahead_verify( 96 struct xfs_buf *bp) 97 { 98 xfs_inode_buf_verify(bp, true); 99 } 100 101 static void 102 xfs_inode_buf_write_verify( 103 struct xfs_buf *bp) 104 { 105 xfs_inode_buf_verify(bp, false); 106 } 107 108 const struct xfs_buf_ops xfs_inode_buf_ops = { 109 .name = "xfs_inode", 110 .magic16 = { cpu_to_be16(XFS_DINODE_MAGIC), 111 cpu_to_be16(XFS_DINODE_MAGIC) }, 112 .verify_read = xfs_inode_buf_read_verify, 113 .verify_write = xfs_inode_buf_write_verify, 114 }; 115 116 const struct xfs_buf_ops xfs_inode_buf_ra_ops = { 117 .name = "xfs_inode_ra", 118 .magic16 = { cpu_to_be16(XFS_DINODE_MAGIC), 119 cpu_to_be16(XFS_DINODE_MAGIC) }, 120 .verify_read = xfs_inode_buf_readahead_verify, 121 
.verify_write = xfs_inode_buf_write_verify, 122 }; 123 124 125 /* 126 * This routine is called to map an inode to the buffer containing the on-disk 127 * version of the inode. It returns a pointer to the buffer containing the 128 * on-disk inode in the bpp parameter. 129 */ 130 int 131 xfs_imap_to_bp( 132 struct xfs_mount *mp, 133 struct xfs_trans *tp, 134 struct xfs_imap *imap, 135 struct xfs_buf **bpp) 136 { 137 int error; 138 139 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, 140 imap->im_len, XBF_UNMAPPED, bpp, &xfs_inode_buf_ops); 141 if (xfs_metadata_is_sick(error)) 142 xfs_agno_mark_sick(mp, xfs_daddr_to_agno(mp, imap->im_blkno), 143 XFS_SICK_AG_INODES); 144 return error; 145 } 146 147 static inline struct timespec64 xfs_inode_decode_bigtime(uint64_t ts) 148 { 149 struct timespec64 tv; 150 uint32_t n; 151 152 tv.tv_sec = xfs_bigtime_to_unix(div_u64_rem(ts, NSEC_PER_SEC, &n)); 153 tv.tv_nsec = n; 154 155 return tv; 156 } 157 158 /* Convert an ondisk timestamp to an incore timestamp. */ 159 struct timespec64 160 xfs_inode_from_disk_ts( 161 struct xfs_dinode *dip, 162 const xfs_timestamp_t ts) 163 { 164 struct timespec64 tv; 165 struct xfs_legacy_timestamp *lts; 166 167 if (xfs_dinode_has_bigtime(dip)) 168 return xfs_inode_decode_bigtime(be64_to_cpu(ts)); 169 170 lts = (struct xfs_legacy_timestamp *)&ts; 171 tv.tv_sec = (int)be32_to_cpu(lts->t_sec); 172 tv.tv_nsec = (int)be32_to_cpu(lts->t_nsec); 173 174 return tv; 175 } 176 177 int 178 xfs_inode_from_disk( 179 struct xfs_inode *ip, 180 struct xfs_dinode *from) 181 { 182 struct inode *inode = VFS_I(ip); 183 int error; 184 xfs_failaddr_t fa; 185 186 ASSERT(ip->i_cowfp == NULL); 187 188 fa = xfs_dinode_verify(ip->i_mount, ip->i_ino, from); 189 if (fa) { 190 xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", from, 191 sizeof(*from), fa); 192 return -EFSCORRUPTED; 193 } 194 195 /* 196 * First get the permanent information that is needed to allocate an 197 * inode. 
If the inode is unused, mode is zero and we shouldn't mess 198 * with the uninitialized part of it. 199 */ 200 if (!xfs_has_v3inodes(ip->i_mount)) 201 ip->i_flushiter = be16_to_cpu(from->di_flushiter); 202 inode->i_generation = be32_to_cpu(from->di_gen); 203 inode->i_mode = be16_to_cpu(from->di_mode); 204 if (!inode->i_mode) 205 return 0; 206 207 /* 208 * Convert v1 inodes immediately to v2 inode format as this is the 209 * minimum inode version format we support in the rest of the code. 210 * They will also be unconditionally written back to disk as v2 inodes. 211 */ 212 if (unlikely(from->di_version == 1)) { 213 /* di_metatype used to be di_onlink */ 214 set_nlink(inode, be16_to_cpu(from->di_metatype)); 215 ip->i_projid = 0; 216 } else { 217 set_nlink(inode, be32_to_cpu(from->di_nlink)); 218 ip->i_projid = (prid_t)be16_to_cpu(from->di_projid_hi) << 16 | 219 be16_to_cpu(from->di_projid_lo); 220 if (xfs_dinode_is_metadir(from)) 221 ip->i_metatype = be16_to_cpu(from->di_metatype); 222 } 223 224 i_uid_write(inode, be32_to_cpu(from->di_uid)); 225 i_gid_write(inode, be32_to_cpu(from->di_gid)); 226 227 /* 228 * Time is signed, so need to convert to signed 32 bit before 229 * storing in inode timestamp which may be 64 bit. Otherwise 230 * a time before epoch is converted to a time long after epoch 231 * on 64 bit systems. 
232 */ 233 inode_set_atime_to_ts(inode, 234 xfs_inode_from_disk_ts(from, from->di_atime)); 235 inode_set_mtime_to_ts(inode, 236 xfs_inode_from_disk_ts(from, from->di_mtime)); 237 inode_set_ctime_to_ts(inode, 238 xfs_inode_from_disk_ts(from, from->di_ctime)); 239 240 ip->i_disk_size = be64_to_cpu(from->di_size); 241 ip->i_nblocks = be64_to_cpu(from->di_nblocks); 242 ip->i_extsize = be32_to_cpu(from->di_extsize); 243 ip->i_forkoff = from->di_forkoff; 244 ip->i_diflags = be16_to_cpu(from->di_flags); 245 ip->i_next_unlinked = be32_to_cpu(from->di_next_unlinked); 246 247 if (from->di_dmevmask || from->di_dmstate) 248 xfs_iflags_set(ip, XFS_IPRESERVE_DM_FIELDS); 249 250 if (xfs_has_v3inodes(ip->i_mount)) { 251 inode_set_iversion_queried(inode, 252 be64_to_cpu(from->di_changecount)); 253 ip->i_crtime = xfs_inode_from_disk_ts(from, from->di_crtime); 254 ip->i_diflags2 = be64_to_cpu(from->di_flags2); 255 ip->i_cowextsize = be32_to_cpu(from->di_cowextsize); 256 } 257 258 error = xfs_iformat_data_fork(ip, from); 259 if (error) 260 return error; 261 if (from->di_forkoff) { 262 error = xfs_iformat_attr_fork(ip, from); 263 if (error) 264 goto out_destroy_data_fork; 265 } 266 if (xfs_is_reflink_inode(ip)) 267 xfs_ifork_init_cow(ip); 268 return 0; 269 270 out_destroy_data_fork: 271 xfs_idestroy_fork(&ip->i_df); 272 return error; 273 } 274 275 /* Convert an incore timestamp to an ondisk timestamp. 
*/
/*
 * Inodes with the bigtime feature are encoded as a single big-endian 64-bit
 * counter; all others use the legacy pair of big-endian 32-bit fields.
 */
static inline xfs_timestamp_t
xfs_inode_to_disk_ts(
	struct xfs_inode		*ip,
	const struct timespec64		tv)
{
	struct xfs_legacy_timestamp	*lts;
	xfs_timestamp_t			ts;

	if (xfs_inode_has_bigtime(ip))
		return cpu_to_be64(xfs_inode_encode_bigtime(tv));

	/* Overlay the legacy layout on the ondisk timestamp and fill it in. */
	lts = (struct xfs_legacy_timestamp *)&ts;
	lts->t_sec = cpu_to_be32(tv.tv_sec);
	lts->t_nsec = cpu_to_be32(tv.tv_nsec);

	return ts;
}

/*
 * Store the data and attr fork extent counts of @ip into @to, using either
 * the large (64/32-bit) or the classic (32/16-bit) ondisk counter fields
 * depending on whether the inode has large extent counts enabled.
 */
static inline void
xfs_inode_to_disk_iext_counters(
	struct xfs_inode	*ip,
	struct xfs_dinode	*to)
{
	if (xfs_inode_has_large_extent_counts(ip)) {
		to->di_big_nextents = cpu_to_be64(xfs_ifork_nextents(&ip->i_df));
		to->di_big_anextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_af));
		/*
		 * We might be upgrading the inode to use larger extent counters
		 * than was previously used. Hence zero the unused field.
		 */
		to->di_nrext64_pad = cpu_to_be16(0);
	} else {
		to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df));
		to->di_anextents = cpu_to_be16(xfs_ifork_nextents(&ip->i_af));
	}
}

/*
 * Fill in the ondisk inode @to from the incore inode @ip.
 *
 * The inode is always written as version 3 (on filesystems with v3 inodes) or
 * version 2 (otherwise).  @lsn is only stored, in di_lsn, for v3 inodes.
 * Padding fields for the chosen version are zeroed.  This function does not
 * write di_crc.
 */
void
xfs_inode_to_disk(
	struct xfs_inode	*ip,
	struct xfs_dinode	*to,
	xfs_lsn_t		lsn)
{
	struct inode		*inode = VFS_I(ip);

	to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
	/* di_metatype is only meaningful for metadir inodes; zero otherwise. */
	if (xfs_is_metadir_inode(ip))
		to->di_metatype = cpu_to_be16(ip->i_metatype);
	else
		to->di_metatype = 0;

	to->di_format = xfs_ifork_format(&ip->i_df);
	to->di_uid = cpu_to_be32(i_uid_read(inode));
	to->di_gid = cpu_to_be32(i_gid_read(inode));
	/* The 32-bit project id is split across two 16-bit ondisk fields. */
	to->di_projid_lo = cpu_to_be16(ip->i_projid & 0xffff);
	to->di_projid_hi = cpu_to_be16(ip->i_projid >> 16);

	to->di_atime = xfs_inode_to_disk_ts(ip, inode_get_atime(inode));
	to->di_mtime = xfs_inode_to_disk_ts(ip, inode_get_mtime(inode));
	to->di_ctime = xfs_inode_to_disk_ts(ip, inode_get_ctime(inode));
	to->di_nlink = cpu_to_be32(inode->i_nlink);
	to->di_gen = cpu_to_be32(inode->i_generation);
	to->di_mode = cpu_to_be16(inode->i_mode);

	to->di_size = cpu_to_be64(ip->i_disk_size);
	to->di_nblocks = cpu_to_be64(ip->i_nblocks);
	to->di_extsize = cpu_to_be32(ip->i_extsize);
	to->di_forkoff = ip->i_forkoff;
	to->di_aformat = xfs_ifork_format(&ip->i_af);
	to->di_flags = cpu_to_be16(ip->i_diflags);

	if (xfs_has_v3inodes(ip->i_mount)) {
		to->di_version = 3;
		to->di_changecount = cpu_to_be64(inode_peek_iversion(inode));
		to->di_crtime = xfs_inode_to_disk_ts(ip, ip->i_crtime);
		to->di_flags2 = cpu_to_be64(ip->i_diflags2);
		to->di_cowextsize = cpu_to_be32(ip->i_cowextsize);
		to->di_ino = cpu_to_be64(ip->i_ino);
		to->di_lsn = cpu_to_be64(lsn);
		memset(to->di_pad2, 0, sizeof(to->di_pad2));
		uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
		to->di_v3_pad = 0;
	} else {
		to->di_version = 2;
		to->di_flushiter = cpu_to_be16(ip->i_flushiter);
		memset(to->di_v2_pad, 0, sizeof(to->di_v2_pad));
	}

	/* Done last, after the version-specific fields above are written. */
	xfs_inode_to_disk_iext_counters(ip, to);
}

/*
 * Check that the format, size and extent count of one fork (@whichfork) of
 * the ondisk inode @dip are consistent with each other and with the inode
 * mode.  Returns NULL if the fork looks sane, otherwise a non-NULL failure
 * address taken at the check that rejected it.
 */
static xfs_failaddr_t
xfs_dinode_verify_fork(
	struct xfs_dinode	*dip,
	struct xfs_mount	*mp,
	int			whichfork)
{
	xfs_extnum_t		di_nextents;
	xfs_extnum_t		max_extents;
	mode_t			mode = be16_to_cpu(dip->di_mode);
	uint32_t		fork_size = XFS_DFORK_SIZE(dip, mp, whichfork);
	uint32_t		fork_format = XFS_DFORK_FORMAT(dip, whichfork);

	di_nextents = xfs_dfork_nextents(dip, whichfork);

	/*
	 * For fork types that can contain local data, check that the fork
	 * format matches the size of local data contained within the fork.
	 */
	if (whichfork == XFS_DATA_FORK) {
		/*
		 * A directory small enough to fit in the inode must be stored
		 * in local format.  The directory sf <-> extents conversion
		 * code updates the directory size accordingly.  Directories
		 * being truncated have zero size and are not subject to this
		 * check.
		 */
		if (S_ISDIR(mode)) {
			if (dip->di_size &&
			    be64_to_cpu(dip->di_size) <= fork_size &&
			    fork_format != XFS_DINODE_FMT_LOCAL)
				return __this_address;
		}

		/*
		 * A symlink with a target small enough to fit in the inode can
		 * be stored in extents format if xattrs were added (thus
		 * converting the data fork from shortform to remote format)
		 * and then removed.
		 */
		if (S_ISLNK(mode)) {
			if (be64_to_cpu(dip->di_size) <= fork_size &&
			    fork_format != XFS_DINODE_FMT_EXTENTS &&
			    fork_format != XFS_DINODE_FMT_LOCAL)
				return __this_address;
		}

		/*
		 * For all types, check that when the size says the fork should
		 * be in extent or btree format, the inode isn't claiming to be
		 * in local format.
		 */
		if (be64_to_cpu(dip->di_size) > fork_size &&
		    fork_format == XFS_DINODE_FMT_LOCAL)
			return __this_address;
	}

	/* Per-format limits on the extent count; unknown formats are rejected. */
	switch (fork_format) {
	case XFS_DINODE_FMT_LOCAL:
		/*
		 * No local regular files yet.
		 */
		if (S_ISREG(mode) && whichfork == XFS_DATA_FORK)
			return __this_address;
		if (di_nextents)
			return __this_address;
		break;
	case XFS_DINODE_FMT_EXTENTS:
		if (di_nextents > XFS_DFORK_MAXEXT(dip, mp, whichfork))
			return __this_address;
		break;
	case XFS_DINODE_FMT_BTREE:
		max_extents = xfs_iext_max_nextents(
					xfs_dinode_has_large_extent_counts(dip),
					whichfork);
		if (di_nextents > max_extents)
			return __this_address;
		break;
	case XFS_DINODE_FMT_META_BTREE:
		/* Only valid on metadir filesystems, for metadata inodes. */
		if (!xfs_has_metadir(mp))
			return __this_address;
		if (!(dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_METADATA)))
			return __this_address;
		switch (be16_to_cpu(dip->di_metatype)) {
		case XFS_METAFILE_RTRMAP:
			/*
			 * growfs must create the rtrmap inodes before adding a
			 * realtime volume to the filesystem, so we cannot use
			 * the rtrmapbt predicate here.
			 */
			if (!xfs_has_rmapbt(mp))
				return __this_address;
			break;
		case XFS_METAFILE_RTREFCOUNT:
			/* same comment about growfs and rmap inodes applies */
			if (!xfs_has_reflink(mp))
				return __this_address;
			break;
		default:
			return __this_address;
		}
		break;
	default:
		return __this_address;
	}
	return NULL;
}

/*
 * Check that a non-zero di_forkoff is legal for the data fork format of @dip.
 * A zero fork offset is always accepted here.  Returns NULL if acceptable,
 * otherwise a non-NULL failure address.
 */
static xfs_failaddr_t
xfs_dinode_verify_forkoff(
	struct xfs_dinode	*dip,
	struct xfs_mount	*mp)
{
	if (!dip->di_forkoff)
		return NULL;

	switch (dip->di_format) {
	case XFS_DINODE_FMT_DEV:
		/* di_forkoff is compared in 8-byte units (note the >> 3). */
		if (dip->di_forkoff != (roundup(sizeof(xfs_dev_t), 8) >> 3))
			return __this_address;
		break;
	case XFS_DINODE_FMT_META_BTREE:
		if (!xfs_has_metadir(mp) || !xfs_has_parent(mp))
			return __this_address;
		fallthrough;
	case XFS_DINODE_FMT_LOCAL:	/* fall through ... */
	case XFS_DINODE_FMT_EXTENTS:    /* fall through ... */
	case XFS_DINODE_FMT_BTREE:
		if (dip->di_forkoff >= (XFS_LITINO(mp) >> 3))
			return __this_address;
		break;
	default:
		return __this_address;
	}
	return NULL;
}

/*
 * Check the fields tied to the large extent count feature: an inode using
 * large extent counts requires filesystem support and a zero di_nrext64_pad;
 * otherwise, on v3 and later inodes, di_v3_pad must be zero.
 */
static xfs_failaddr_t
xfs_dinode_verify_nrext64(
	struct xfs_mount	*mp,
	struct xfs_dinode	*dip)
{
	if (xfs_dinode_has_large_extent_counts(dip)) {
		if (!xfs_has_large_extent_counts(mp))
			return __this_address;
		if (dip->di_nrext64_pad != 0)
			return __this_address;
	} else if (dip->di_version >= 3) {
		if (dip->di_v3_pad != 0)
			return __this_address;
	}

	return NULL;
}

/*
 * Validate all the picky requirements we have for a file that claims to be
 * filesystem metadata.
*/
/*
 * @mode, @flags and @flags2 are the CPU-endian di_mode, di_flags and
 * di_flags2 of @dip.  Returns NULL if every requirement is met, otherwise a
 * non-NULL failure address taken at the first check that failed.
 */
xfs_failaddr_t
xfs_dinode_verify_metadir(
	struct xfs_mount	*mp,
	struct xfs_dinode	*dip,
	uint16_t		mode,
	uint16_t		flags,
	uint64_t		flags2)
{
	if (!xfs_has_metadir(mp))
		return __this_address;

	/* V5 filesystem only */
	if (dip->di_version < 3)
		return __this_address;

	if (be16_to_cpu(dip->di_metatype) >= XFS_METAFILE_MAX)
		return __this_address;

	/* V3 inode fields that are always zero */
	if ((flags2 & XFS_DIFLAG2_NREXT64) && dip->di_nrext64_pad)
		return __this_address;
	if (!(flags2 & XFS_DIFLAG2_NREXT64) && dip->di_flushiter)
		return __this_address;

	/* Metadata files can only be directories or regular files */
	if (!S_ISDIR(mode) && !S_ISREG(mode))
		return __this_address;

	/* They must have zero access permissions */
	if (mode & 0777)
		return __this_address;

	/* DMAPI event and state masks are zero */
	if (dip->di_dmevmask || dip->di_dmstate)
		return __this_address;

	/*
	 * User and group IDs must be zero.  The project ID is used for
	 * grouping inodes.  Metadata inodes are never accounted to quotas.
	 */
	if (dip->di_uid || dip->di_gid)
		return __this_address;

	/* Mandatory inode flags must be set */
	if (S_ISDIR(mode)) {
		if ((flags & XFS_METADIR_DIFLAGS) != XFS_METADIR_DIFLAGS)
			return __this_address;
	} else {
		if ((flags & XFS_METAFILE_DIFLAGS) != XFS_METAFILE_DIFLAGS)
			return __this_address;
	}

	/* dax flags2 must not be set */
	if (flags2 & XFS_DIFLAG2_DAX)
		return __this_address;

	return NULL;
}

/*
 * Verify the ondisk inode @dip, which is expected to be inode number @ino.
 *
 * Returns NULL if every check passes, otherwise a non-NULL failure address
 * taken at the first check that failed.  The checks run in a fixed order, so
 * the returned address identifies the earliest problem found.  Inodes older
 * than version 3 skip the flags2-based checks at the end.
 */
xfs_failaddr_t
xfs_dinode_verify(
	struct xfs_mount	*mp,
	xfs_ino_t		ino,
	struct xfs_dinode	*dip)
{
	xfs_failaddr_t		fa;
	uint16_t		mode;
	uint16_t		flags;
	uint64_t		flags2;
	uint64_t		di_size;
	xfs_extnum_t		nextents;
	xfs_extnum_t		naextents;
	xfs_filblks_t		nblocks;

	if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
		return __this_address;

	/* Verify v3 integrity information first */
	if (dip->di_version >= 3) {
		if (!xfs_has_v3inodes(mp))
			return __this_address;
		if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
				      XFS_DINODE_CRC_OFF))
			return __this_address;
		if (be64_to_cpu(dip->di_ino) != ino)
			return __this_address;
		if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid))
			return __this_address;
	}

	/*
	 * Historical note: xfsprogs in the 3.2 era set up its incore inodes to
	 * have di_nlink track the link count, even if the actual filesystem
	 * only supported V1 inodes (i.e. di_onlink).  When writing out the
	 * ondisk inode, it would set both the ondisk di_nlink and di_onlink to
	 * the incore di_nlink value, which is why we cannot check for
	 * di_nlink==0 on a V1 inode.  V2/3 inodes would get written out with
	 * di_onlink==0, so we can check that.
	 */
	if (dip->di_version == 2) {
		if (dip->di_metatype)
			return __this_address;
	} else if (dip->di_version >= 3) {
		if (!xfs_dinode_is_metadir(dip) && dip->di_metatype)
			return __this_address;
	}

	/* don't allow invalid i_size */
	di_size = be64_to_cpu(dip->di_size);
	if (di_size & (1ULL << 63))
		return __this_address;

	mode = be16_to_cpu(dip->di_mode);
	if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN)
		return __this_address;

	/*
	 * No zero-length symlinks/dirs unless they're unlinked and hence being
	 * inactivated.
	 */
	if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) {
		if (dip->di_version > 1) {
			if (dip->di_nlink)
				return __this_address;
		} else {
			/* di_metatype used to be di_onlink */
			if (dip->di_metatype)
				return __this_address;
		}
	}

	fa = xfs_dinode_verify_nrext64(mp, dip);
	if (fa)
		return fa;

	nextents = xfs_dfork_data_extents(dip);
	naextents = xfs_dfork_attr_extents(dip);
	nblocks = be64_to_cpu(dip->di_nblocks);

	/* Fork checks carried over from xfs_iformat_fork */
	if (mode && nextents + naextents > nblocks)
		return __this_address;

	if (S_ISDIR(mode) && nextents > mp->m_dir_geo->max_extents)
		return __this_address;

	if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize)
		return __this_address;

	flags = be16_to_cpu(dip->di_flags);

	/* A realtime inode needs a realtime device to exist. */
	if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp)
		return __this_address;

	/* check for illegal values of forkoff */
	fa = xfs_dinode_verify_forkoff(dip, mp);
	if (fa)
		return fa;

	/* Do we have appropriate data fork formats for the mode? */
	switch (mode & S_IFMT) {
	case S_IFIFO:
	case S_IFCHR:
	case S_IFBLK:
	case S_IFSOCK:
		if (dip->di_format != XFS_DINODE_FMT_DEV)
			return __this_address;
		break;
	case S_IFREG:
	case S_IFLNK:
	case S_IFDIR:
		fa = xfs_dinode_verify_fork(dip, mp, XFS_DATA_FORK);
		if (fa)
			return fa;
		break;
	case 0:
		/* Uninitialized inode ok. */
		break;
	default:
		return __this_address;
	}

	if (dip->di_forkoff) {
		fa = xfs_dinode_verify_fork(dip, mp, XFS_ATTR_FORK);
		if (fa)
			return fa;
	} else {
		/*
		 * If there is no fork offset, this may be a freshly-made inode
		 * in a new disk cluster, in which case di_aformat is zeroed.
		 * Otherwise, such an inode must be in EXTENTS format; this goes
		 * for freed inodes as well.
		 */
		switch (dip->di_aformat) {
		case 0:
		case XFS_DINODE_FMT_EXTENTS:
			break;
		default:
			return __this_address;
		}
		if (naextents)
			return __this_address;
	}

	/* extent size hint validation */
	fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize),
			mode, flags);
	if (fa)
		return fa;

	/* only version 3 or greater inodes are extensively verified here */
	if (dip->di_version < 3)
		return NULL;

	flags2 = be64_to_cpu(dip->di_flags2);

	/* don't allow reflink/cowextsize if we don't have reflink */
	if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) &&
	     !xfs_has_reflink(mp))
		return __this_address;

	/* only regular files get reflink */
	if ((flags2 & XFS_DIFLAG2_REFLINK) && (mode & S_IFMT) != S_IFREG)
		return __this_address;

	/* don't let reflink and realtime mix */
	if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME) &&
	    !xfs_has_rtreflink(mp))
		return __this_address;

	/* COW extent size hint validation */
	fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
			mode, flags, flags2);
	if (fa)
		return fa;

	/* bigtime iflag can only happen on bigtime filesystems */
	if (xfs_dinode_has_bigtime(dip) &&
	    !xfs_has_bigtime(mp))
		return __this_address;

	if (flags2 & XFS_DIFLAG2_METADATA) {
		fa = xfs_dinode_verify_metadir(mp, dip, mode, flags, flags2);
		if (fa)
			return fa;
	}

	/* metadata inodes containing btrees always have zero extent count */
	if (XFS_DFORK_FORMAT(dip, XFS_DATA_FORK) != XFS_DINODE_FMT_META_BTREE) {
		if (nextents + naextents == 0 && nblocks != 0)
			return __this_address;
	}

	return NULL;
}

/*
 * Recompute the checksum of the ondisk inode @dip and store it in di_crc.
 * Inodes older than version 3 are left untouched.  The checksum covers
 * sb_inodesize bytes starting at @dip.
 */
void
xfs_dinode_calc_crc(
	struct xfs_mount	*mp,
	struct xfs_dinode	*dip)
{
	uint32_t		crc;

	if (dip->di_version < 3)
		return;

	ASSERT(xfs_has_crc(mp));
	crc = xfs_start_cksum_update((char *)dip, mp->m_sb.sb_inodesize,
			      XFS_DINODE_CRC_OFF);
	dip->di_crc = xfs_end_cksum(crc);
}

/*
 * Validate di_extsize hint.
 *
 * 1. Extent size hint is only valid for directories and regular files.
 * 2. FS_XFLAG_EXTSIZE is only valid for regular files.
 * 3. FS_XFLAG_EXTSZINHERIT is only valid for directories.
 * 4. Hint cannot be larger than XFS_MAX_BMBT_EXTLEN.
 * 5. Can be changed on directories at any time.
 * 6. Hint value of 0 turns off hints, clears inode flags.
 * 7. Extent size must be a multiple of the appropriate block size.
 *    For realtime files, this is the rt extent size.
 * 8. For non-realtime files, the extent size hint must be limited
 *    to half the AG size to avoid alignment extending the extent beyond the
 *    limits of the AG.
811 */ 812 xfs_failaddr_t 813 xfs_inode_validate_extsize( 814 struct xfs_mount *mp, 815 uint32_t extsize, 816 uint16_t mode, 817 uint16_t flags) 818 { 819 bool rt_flag; 820 bool hint_flag; 821 bool inherit_flag; 822 uint32_t extsize_bytes; 823 uint32_t blocksize_bytes; 824 825 rt_flag = (flags & XFS_DIFLAG_REALTIME); 826 hint_flag = (flags & XFS_DIFLAG_EXTSIZE); 827 inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT); 828 extsize_bytes = XFS_FSB_TO_B(mp, extsize); 829 830 /* 831 * This comment describes a historic gap in this verifier function. 832 * 833 * For a directory with both RTINHERIT and EXTSZINHERIT flags set, this 834 * function has never checked that the extent size hint is an integer 835 * multiple of the realtime extent size. Since we allow users to set 836 * this combination on non-rt filesystems /and/ to change the rt 837 * extent size when adding a rt device to a filesystem, the net effect 838 * is that users can configure a filesystem anticipating one rt 839 * geometry and change their minds later. Directories do not use the 840 * extent size hint, so this is harmless for them. 841 * 842 * If a directory with a misaligned extent size hint is allowed to 843 * propagate that hint into a new regular realtime file, the result 844 * is that the inode cluster buffer verifier will trigger a corruption 845 * shutdown the next time it is run, because the verifier has always 846 * enforced the alignment rule for regular files. 847 * 848 * Because we allow administrators to set a new rt extent size when 849 * adding a rt section, we cannot add a check to this verifier because 850 * that will result a new source of directory corruption errors when 851 * reading an existing filesystem. Instead, we rely on callers to 852 * decide when alignment checks are appropriate, and fix things up as 853 * needed. 
854 */ 855 856 if (rt_flag) 857 blocksize_bytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize); 858 else 859 blocksize_bytes = mp->m_sb.sb_blocksize; 860 861 if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode))) 862 return __this_address; 863 864 if (hint_flag && !S_ISREG(mode)) 865 return __this_address; 866 867 if (inherit_flag && !S_ISDIR(mode)) 868 return __this_address; 869 870 if ((hint_flag || inherit_flag) && extsize == 0) 871 return __this_address; 872 873 /* free inodes get flags set to zero but extsize remains */ 874 if (mode && !(hint_flag || inherit_flag) && extsize != 0) 875 return __this_address; 876 877 if (extsize_bytes % blocksize_bytes) 878 return __this_address; 879 880 if (extsize > XFS_MAX_BMBT_EXTLEN) 881 return __this_address; 882 883 if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2) 884 return __this_address; 885 886 return NULL; 887 } 888 889 /* 890 * Validate di_cowextsize hint. 891 * 892 * 1. CoW extent size hint can only be set if reflink is enabled on the fs. 893 * The inode does not have to have any shared blocks, but it must be a v3. 894 * 2. FS_XFLAG_COWEXTSIZE is only valid for directories and regular files; 895 * for a directory, the hint is propagated to new files. 896 * 3. Can be changed on files & directories at any time. 897 * 4. Hint value of 0 turns off hints, clears inode flags. 898 * 5. Extent size must be a multiple of the appropriate block size. 899 * 6. The extent size hint must be limited to half the AG size to avoid 900 * alignment extending the extent beyond the limits of the AG. 
901 */ 902 xfs_failaddr_t 903 xfs_inode_validate_cowextsize( 904 struct xfs_mount *mp, 905 uint32_t cowextsize, 906 uint16_t mode, 907 uint16_t flags, 908 uint64_t flags2) 909 { 910 bool rt_flag; 911 bool hint_flag; 912 uint32_t cowextsize_bytes; 913 uint32_t blocksize_bytes; 914 915 rt_flag = (flags & XFS_DIFLAG_REALTIME); 916 hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE); 917 cowextsize_bytes = XFS_FSB_TO_B(mp, cowextsize); 918 919 /* 920 * Similar to extent size hints, a directory can be configured to 921 * propagate realtime status and a CoW extent size hint to newly 922 * created files even if there is no realtime device, and the hints on 923 * disk can become misaligned if the sysadmin changes the rt extent 924 * size while adding the realtime device. 925 * 926 * Therefore, we can only enforce the rextsize alignment check against 927 * regular realtime files, and rely on callers to decide when alignment 928 * checks are appropriate, and fix things up as needed. 929 */ 930 931 if (rt_flag) 932 blocksize_bytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize); 933 else 934 blocksize_bytes = mp->m_sb.sb_blocksize; 935 936 if (hint_flag && !xfs_has_reflink(mp)) 937 return __this_address; 938 939 if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode))) 940 return __this_address; 941 942 if (hint_flag && cowextsize == 0) 943 return __this_address; 944 945 /* free inodes get flags set to zero but cowextsize remains */ 946 if (mode && !hint_flag && cowextsize != 0) 947 return __this_address; 948 949 if (cowextsize_bytes % blocksize_bytes) 950 return __this_address; 951 952 if (cowextsize > XFS_MAX_BMBT_EXTLEN) 953 return __this_address; 954 955 if (!rt_flag && cowextsize > mp->m_sb.sb_agblocks / 2) 956 return __this_address; 957 958 return NULL; 959 } 960