1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 4 * All Rights Reserved. 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_log_format.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_mount.h" 13 #include "xfs_ag.h" 14 #include "xfs_inode.h" 15 #include "xfs_errortag.h" 16 #include "xfs_error.h" 17 #include "xfs_icache.h" 18 #include "xfs_trans.h" 19 #include "xfs_ialloc.h" 20 #include "xfs_dir2.h" 21 #include "xfs_health.h" 22 23 #include <linux/iversion.h> 24 25 /* 26 * If we are doing readahead on an inode buffer, we might be in log recovery 27 * reading an inode allocation buffer that hasn't yet been replayed, and hence 28 * has not had the inode cores stamped into it. Hence for readahead, the buffer 29 * may be potentially invalid. 30 * 31 * If the readahead buffer is invalid, we need to mark it with an error and 32 * clear the DONE status of the buffer so that a followup read will re-read it 33 * from disk. We don't report the error otherwise to avoid warnings during log 34 * recovery and we don't get unnecessary panics on debug kernels. We use EIO here 35 * because all we want to do is say readahead failed; there is no-one to report 36 * the error to, so this will distinguish it from a non-ra verifier failure. 37 * Changes to this readahead error behaviour also need to be reflected in 38 * xfs_dquot_buf_readahead_verify(). 
39 */ 40 static void 41 xfs_inode_buf_verify( 42 struct xfs_buf *bp, 43 bool readahead) 44 { 45 struct xfs_mount *mp = bp->b_mount; 46 int i; 47 int ni; 48 49 /* 50 * Validate the magic number and version of every inode in the buffer 51 */ 52 ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock; 53 for (i = 0; i < ni; i++) { 54 struct xfs_dinode *dip; 55 xfs_agino_t unlinked_ino; 56 int di_ok; 57 58 dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog)); 59 unlinked_ino = be32_to_cpu(dip->di_next_unlinked); 60 di_ok = xfs_verify_magic16(bp, dip->di_magic) && 61 xfs_dinode_good_version(mp, dip->di_version) && 62 xfs_verify_agino_or_null(bp->b_pag, unlinked_ino); 63 if (unlikely(XFS_TEST_ERROR(!di_ok, mp, 64 XFS_ERRTAG_ITOBP_INOTOBP))) { 65 if (readahead) { 66 bp->b_flags &= ~XBF_DONE; 67 xfs_buf_ioerror(bp, -EIO); 68 return; 69 } 70 71 #ifdef DEBUG 72 xfs_alert(mp, 73 "bad inode magic/vsn daddr %lld #%d (magic=%x)", 74 (unsigned long long)xfs_buf_daddr(bp), i, 75 be16_to_cpu(dip->di_magic)); 76 #endif 77 xfs_buf_verifier_error(bp, -EFSCORRUPTED, 78 __func__, dip, sizeof(*dip), 79 NULL); 80 return; 81 } 82 } 83 } 84 85 86 static void 87 xfs_inode_buf_read_verify( 88 struct xfs_buf *bp) 89 { 90 xfs_inode_buf_verify(bp, false); 91 } 92 93 static void 94 xfs_inode_buf_readahead_verify( 95 struct xfs_buf *bp) 96 { 97 xfs_inode_buf_verify(bp, true); 98 } 99 100 static void 101 xfs_inode_buf_write_verify( 102 struct xfs_buf *bp) 103 { 104 xfs_inode_buf_verify(bp, false); 105 } 106 107 const struct xfs_buf_ops xfs_inode_buf_ops = { 108 .name = "xfs_inode", 109 .magic16 = { cpu_to_be16(XFS_DINODE_MAGIC), 110 cpu_to_be16(XFS_DINODE_MAGIC) }, 111 .verify_read = xfs_inode_buf_read_verify, 112 .verify_write = xfs_inode_buf_write_verify, 113 }; 114 115 const struct xfs_buf_ops xfs_inode_buf_ra_ops = { 116 .name = "xfs_inode_ra", 117 .magic16 = { cpu_to_be16(XFS_DINODE_MAGIC), 118 cpu_to_be16(XFS_DINODE_MAGIC) }, 119 .verify_read = xfs_inode_buf_readahead_verify, 120 
.verify_write = xfs_inode_buf_write_verify, 121 }; 122 123 124 /* 125 * This routine is called to map an inode to the buffer containing the on-disk 126 * version of the inode. It returns a pointer to the buffer containing the 127 * on-disk inode in the bpp parameter. 128 */ 129 int 130 xfs_imap_to_bp( 131 struct xfs_mount *mp, 132 struct xfs_trans *tp, 133 struct xfs_imap *imap, 134 struct xfs_buf **bpp) 135 { 136 int error; 137 138 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, 139 imap->im_len, XBF_UNMAPPED, bpp, &xfs_inode_buf_ops); 140 if (xfs_metadata_is_sick(error)) 141 xfs_agno_mark_sick(mp, xfs_daddr_to_agno(mp, imap->im_blkno), 142 XFS_SICK_AG_INODES); 143 return error; 144 } 145 146 static inline struct timespec64 xfs_inode_decode_bigtime(uint64_t ts) 147 { 148 struct timespec64 tv; 149 uint32_t n; 150 151 tv.tv_sec = xfs_bigtime_to_unix(div_u64_rem(ts, NSEC_PER_SEC, &n)); 152 tv.tv_nsec = n; 153 154 return tv; 155 } 156 157 /* Convert an ondisk timestamp to an incore timestamp. */ 158 struct timespec64 159 xfs_inode_from_disk_ts( 160 struct xfs_dinode *dip, 161 const xfs_timestamp_t ts) 162 { 163 struct timespec64 tv; 164 struct xfs_legacy_timestamp *lts; 165 166 if (xfs_dinode_has_bigtime(dip)) 167 return xfs_inode_decode_bigtime(be64_to_cpu(ts)); 168 169 lts = (struct xfs_legacy_timestamp *)&ts; 170 tv.tv_sec = (int)be32_to_cpu(lts->t_sec); 171 tv.tv_nsec = (int)be32_to_cpu(lts->t_nsec); 172 173 return tv; 174 } 175 176 int 177 xfs_inode_from_disk( 178 struct xfs_inode *ip, 179 struct xfs_dinode *from) 180 { 181 struct inode *inode = VFS_I(ip); 182 int error; 183 xfs_failaddr_t fa; 184 185 ASSERT(ip->i_cowfp == NULL); 186 187 fa = xfs_dinode_verify(ip->i_mount, ip->i_ino, from); 188 if (fa) { 189 xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", from, 190 sizeof(*from), fa); 191 return -EFSCORRUPTED; 192 } 193 194 /* 195 * First get the permanent information that is needed to allocate an 196 * inode. 
If the inode is unused, mode is zero and we shouldn't mess 197 * with the uninitialized part of it. 198 */ 199 if (!xfs_has_v3inodes(ip->i_mount)) 200 ip->i_flushiter = be16_to_cpu(from->di_flushiter); 201 inode->i_generation = be32_to_cpu(from->di_gen); 202 inode->i_mode = be16_to_cpu(from->di_mode); 203 if (!inode->i_mode) 204 return 0; 205 206 /* 207 * Convert v1 inodes immediately to v2 inode format as this is the 208 * minimum inode version format we support in the rest of the code. 209 * They will also be unconditionally written back to disk as v2 inodes. 210 */ 211 if (unlikely(from->di_version == 1)) { 212 set_nlink(inode, be16_to_cpu(from->di_onlink)); 213 ip->i_projid = 0; 214 } else { 215 set_nlink(inode, be32_to_cpu(from->di_nlink)); 216 ip->i_projid = (prid_t)be16_to_cpu(from->di_projid_hi) << 16 | 217 be16_to_cpu(from->di_projid_lo); 218 } 219 220 i_uid_write(inode, be32_to_cpu(from->di_uid)); 221 i_gid_write(inode, be32_to_cpu(from->di_gid)); 222 223 /* 224 * Time is signed, so need to convert to signed 32 bit before 225 * storing in inode timestamp which may be 64 bit. Otherwise 226 * a time before epoch is converted to a time long after epoch 227 * on 64 bit systems. 
228 */ 229 inode_set_atime_to_ts(inode, 230 xfs_inode_from_disk_ts(from, from->di_atime)); 231 inode_set_mtime_to_ts(inode, 232 xfs_inode_from_disk_ts(from, from->di_mtime)); 233 inode_set_ctime_to_ts(inode, 234 xfs_inode_from_disk_ts(from, from->di_ctime)); 235 236 ip->i_disk_size = be64_to_cpu(from->di_size); 237 ip->i_nblocks = be64_to_cpu(from->di_nblocks); 238 ip->i_extsize = be32_to_cpu(from->di_extsize); 239 ip->i_forkoff = from->di_forkoff; 240 ip->i_diflags = be16_to_cpu(from->di_flags); 241 ip->i_next_unlinked = be32_to_cpu(from->di_next_unlinked); 242 243 if (from->di_dmevmask || from->di_dmstate) 244 xfs_iflags_set(ip, XFS_IPRESERVE_DM_FIELDS); 245 246 if (xfs_has_v3inodes(ip->i_mount)) { 247 inode_set_iversion_queried(inode, 248 be64_to_cpu(from->di_changecount)); 249 ip->i_crtime = xfs_inode_from_disk_ts(from, from->di_crtime); 250 ip->i_diflags2 = be64_to_cpu(from->di_flags2); 251 ip->i_cowextsize = be32_to_cpu(from->di_cowextsize); 252 } 253 254 error = xfs_iformat_data_fork(ip, from); 255 if (error) 256 return error; 257 if (from->di_forkoff) { 258 error = xfs_iformat_attr_fork(ip, from); 259 if (error) 260 goto out_destroy_data_fork; 261 } 262 if (xfs_is_reflink_inode(ip)) 263 xfs_ifork_init_cow(ip); 264 return 0; 265 266 out_destroy_data_fork: 267 xfs_idestroy_fork(&ip->i_df); 268 return error; 269 } 270 271 /* Convert an incore timestamp to an ondisk timestamp. 
*/
/*
 * Inodes with the bigtime feature store a single 64-bit encoded value;
 * all others use the legacy layout of two 32-bit big-endian fields
 * (seconds and nanoseconds).
 */
static inline xfs_timestamp_t
xfs_inode_to_disk_ts(
	struct xfs_inode		*ip,
	const struct timespec64		tv)
{
	struct xfs_legacy_timestamp	*lts;
	xfs_timestamp_t			ts;

	if (xfs_inode_has_bigtime(ip))
		return cpu_to_be64(xfs_inode_encode_bigtime(tv));

	/* Fill the legacy sec/nsec pair in place over the ondisk timestamp. */
	lts = (struct xfs_legacy_timestamp *)&ts;
	lts->t_sec = cpu_to_be32(tv.tv_sec);
	lts->t_nsec = cpu_to_be32(tv.tv_nsec);

	return ts;
}

/*
 * Store the data and attr fork extent counts of @ip in the ondisk inode @to,
 * using the large counter fields when the inode has large extent counts and
 * the original fields otherwise.
 */
static inline void
xfs_inode_to_disk_iext_counters(
	struct xfs_inode	*ip,
	struct xfs_dinode	*to)
{
	if (xfs_inode_has_large_extent_counts(ip)) {
		to->di_big_nextents = cpu_to_be64(xfs_ifork_nextents(&ip->i_df));
		to->di_big_anextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_af));
		/*
		 * We might be upgrading the inode to use larger extent counters
		 * than was previously used. Hence zero the unused field.
		 */
		to->di_nrext64_pad = cpu_to_be16(0);
	} else {
		to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df));
		to->di_anextents = cpu_to_be16(xfs_ifork_nextents(&ip->i_af));
	}
}

/*
 * Fill in the ondisk inode @to from the incore inode @ip.
 *
 * The inode is always written as version 3 (when the filesystem has v3
 * inodes) or version 2; di_onlink is always zeroed.  @lsn is only stored
 * (in di_lsn) for v3 inodes.  This function does not compute the CRC.
 */
void
xfs_inode_to_disk(
	struct xfs_inode	*ip,
	struct xfs_dinode	*to,
	xfs_lsn_t		lsn)
{
	struct inode		*inode = VFS_I(ip);

	to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
	to->di_onlink = 0;

	to->di_format = xfs_ifork_format(&ip->i_df);
	to->di_uid = cpu_to_be32(i_uid_read(inode));
	to->di_gid = cpu_to_be32(i_gid_read(inode));
	/* The project id is split into two 16-bit halves on disk. */
	to->di_projid_lo = cpu_to_be16(ip->i_projid & 0xffff);
	to->di_projid_hi = cpu_to_be16(ip->i_projid >> 16);

	to->di_atime = xfs_inode_to_disk_ts(ip, inode_get_atime(inode));
	to->di_mtime = xfs_inode_to_disk_ts(ip, inode_get_mtime(inode));
	to->di_ctime = xfs_inode_to_disk_ts(ip, inode_get_ctime(inode));
	to->di_nlink = cpu_to_be32(inode->i_nlink);
	to->di_gen = cpu_to_be32(inode->i_generation);
	to->di_mode = cpu_to_be16(inode->i_mode);

	to->di_size = cpu_to_be64(ip->i_disk_size);
	to->di_nblocks = cpu_to_be64(ip->i_nblocks);
	to->di_extsize = cpu_to_be32(ip->i_extsize);
	to->di_forkoff = ip->i_forkoff;
	to->di_aformat = xfs_ifork_format(&ip->i_af);
	to->di_flags = cpu_to_be16(ip->i_diflags);

	if (xfs_has_v3inodes(ip->i_mount)) {
		to->di_version = 3;
		to->di_changecount = cpu_to_be64(inode_peek_iversion(inode));
		to->di_crtime = xfs_inode_to_disk_ts(ip, ip->i_crtime);
		to->di_flags2 = cpu_to_be64(ip->i_diflags2);
		to->di_cowextsize = cpu_to_be32(ip->i_cowextsize);
		to->di_ino = cpu_to_be64(ip->i_ino);
		to->di_lsn = cpu_to_be64(lsn);
		memset(to->di_pad2, 0, sizeof(to->di_pad2));
		uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
		to->di_v3_pad = 0;
	} else {
		to->di_version = 2;
		to->di_flushiter = cpu_to_be16(ip->i_flushiter);
		memset(to->di_v2_pad, 0, sizeof(to->di_v2_pad));
	}

	xfs_inode_to_disk_iext_counters(ip, to);
}

/*
 * Check that the format and extent count of one fork (@whichfork) of the
 * ondisk inode @dip are consistent with each other and, for the data fork,
 * with di_size.  Returns NULL if the fork looks sane, otherwise the
 * __this_address value of the check that failed.
 */
static xfs_failaddr_t
xfs_dinode_verify_fork(
	struct xfs_dinode	*dip,
	struct xfs_mount	*mp,
	int			whichfork)
{
	xfs_extnum_t		di_nextents;
	xfs_extnum_t		max_extents;
	mode_t			mode = be16_to_cpu(dip->di_mode);
	uint32_t		fork_size = XFS_DFORK_SIZE(dip, mp, whichfork);
	uint32_t		fork_format = XFS_DFORK_FORMAT(dip, whichfork);

	di_nextents = xfs_dfork_nextents(dip, whichfork);

	/*
	 * For fork types that can contain local data, check that the fork
	 * format matches the size of local data contained within the fork.
	 *
	 * For all types, check that when the size says the fork should be in
	 * extent or btree format, the inode isn't claiming it is in local
	 * format.
	 */
	if (whichfork == XFS_DATA_FORK) {
		if (S_ISDIR(mode) || S_ISLNK(mode)) {
			if (be64_to_cpu(dip->di_size) <= fork_size &&
			    fork_format != XFS_DINODE_FMT_LOCAL)
				return __this_address;
		}

		if (be64_to_cpu(dip->di_size) > fork_size &&
		    fork_format == XFS_DINODE_FMT_LOCAL)
			return __this_address;
	}

	switch (fork_format) {
	case XFS_DINODE_FMT_LOCAL:
		/*
		 * No local regular files yet.
		 */
		if (S_ISREG(mode) && whichfork == XFS_DATA_FORK)
			return __this_address;
		/* A local format fork must not claim any extents. */
		if (di_nextents)
			return __this_address;
		break;
	case XFS_DINODE_FMT_EXTENTS:
		if (di_nextents > XFS_DFORK_MAXEXT(dip, mp, whichfork))
			return __this_address;
		break;
	case XFS_DINODE_FMT_BTREE:
		max_extents = xfs_iext_max_nextents(
					xfs_dinode_has_large_extent_counts(dip),
					whichfork);
		if (di_nextents > max_extents)
			return __this_address;
		break;
	default:
		return __this_address;
	}
	return NULL;
}

/*
 * Check di_forkoff against the data fork format.  A zero forkoff is always
 * accepted.  Returns NULL if acceptable, otherwise the __this_address value
 * of the failing check.
 */
static xfs_failaddr_t
xfs_dinode_verify_forkoff(
	struct xfs_dinode	*dip,
	struct xfs_mount	*mp)
{
	if (!dip->di_forkoff)
		return NULL;

	switch (dip->di_format)  {
	case XFS_DINODE_FMT_DEV:
		/* di_forkoff is compared in units of 8 bytes (note the >> 3). */
		if (dip->di_forkoff != (roundup(sizeof(xfs_dev_t), 8) >> 3))
			return __this_address;
		break;
	case XFS_DINODE_FMT_LOCAL:	/* fall through ... */
	case XFS_DINODE_FMT_EXTENTS:    /* fall through ... */
	case XFS_DINODE_FMT_BTREE:
		if (dip->di_forkoff >= (XFS_LITINO(mp) >> 3))
			return __this_address;
		break;
	default:
		return __this_address;
	}
	return NULL;
}

/*
 * Check the large extent counter state of @dip: an inode with large extent
 * counts requires the filesystem feature and a zero di_nrext64_pad; any
 * other v3 (or later) inode must have a zero di_v3_pad.
 */
static xfs_failaddr_t
xfs_dinode_verify_nrext64(
	struct xfs_mount	*mp,
	struct xfs_dinode	*dip)
{
	if (xfs_dinode_has_large_extent_counts(dip)) {
		if (!xfs_has_large_extent_counts(mp))
			return __this_address;
		if (dip->di_nrext64_pad != 0)
			return __this_address;
	} else if (dip->di_version >= 3) {
		if (dip->di_v3_pad != 0)
			return __this_address;
	}

	return NULL;
}

/*
 * Verify the ondisk inode @dip.
 *
 * @ino is the inode number the caller expects; it is compared against
 * di_ino for v3 and later inodes.  Returns NULL if every check passes,
 * otherwise the failure address from the first check that failed (either
 * this function's own __this_address or the value returned by a helper).
 * The checks run in a fixed order, so the returned address identifies
 * which check tripped.
 */
xfs_failaddr_t
xfs_dinode_verify(
	struct xfs_mount	*mp,
	xfs_ino_t		ino,
	struct xfs_dinode	*dip)
{
	xfs_failaddr_t		fa;
	uint16_t		mode;
	uint16_t		flags;
	uint64_t		flags2;
	uint64_t		di_size;
	xfs_extnum_t		nextents;
	xfs_extnum_t		naextents;
	xfs_filblks_t		nblocks;

	if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
		return __this_address;

	/* Verify v3 integrity information first */
	if (dip->di_version >= 3) {
		if (!xfs_has_v3inodes(mp))
			return __this_address;
		if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
				      XFS_DINODE_CRC_OFF))
			return __this_address;
		if (be64_to_cpu(dip->di_ino) != ino)
			return __this_address;
		if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid))
			return __this_address;
	}

	/*
	 * Only one of the two link count fields may be in use: di_onlink
	 * must be zero on v2+ inodes, di_nlink must be zero on v1 inodes.
	 */
	if (dip->di_version > 1) {
		if (dip->di_onlink)
			return __this_address;
	} else {
		if (dip->di_nlink)
			return __this_address;
	}

	/* don't allow invalid i_size */
	di_size = be64_to_cpu(dip->di_size);
	if (di_size & (1ULL << 63))
		return __this_address;

	mode = be16_to_cpu(dip->di_mode);
	if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN)
		return __this_address;

	/* No zero-length symlinks/dirs. */
	if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0)
		return __this_address;

	fa = xfs_dinode_verify_nrext64(mp, dip);
	if (fa)
		return fa;

	nextents = xfs_dfork_data_extents(dip);
	naextents = xfs_dfork_attr_extents(dip);
	nblocks = be64_to_cpu(dip->di_nblocks);

	/* Fork checks carried over from xfs_iformat_fork */
	if (mode && nextents + naextents > nblocks)
		return __this_address;

	if (nextents + naextents == 0 && nblocks != 0)
		return __this_address;

	if (S_ISDIR(mode) && nextents > mp->m_dir_geo->max_extents)
		return __this_address;

	if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize)
		return __this_address;

	flags = be16_to_cpu(dip->di_flags);

	/* A realtime inode needs a realtime device to be present. */
	if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp)
		return __this_address;

	/* check for illegal values of forkoff */
	fa = xfs_dinode_verify_forkoff(dip, mp);
	if (fa)
		return fa;

	/* Do we have appropriate data fork formats for the mode? */
	switch (mode & S_IFMT) {
	case S_IFIFO:
	case S_IFCHR:
	case S_IFBLK:
	case S_IFSOCK:
		if (dip->di_format != XFS_DINODE_FMT_DEV)
			return __this_address;
		break;
	case S_IFREG:
	case S_IFLNK:
	case S_IFDIR:
		fa = xfs_dinode_verify_fork(dip, mp, XFS_DATA_FORK);
		if (fa)
			return fa;
		break;
	case 0:
		/* Uninitialized inode ok. */
		break;
	default:
		return __this_address;
	}

	if (dip->di_forkoff) {
		fa = xfs_dinode_verify_fork(dip, mp, XFS_ATTR_FORK);
		if (fa)
			return fa;
	} else {
		/*
		 * If there is no fork offset, this may be a freshly-made inode
		 * in a new disk cluster, in which case di_aformat is zeroed.
		 * Otherwise, such an inode must be in EXTENTS format; this goes
		 * for freed inodes as well.
		 */
		switch (dip->di_aformat) {
		case 0:
		case XFS_DINODE_FMT_EXTENTS:
			break;
		default:
			return __this_address;
		}
		/* Without an attr fork there cannot be attr extents. */
		if (naextents)
			return __this_address;
	}

	/* extent size hint validation */
	fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize),
			mode, flags);
	if (fa)
		return fa;

	/* only version 3 or greater inodes are extensively verified here */
	if (dip->di_version < 3)
		return NULL;

	flags2 = be64_to_cpu(dip->di_flags2);

	/* don't allow reflink/cowextsize if we don't have reflink */
	if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) &&
	     !xfs_has_reflink(mp))
		return __this_address;

	/* only regular files get reflink */
	if ((flags2 & XFS_DIFLAG2_REFLINK) && (mode & S_IFMT) != S_IFREG)
		return __this_address;

	/* don't let reflink and realtime mix */
	if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME))
		return __this_address;

	/* COW extent size hint validation */
	fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
			mode, flags, flags2);
	if (fa)
		return fa;

	/* bigtime iflag can only happen on bigtime filesystems */
	if (xfs_dinode_has_bigtime(dip) &&
	    !xfs_has_bigtime(mp))
		return __this_address;

	return NULL;
}

/*
 * Recompute and store the CRC of the ondisk inode @dip.  Inodes older than
 * version 3 are left untouched.  The checksum covers sb_inodesize bytes
 * starting at @dip.
 */
void
xfs_dinode_calc_crc(
	struct xfs_mount	*mp,
	struct xfs_dinode	*dip)
{
	uint32_t		crc;

	if (dip->di_version < 3)
		return;

	ASSERT(xfs_has_crc(mp));
	crc = xfs_start_cksum_update((char *)dip, mp->m_sb.sb_inodesize,
			      XFS_DINODE_CRC_OFF);
	dip->di_crc = xfs_end_cksum(crc);
}

/*
 * Validate di_extsize hint.
 *
 * 1. Extent size hint is only valid for directories and regular files.
 * 2. FS_XFLAG_EXTSIZE is only valid for regular files.
 * 3. FS_XFLAG_EXTSZINHERIT is only valid for directories.
 * 4. Hint cannot be larger than XFS_MAX_BMBT_EXTLEN.
 * 5.
Can be changed on directories at any time.
 * 6. Hint value of 0 turns off hints, clears inode flags.
 * 7. Extent size must be a multiple of the appropriate block size.
 *    For realtime files, this is the rt extent size.
 * 8. For non-realtime files, the extent size hint must be limited
 *    to half the AG size to avoid alignment extending the extent beyond the
 *    limits of the AG.
 *
 * Returns NULL when the hint is acceptable, otherwise the __this_address
 * value of the rule that was violated.
 */
xfs_failaddr_t
xfs_inode_validate_extsize(
	struct xfs_mount		*mp,
	uint32_t			extsize,
	uint16_t			mode,
	uint16_t			flags)
{
	const bool			is_rt = flags & XFS_DIFLAG_REALTIME;
	const bool			has_hint = flags & XFS_DIFLAG_EXTSIZE;
	const bool			has_inherit =
						flags & XFS_DIFLAG_EXTSZINHERIT;
	const bool			is_dir = S_ISDIR(mode);
	const bool			is_reg = S_ISREG(mode);
	uint32_t			hint_bytes = XFS_FSB_TO_B(mp, extsize);
	uint32_t			align_bytes;

	/*
	 * Known gap in this verifier, kept on purpose:
	 *
	 * A directory carrying both RTINHERIT and EXTSZINHERIT has never had
	 * its hint checked for being an integer multiple of the realtime
	 * extent size.  Users may set that flag combination on a non-rt
	 * filesystem /and/ may pick a new rt extent size when they later add
	 * a rt device, so a filesystem can be set up anticipating one rt
	 * geometry and end up with another.  Directories do not use the
	 * extent size hint themselves, so this is harmless for them.
	 *
	 * The harm comes when such a directory propagates its misaligned
	 * hint into a new regular realtime file: the inode cluster buffer
	 * verifier then triggers a corruption shutdown the next time it
	 * runs, because the alignment rule has always been enforced for
	 * regular files.
	 *
	 * Adding the check here is not an option.  Since administrators may
	 * choose a new rt extent size when adding a rt section, doing so
	 * would result in a new source of directory corruption errors when
	 * reading an existing filesystem.  Callers are instead relied upon
	 * to decide when alignment checks are appropriate, and to fix things
	 * up as needed.
	 */

	/* Realtime inodes align to the rt extent size, others to a block. */
	align_bytes = mp->m_sb.sb_blocksize;
	if (is_rt)
		align_bytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize);

	if (has_hint || has_inherit) {
		/* Hints only make sense on directories and regular files. */
		if (!is_dir && !is_reg)
			return __this_address;

		if (has_hint && !is_reg)
			return __this_address;

		if (has_inherit && !is_dir)
			return __this_address;

		/* A hint flag without a hint value is inconsistent. */
		if (extsize == 0)
			return __this_address;
	} else if (mode != 0 && extsize != 0) {
		/* free inodes get flags set to zero but extsize remains */
		return __this_address;
	}

	if (hint_bytes % align_bytes)
		return __this_address;

	if (extsize > XFS_MAX_BMBT_EXTLEN)
		return __this_address;

	if (!is_rt && extsize > mp->m_sb.sb_agblocks / 2)
		return __this_address;

	return NULL;
}

/*
 * Validate di_cowextsize hint.
 *
 * 1. CoW extent size hint can only be set if reflink is enabled on the fs.
 *    The inode does not have to have any shared blocks, but it must be a v3.
 * 2. FS_XFLAG_COWEXTSIZE is only valid for directories and regular files;
 *    for a directory, the hint is propagated to new files.
 * 3. Can be changed on files & directories at any time.
 * 4. Hint value of 0 turns off hints, clears inode flags.
 * 5. Extent size must be a multiple of the appropriate block size.
 * 6. The extent size hint must be limited to half the AG size to avoid
 *    alignment extending the extent beyond the limits of the AG.
750 */ 751 xfs_failaddr_t 752 xfs_inode_validate_cowextsize( 753 struct xfs_mount *mp, 754 uint32_t cowextsize, 755 uint16_t mode, 756 uint16_t flags, 757 uint64_t flags2) 758 { 759 bool rt_flag; 760 bool hint_flag; 761 uint32_t cowextsize_bytes; 762 763 rt_flag = (flags & XFS_DIFLAG_REALTIME); 764 hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE); 765 cowextsize_bytes = XFS_FSB_TO_B(mp, cowextsize); 766 767 if (hint_flag && !xfs_has_reflink(mp)) 768 return __this_address; 769 770 if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode))) 771 return __this_address; 772 773 if (hint_flag && cowextsize == 0) 774 return __this_address; 775 776 /* free inodes get flags set to zero but cowextsize remains */ 777 if (mode && !hint_flag && cowextsize != 0) 778 return __this_address; 779 780 if (hint_flag && rt_flag) 781 return __this_address; 782 783 if (cowextsize_bytes % mp->m_sb.sb_blocksize) 784 return __this_address; 785 786 if (cowextsize > XFS_MAX_BMBT_EXTLEN) 787 return __this_address; 788 789 if (cowextsize > mp->m_sb.sb_agblocks / 2) 790 return __this_address; 791 792 return NULL; 793 } 794