1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 4 * All Rights Reserved. 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_log_format.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_mount.h" 13 #include "xfs_ag.h" 14 #include "xfs_inode.h" 15 #include "xfs_errortag.h" 16 #include "xfs_error.h" 17 #include "xfs_icache.h" 18 #include "xfs_trans.h" 19 #include "xfs_ialloc.h" 20 #include "xfs_dir2.h" 21 #include "xfs_health.h" 22 23 #include <linux/iversion.h> 24 25 /* 26 * If we are doing readahead on an inode buffer, we might be in log recovery 27 * reading an inode allocation buffer that hasn't yet been replayed, and hence 28 * has not had the inode cores stamped into it. Hence for readahead, the buffer 29 * may be potentially invalid. 30 * 31 * If the readahead buffer is invalid, we need to mark it with an error and 32 * clear the DONE status of the buffer so that a followup read will re-read it 33 * from disk. We don't report the error otherwise to avoid warnings during log 34 * recovery and we don't get unnecessary panics on debug kernels. We use EIO here 35 * because all we want to do is say readahead failed; there is no-one to report 36 * the error to, so this will distinguish it from a non-ra verifier failure. 37 * Changes to this readahead error behaviour also need to be reflected in 38 * xfs_dquot_buf_readahead_verify(). 39 */ 40 static void 41 xfs_inode_buf_verify( 42 struct xfs_buf *bp, 43 bool readahead) 44 { 45 struct xfs_mount *mp = bp->b_mount; 46 int i; 47 int ni; 48 49 /* 50 * Validate the magic number and version of every inode in the buffer 51 */ 52 ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock; 53 for (i = 0; i < ni; i++) { 54 struct xfs_dinode *dip; 55 xfs_agino_t unlinked_ino; 56 int di_ok; 57 58 dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog)); 59 unlinked_ino = be32_to_cpu(dip->di_next_unlinked); 60 di_ok = xfs_verify_magic16(bp, dip->di_magic) && 61 xfs_dinode_good_version(mp, dip->di_version) && 62 xfs_verify_agino_or_null(bp->b_pag, unlinked_ino); 63 if (unlikely(XFS_TEST_ERROR(!di_ok, mp, 64 XFS_ERRTAG_ITOBP_INOTOBP))) { 65 if (readahead) { 66 bp->b_flags &= ~XBF_DONE; 67 xfs_buf_ioerror(bp, -EIO); 68 return; 69 } 70 71 #ifdef DEBUG 72 xfs_alert(mp, 73 "bad inode magic/vsn daddr %lld #%d (magic=%x)", 74 (unsigned long long)xfs_buf_daddr(bp), i, 75 be16_to_cpu(dip->di_magic)); 76 #endif 77 xfs_buf_verifier_error(bp, -EFSCORRUPTED, 78 __func__, dip, sizeof(*dip), 79 NULL); 80 return; 81 } 82 } 83 } 84 85 86 static void 87 xfs_inode_buf_read_verify( 88 struct xfs_buf *bp) 89 { 90 xfs_inode_buf_verify(bp, false); 91 } 92 93 static void 94 xfs_inode_buf_readahead_verify( 95 struct xfs_buf *bp) 96 { 97 xfs_inode_buf_verify(bp, true); 98 } 99 100 static void 101 xfs_inode_buf_write_verify( 102 struct xfs_buf *bp) 103 { 104 xfs_inode_buf_verify(bp, false); 105 } 106 107 const struct xfs_buf_ops xfs_inode_buf_ops = { 108 .name = "xfs_inode", 109 .magic16 = { cpu_to_be16(XFS_DINODE_MAGIC), 110 cpu_to_be16(XFS_DINODE_MAGIC) }, 111 .verify_read = xfs_inode_buf_read_verify, 112 .verify_write = xfs_inode_buf_write_verify, 113 }; 114 115 const struct xfs_buf_ops xfs_inode_buf_ra_ops = { 116 .name = "xfs_inode_ra", 117 .magic16 = { cpu_to_be16(XFS_DINODE_MAGIC), 118 cpu_to_be16(XFS_DINODE_MAGIC) }, 119 .verify_read = xfs_inode_buf_readahead_verify, 120 .verify_write = xfs_inode_buf_write_verify, 121 }; 122 123 124 /* 125 * This routine is called to map an inode to the buffer containing the on-disk 126 * version of the inode. It returns a pointer to the buffer containing the 127 * on-disk inode in the bpp parameter. 128 */ 129 int 130 xfs_imap_to_bp( 131 struct xfs_mount *mp, 132 struct xfs_trans *tp, 133 struct xfs_imap *imap, 134 struct xfs_buf **bpp) 135 { 136 int error; 137 138 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, 139 imap->im_len, XBF_UNMAPPED, bpp, &xfs_inode_buf_ops); 140 if (xfs_metadata_is_sick(error)) 141 xfs_agno_mark_sick(mp, xfs_daddr_to_agno(mp, imap->im_blkno), 142 XFS_SICK_AG_INODES); 143 return error; 144 } 145 146 static inline struct timespec64 xfs_inode_decode_bigtime(uint64_t ts) 147 { 148 struct timespec64 tv; 149 uint32_t n; 150 151 tv.tv_sec = xfs_bigtime_to_unix(div_u64_rem(ts, NSEC_PER_SEC, &n)); 152 tv.tv_nsec = n; 153 154 return tv; 155 } 156 157 /* Convert an ondisk timestamp to an incore timestamp. */ 158 struct timespec64 159 xfs_inode_from_disk_ts( 160 struct xfs_dinode *dip, 161 const xfs_timestamp_t ts) 162 { 163 struct timespec64 tv; 164 struct xfs_legacy_timestamp *lts; 165 166 if (xfs_dinode_has_bigtime(dip)) 167 return xfs_inode_decode_bigtime(be64_to_cpu(ts)); 168 169 lts = (struct xfs_legacy_timestamp *)&ts; 170 tv.tv_sec = (int)be32_to_cpu(lts->t_sec); 171 tv.tv_nsec = (int)be32_to_cpu(lts->t_nsec); 172 173 return tv; 174 } 175 176 int 177 xfs_inode_from_disk( 178 struct xfs_inode *ip, 179 struct xfs_dinode *from) 180 { 181 struct inode *inode = VFS_I(ip); 182 int error; 183 xfs_failaddr_t fa; 184 185 ASSERT(ip->i_cowfp == NULL); 186 187 fa = xfs_dinode_verify(ip->i_mount, ip->i_ino, from); 188 if (fa) { 189 xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", from, 190 sizeof(*from), fa); 191 return -EFSCORRUPTED; 192 } 193 194 /* 195 * First get the permanent information that is needed to allocate an 196 * inode. If the inode is unused, mode is zero and we shouldn't mess 197 * with the uninitialized part of it. 198 */ 199 if (!xfs_has_v3inodes(ip->i_mount)) 200 ip->i_flushiter = be16_to_cpu(from->di_flushiter); 201 inode->i_generation = be32_to_cpu(from->di_gen); 202 inode->i_mode = be16_to_cpu(from->di_mode); 203 if (!inode->i_mode) 204 return 0; 205 206 /* 207 * Convert v1 inodes immediately to v2 inode format as this is the 208 * minimum inode version format we support in the rest of the code. 209 * They will also be unconditionally written back to disk as v2 inodes. 210 */ 211 if (unlikely(from->di_version == 1)) { 212 set_nlink(inode, be16_to_cpu(from->di_onlink)); 213 ip->i_projid = 0; 214 } else { 215 set_nlink(inode, be32_to_cpu(from->di_nlink)); 216 ip->i_projid = (prid_t)be16_to_cpu(from->di_projid_hi) << 16 | 217 be16_to_cpu(from->di_projid_lo); 218 } 219 220 i_uid_write(inode, be32_to_cpu(from->di_uid)); 221 i_gid_write(inode, be32_to_cpu(from->di_gid)); 222 223 /* 224 * Time is signed, so need to convert to signed 32 bit before 225 * storing in inode timestamp which may be 64 bit. Otherwise 226 * a time before epoch is converted to a time long after epoch 227 * on 64 bit systems. 228 */ 229 inode_set_atime_to_ts(inode, 230 xfs_inode_from_disk_ts(from, from->di_atime)); 231 inode_set_mtime_to_ts(inode, 232 xfs_inode_from_disk_ts(from, from->di_mtime)); 233 inode_set_ctime_to_ts(inode, 234 xfs_inode_from_disk_ts(from, from->di_ctime)); 235 236 ip->i_disk_size = be64_to_cpu(from->di_size); 237 ip->i_nblocks = be64_to_cpu(from->di_nblocks); 238 ip->i_extsize = be32_to_cpu(from->di_extsize); 239 ip->i_forkoff = from->di_forkoff; 240 ip->i_diflags = be16_to_cpu(from->di_flags); 241 ip->i_next_unlinked = be32_to_cpu(from->di_next_unlinked); 242 243 if (from->di_dmevmask || from->di_dmstate) 244 xfs_iflags_set(ip, XFS_IPRESERVE_DM_FIELDS); 245 246 if (xfs_has_v3inodes(ip->i_mount)) { 247 inode_set_iversion_queried(inode, 248 be64_to_cpu(from->di_changecount)); 249 ip->i_crtime = xfs_inode_from_disk_ts(from, from->di_crtime); 250 ip->i_diflags2 = be64_to_cpu(from->di_flags2); 251 ip->i_cowextsize = be32_to_cpu(from->di_cowextsize); 252 } 253 254 error = xfs_iformat_data_fork(ip, from); 255 if (error) 256 return error; 257 if (from->di_forkoff) { 258 error = xfs_iformat_attr_fork(ip, from); 259 if (error) 260 goto out_destroy_data_fork; 261 } 262 if (xfs_is_reflink_inode(ip)) 263 xfs_ifork_init_cow(ip); 264 return 0; 265 266 out_destroy_data_fork: 267 xfs_idestroy_fork(&ip->i_df); 268 return error; 269 } 270 271 /* Convert an incore timestamp to an ondisk timestamp. */ 272 static inline xfs_timestamp_t 273 xfs_inode_to_disk_ts( 274 struct xfs_inode *ip, 275 const struct timespec64 tv) 276 { 277 struct xfs_legacy_timestamp *lts; 278 xfs_timestamp_t ts; 279 280 if (xfs_inode_has_bigtime(ip)) 281 return cpu_to_be64(xfs_inode_encode_bigtime(tv)); 282 283 lts = (struct xfs_legacy_timestamp *)&ts; 284 lts->t_sec = cpu_to_be32(tv.tv_sec); 285 lts->t_nsec = cpu_to_be32(tv.tv_nsec); 286 287 return ts; 288 } 289 290 static inline void 291 xfs_inode_to_disk_iext_counters( 292 struct xfs_inode *ip, 293 struct xfs_dinode *to) 294 { 295 if (xfs_inode_has_large_extent_counts(ip)) { 296 to->di_big_nextents = cpu_to_be64(xfs_ifork_nextents(&ip->i_df)); 297 to->di_big_anextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_af)); 298 /* 299 * We might be upgrading the inode to use larger extent counters 300 * than was previously used. Hence zero the unused field. 301 */ 302 to->di_nrext64_pad = cpu_to_be16(0); 303 } else { 304 to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df)); 305 to->di_anextents = cpu_to_be16(xfs_ifork_nextents(&ip->i_af)); 306 } 307 } 308 309 void 310 xfs_inode_to_disk( 311 struct xfs_inode *ip, 312 struct xfs_dinode *to, 313 xfs_lsn_t lsn) 314 { 315 struct inode *inode = VFS_I(ip); 316 317 to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); 318 to->di_onlink = 0; 319 320 to->di_format = xfs_ifork_format(&ip->i_df); 321 to->di_uid = cpu_to_be32(i_uid_read(inode)); 322 to->di_gid = cpu_to_be32(i_gid_read(inode)); 323 to->di_projid_lo = cpu_to_be16(ip->i_projid & 0xffff); 324 to->di_projid_hi = cpu_to_be16(ip->i_projid >> 16); 325 326 to->di_atime = xfs_inode_to_disk_ts(ip, inode_get_atime(inode)); 327 to->di_mtime = xfs_inode_to_disk_ts(ip, inode_get_mtime(inode)); 328 to->di_ctime = xfs_inode_to_disk_ts(ip, inode_get_ctime(inode)); 329 to->di_nlink = cpu_to_be32(inode->i_nlink); 330 to->di_gen = cpu_to_be32(inode->i_generation); 331 to->di_mode = cpu_to_be16(inode->i_mode); 332 333 to->di_size = cpu_to_be64(ip->i_disk_size); 334 to->di_nblocks = cpu_to_be64(ip->i_nblocks); 335 to->di_extsize = cpu_to_be32(ip->i_extsize); 336 to->di_forkoff = ip->i_forkoff; 337 to->di_aformat = xfs_ifork_format(&ip->i_af); 338 to->di_flags = cpu_to_be16(ip->i_diflags); 339 340 if (xfs_has_v3inodes(ip->i_mount)) { 341 to->di_version = 3; 342 to->di_changecount = cpu_to_be64(inode_peek_iversion(inode)); 343 to->di_crtime = xfs_inode_to_disk_ts(ip, ip->i_crtime); 344 to->di_flags2 = cpu_to_be64(ip->i_diflags2); 345 to->di_cowextsize = cpu_to_be32(ip->i_cowextsize); 346 to->di_ino = cpu_to_be64(ip->i_ino); 347 to->di_lsn = cpu_to_be64(lsn); 348 memset(to->di_pad2, 0, sizeof(to->di_pad2)); 349 uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid); 350 to->di_v3_pad = 0; 351 } else { 352 to->di_version = 2; 353 to->di_flushiter = cpu_to_be16(ip->i_flushiter); 354 memset(to->di_v2_pad, 0, sizeof(to->di_v2_pad)); 355 } 356 357 xfs_inode_to_disk_iext_counters(ip, to); 358 } 359 360 static xfs_failaddr_t 361 xfs_dinode_verify_fork( 362 struct xfs_dinode *dip, 363 struct xfs_mount *mp, 364 int whichfork) 365 { 366 xfs_extnum_t di_nextents; 367 xfs_extnum_t max_extents; 368 mode_t mode = be16_to_cpu(dip->di_mode); 369 uint32_t fork_size = XFS_DFORK_SIZE(dip, mp, whichfork); 370 uint32_t fork_format = XFS_DFORK_FORMAT(dip, whichfork); 371 372 di_nextents = xfs_dfork_nextents(dip, whichfork); 373 374 /* 375 * For fork types that can contain local data, check that the fork 376 * format matches the size of local data contained within the fork. 377 * 378 * For all types, check that when the size says the should be in extent 379 * or btree format, the inode isn't claiming it is in local format. 380 */ 381 if (whichfork == XFS_DATA_FORK) { 382 if (S_ISDIR(mode) || S_ISLNK(mode)) { 383 if (be64_to_cpu(dip->di_size) <= fork_size && 384 fork_format != XFS_DINODE_FMT_LOCAL) 385 return __this_address; 386 } 387 388 if (be64_to_cpu(dip->di_size) > fork_size && 389 fork_format == XFS_DINODE_FMT_LOCAL) 390 return __this_address; 391 } 392 393 switch (fork_format) { 394 case XFS_DINODE_FMT_LOCAL: 395 /* 396 * No local regular files yet. 397 */ 398 if (S_ISREG(mode) && whichfork == XFS_DATA_FORK) 399 return __this_address; 400 if (di_nextents) 401 return __this_address; 402 break; 403 case XFS_DINODE_FMT_EXTENTS: 404 if (di_nextents > XFS_DFORK_MAXEXT(dip, mp, whichfork)) 405 return __this_address; 406 break; 407 case XFS_DINODE_FMT_BTREE: 408 max_extents = xfs_iext_max_nextents( 409 xfs_dinode_has_large_extent_counts(dip), 410 whichfork); 411 if (di_nextents > max_extents) 412 return __this_address; 413 break; 414 default: 415 return __this_address; 416 } 417 return NULL; 418 } 419 420 static xfs_failaddr_t 421 xfs_dinode_verify_forkoff( 422 struct xfs_dinode *dip, 423 struct xfs_mount *mp) 424 { 425 if (!dip->di_forkoff) 426 return NULL; 427 428 switch (dip->di_format) { 429 case XFS_DINODE_FMT_DEV: 430 if (dip->di_forkoff != (roundup(sizeof(xfs_dev_t), 8) >> 3)) 431 return __this_address; 432 break; 433 case XFS_DINODE_FMT_LOCAL: /* fall through ... */ 434 case XFS_DINODE_FMT_EXTENTS: /* fall through ... */ 435 case XFS_DINODE_FMT_BTREE: 436 if (dip->di_forkoff >= (XFS_LITINO(mp) >> 3)) 437 return __this_address; 438 break; 439 default: 440 return __this_address; 441 } 442 return NULL; 443 } 444 445 static xfs_failaddr_t 446 xfs_dinode_verify_nrext64( 447 struct xfs_mount *mp, 448 struct xfs_dinode *dip) 449 { 450 if (xfs_dinode_has_large_extent_counts(dip)) { 451 if (!xfs_has_large_extent_counts(mp)) 452 return __this_address; 453 if (dip->di_nrext64_pad != 0) 454 return __this_address; 455 } else if (dip->di_version >= 3) { 456 if (dip->di_v3_pad != 0) 457 return __this_address; 458 } 459 460 return NULL; 461 } 462 463 xfs_failaddr_t 464 xfs_dinode_verify( 465 struct xfs_mount *mp, 466 xfs_ino_t ino, 467 struct xfs_dinode *dip) 468 { 469 xfs_failaddr_t fa; 470 uint16_t mode; 471 uint16_t flags; 472 uint64_t flags2; 473 uint64_t di_size; 474 xfs_extnum_t nextents; 475 xfs_extnum_t naextents; 476 xfs_filblks_t nblocks; 477 478 if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) 479 return __this_address; 480 481 /* Verify v3 integrity information first */ 482 if (dip->di_version >= 3) { 483 if (!xfs_has_v3inodes(mp)) 484 return __this_address; 485 if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, 486 XFS_DINODE_CRC_OFF)) 487 return __this_address; 488 if (be64_to_cpu(dip->di_ino) != ino) 489 return __this_address; 490 if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid)) 491 return __this_address; 492 } 493 494 /* don't allow invalid i_size */ 495 di_size = be64_to_cpu(dip->di_size); 496 if (di_size & (1ULL << 63)) 497 return __this_address; 498 499 mode = be16_to_cpu(dip->di_mode); 500 if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN) 501 return __this_address; 502 503 /* No zero-length symlinks/dirs. */ 504 if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) 505 return __this_address; 506 507 fa = xfs_dinode_verify_nrext64(mp, dip); 508 if (fa) 509 return fa; 510 511 nextents = xfs_dfork_data_extents(dip); 512 naextents = xfs_dfork_attr_extents(dip); 513 nblocks = be64_to_cpu(dip->di_nblocks); 514 515 /* Fork checks carried over from xfs_iformat_fork */ 516 if (mode && nextents + naextents > nblocks) 517 return __this_address; 518 519 if (nextents + naextents == 0 && nblocks != 0) 520 return __this_address; 521 522 if (S_ISDIR(mode) && nextents > mp->m_dir_geo->max_extents) 523 return __this_address; 524 525 if (mode && XFS_DFORK_BOFF(dip) > mp->m_sb.sb_inodesize) 526 return __this_address; 527 528 flags = be16_to_cpu(dip->di_flags); 529 530 if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp) 531 return __this_address; 532 533 /* check for illegal values of forkoff */ 534 fa = xfs_dinode_verify_forkoff(dip, mp); 535 if (fa) 536 return fa; 537 538 /* Do we have appropriate data fork formats for the mode? */ 539 switch (mode & S_IFMT) { 540 case S_IFIFO: 541 case S_IFCHR: 542 case S_IFBLK: 543 case S_IFSOCK: 544 if (dip->di_format != XFS_DINODE_FMT_DEV) 545 return __this_address; 546 break; 547 case S_IFREG: 548 case S_IFLNK: 549 case S_IFDIR: 550 fa = xfs_dinode_verify_fork(dip, mp, XFS_DATA_FORK); 551 if (fa) 552 return fa; 553 break; 554 case 0: 555 /* Uninitialized inode ok. */ 556 break; 557 default: 558 return __this_address; 559 } 560 561 if (dip->di_forkoff) { 562 fa = xfs_dinode_verify_fork(dip, mp, XFS_ATTR_FORK); 563 if (fa) 564 return fa; 565 } else { 566 /* 567 * If there is no fork offset, this may be a freshly-made inode 568 * in a new disk cluster, in which case di_aformat is zeroed. 569 * Otherwise, such an inode must be in EXTENTS format; this goes 570 * for freed inodes as well. 571 */ 572 switch (dip->di_aformat) { 573 case 0: 574 case XFS_DINODE_FMT_EXTENTS: 575 break; 576 default: 577 return __this_address; 578 } 579 if (naextents) 580 return __this_address; 581 } 582 583 /* extent size hint validation */ 584 fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize), 585 mode, flags); 586 if (fa) 587 return fa; 588 589 /* only version 3 or greater inodes are extensively verified here */ 590 if (dip->di_version < 3) 591 return NULL; 592 593 flags2 = be64_to_cpu(dip->di_flags2); 594 595 /* don't allow reflink/cowextsize if we don't have reflink */ 596 if ((flags2 & (XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)) && 597 !xfs_has_reflink(mp)) 598 return __this_address; 599 600 /* only regular files get reflink */ 601 if ((flags2 & XFS_DIFLAG2_REFLINK) && (mode & S_IFMT) != S_IFREG) 602 return __this_address; 603 604 /* don't let reflink and realtime mix */ 605 if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME)) 606 return __this_address; 607 608 /* COW extent size hint validation */ 609 fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize), 610 mode, flags, flags2); 611 if (fa) 612 return fa; 613 614 /* bigtime iflag can only happen on bigtime filesystems */ 615 if (xfs_dinode_has_bigtime(dip) && 616 !xfs_has_bigtime(mp)) 617 return __this_address; 618 619 return NULL; 620 } 621 622 void 623 xfs_dinode_calc_crc( 624 struct xfs_mount *mp, 625 struct xfs_dinode *dip) 626 { 627 uint32_t crc; 628 629 if (dip->di_version < 3) 630 return; 631 632 ASSERT(xfs_has_crc(mp)); 633 crc = xfs_start_cksum_update((char *)dip, mp->m_sb.sb_inodesize, 634 XFS_DINODE_CRC_OFF); 635 dip->di_crc = xfs_end_cksum(crc); 636 } 637 638 /* 639 * Validate di_extsize hint. 640 * 641 * 1. Extent size hint is only valid for directories and regular files. 642 * 2. FS_XFLAG_EXTSIZE is only valid for regular files. 643 * 3. FS_XFLAG_EXTSZINHERIT is only valid for directories. 644 * 4. Hint cannot be larger than MAXTEXTLEN. 645 * 5. Can be changed on directories at any time. 646 * 6. Hint value of 0 turns off hints, clears inode flags. 647 * 7. Extent size must be a multiple of the appropriate block size. 648 * For realtime files, this is the rt extent size. 649 * 8. For non-realtime files, the extent size hint must be limited 650 * to half the AG size to avoid alignment extending the extent beyond the 651 * limits of the AG. 652 */ 653 xfs_failaddr_t 654 xfs_inode_validate_extsize( 655 struct xfs_mount *mp, 656 uint32_t extsize, 657 uint16_t mode, 658 uint16_t flags) 659 { 660 bool rt_flag; 661 bool hint_flag; 662 bool inherit_flag; 663 uint32_t extsize_bytes; 664 uint32_t blocksize_bytes; 665 666 rt_flag = (flags & XFS_DIFLAG_REALTIME); 667 hint_flag = (flags & XFS_DIFLAG_EXTSIZE); 668 inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT); 669 extsize_bytes = XFS_FSB_TO_B(mp, extsize); 670 671 /* 672 * This comment describes a historic gap in this verifier function. 673 * 674 * For a directory with both RTINHERIT and EXTSZINHERIT flags set, this 675 * function has never checked that the extent size hint is an integer 676 * multiple of the realtime extent size. Since we allow users to set 677 * this combination on non-rt filesystems /and/ to change the rt 678 * extent size when adding a rt device to a filesystem, the net effect 679 * is that users can configure a filesystem anticipating one rt 680 * geometry and change their minds later. Directories do not use the 681 * extent size hint, so this is harmless for them. 682 * 683 * If a directory with a misaligned extent size hint is allowed to 684 * propagate that hint into a new regular realtime file, the result 685 * is that the inode cluster buffer verifier will trigger a corruption 686 * shutdown the next time it is run, because the verifier has always 687 * enforced the alignment rule for regular files. 688 * 689 * Because we allow administrators to set a new rt extent size when 690 * adding a rt section, we cannot add a check to this verifier because 691 * that will result a new source of directory corruption errors when 692 * reading an existing filesystem. Instead, we rely on callers to 693 * decide when alignment checks are appropriate, and fix things up as 694 * needed. 695 */ 696 697 if (rt_flag) 698 blocksize_bytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize); 699 else 700 blocksize_bytes = mp->m_sb.sb_blocksize; 701 702 if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode))) 703 return __this_address; 704 705 if (hint_flag && !S_ISREG(mode)) 706 return __this_address; 707 708 if (inherit_flag && !S_ISDIR(mode)) 709 return __this_address; 710 711 if ((hint_flag || inherit_flag) && extsize == 0) 712 return __this_address; 713 714 /* free inodes get flags set to zero but extsize remains */ 715 if (mode && !(hint_flag || inherit_flag) && extsize != 0) 716 return __this_address; 717 718 if (extsize_bytes % blocksize_bytes) 719 return __this_address; 720 721 if (extsize > XFS_MAX_BMBT_EXTLEN) 722 return __this_address; 723 724 if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2) 725 return __this_address; 726 727 return NULL; 728 } 729 730 /* 731 * Validate di_cowextsize hint. 732 * 733 * 1. CoW extent size hint can only be set if reflink is enabled on the fs. 734 * The inode does not have to have any shared blocks, but it must be a v3. 735 * 2. FS_XFLAG_COWEXTSIZE is only valid for directories and regular files; 736 * for a directory, the hint is propagated to new files. 737 * 3. Can be changed on files & directories at any time. 738 * 4. Hint value of 0 turns off hints, clears inode flags. 739 * 5. Extent size must be a multiple of the appropriate block size. 740 * 6. The extent size hint must be limited to half the AG size to avoid 741 * alignment extending the extent beyond the limits of the AG. 742 */ 743 xfs_failaddr_t 744 xfs_inode_validate_cowextsize( 745 struct xfs_mount *mp, 746 uint32_t cowextsize, 747 uint16_t mode, 748 uint16_t flags, 749 uint64_t flags2) 750 { 751 bool rt_flag; 752 bool hint_flag; 753 uint32_t cowextsize_bytes; 754 755 rt_flag = (flags & XFS_DIFLAG_REALTIME); 756 hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE); 757 cowextsize_bytes = XFS_FSB_TO_B(mp, cowextsize); 758 759 if (hint_flag && !xfs_has_reflink(mp)) 760 return __this_address; 761 762 if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode))) 763 return __this_address; 764 765 if (hint_flag && cowextsize == 0) 766 return __this_address; 767 768 /* free inodes get flags set to zero but cowextsize remains */ 769 if (mode && !hint_flag && cowextsize != 0) 770 return __this_address; 771 772 if (hint_flag && rt_flag) 773 return __this_address; 774 775 if (cowextsize_bytes % mp->m_sb.sb_blocksize) 776 return __this_address; 777 778 if (cowextsize > XFS_MAX_BMBT_EXTLEN) 779 return __this_address; 780 781 if (cowextsize > mp->m_sb.sb_agblocks / 2) 782 return __this_address; 783 784 return NULL; 785 } 786