// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * Copyright (c) 2016-2018 Christoph Hellwig.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_bmap_btree.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trans.h"
#include "xfs_trans_space.h"
#include "xfs_inode_item.h"
#include "xfs_iomap.h"
#include "xfs_trace.h"
#include "xfs_quota.h"
#include "xfs_dquot_item.h"
#include "xfs_dquot.h"
#include "xfs_reflink.h"

#define XFS_ALLOC_ALIGN(mp, off) \
	(((off) >> mp->m_allocsize_log) << mp->m_allocsize_log)

static int
xfs_alert_fsblock_zero(
	xfs_inode_t	*ip,
	xfs_bmbt_irec_t	*imap)
{
	xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,
			"Access to block zero in inode %llu "
			"start_block: %llx start_off: %llx "
			"blkcnt: %llx extent-state: %x",
		(unsigned long long)ip->i_ino,
		(unsigned long long)imap->br_startblock,
		(unsigned long long)imap->br_startoff,
		(unsigned long long)imap->br_blockcount,
		imap->br_state);
	return -EFSCORRUPTED;
}

u64
xfs_iomap_inode_sequence(
	struct xfs_inode	*ip,
	u16			iomap_flags)
{
	u64			cookie = 0;

	if (iomap_flags & IOMAP_F_XATTR)
		return READ_ONCE(ip->i_af.if_seq);
	if ((iomap_flags & IOMAP_F_SHARED) && ip->i_cowfp)
		cookie = (u64)READ_ONCE(ip->i_cowfp->if_seq) << 32;
	return cookie | READ_ONCE(ip->i_df.if_seq);
}

/*
 * Check that the iomap passed to us is still valid for the given offset and
 * length.
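 *
 * The validity cookie packs the data fork sequence count in the low 32 bits
 * and, for shared (COW) mappings, the COW fork sequence count in the high 32
 * bits; attribute mappings use just the attr fork sequence count.  If either
 * relevant fork has changed since the iomap was built, the cookie no longer
 * matches and the caller has to remap.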
 */
static bool
xfs_iomap_valid(
	struct inode		*inode,
	const struct iomap	*iomap)
{
	struct xfs_inode	*ip = XFS_I(inode);

	if (iomap->validity_cookie !=
			xfs_iomap_inode_sequence(ip, iomap->flags)) {
		trace_xfs_iomap_invalid(ip, iomap);
		return false;
	}

	XFS_ERRORTAG_DELAY(ip->i_mount, XFS_ERRTAG_WRITE_DELAY_MS);
	return true;
}

static const struct iomap_page_ops xfs_iomap_page_ops = {
	.iomap_valid		= xfs_iomap_valid,
};

int
xfs_bmbt_to_iomap(
	struct xfs_inode	*ip,
	struct iomap		*iomap,
	struct xfs_bmbt_irec	*imap,
	unsigned int		mapping_flags,
	u16			iomap_flags,
	u64			sequence_cookie)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);

	if (unlikely(!xfs_valid_startblock(ip, imap->br_startblock)))
		return xfs_alert_fsblock_zero(ip, imap);

	if (imap->br_startblock == HOLESTARTBLOCK) {
		iomap->addr = IOMAP_NULL_ADDR;
		iomap->type = IOMAP_HOLE;
	} else if (imap->br_startblock == DELAYSTARTBLOCK ||
		   isnullstartblock(imap->br_startblock)) {
		iomap->addr = IOMAP_NULL_ADDR;
		iomap->type = IOMAP_DELALLOC;
	} else {
		iomap->addr = BBTOB(xfs_fsb_to_db(ip, imap->br_startblock));
		if (mapping_flags & IOMAP_DAX)
			iomap->addr += target->bt_dax_part_off;

		if (imap->br_state == XFS_EXT_UNWRITTEN)
			iomap->type = IOMAP_UNWRITTEN;
		else
			iomap->type = IOMAP_MAPPED;
	}
	iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff);
	iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
	if (mapping_flags & IOMAP_DAX)
		iomap->dax_dev = target->bt_daxdev;
	else
		iomap->bdev = target->bt_bdev;
	iomap->flags = iomap_flags;

	if (xfs_ipincount(ip) &&
	    (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
		iomap->flags |= IOMAP_F_DIRTY;

	iomap->validity_cookie = sequence_cookie;
	iomap->page_ops = &xfs_iomap_page_ops;
	return 0;
}

static void
xfs_hole_to_iomap(
	struct xfs_inode	*ip,
	struct iomap		*iomap,
	xfs_fileoff_t		offset_fsb,
	xfs_fileoff_t		end_fsb)
{
	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);

	iomap->addr = IOMAP_NULL_ADDR;
	iomap->type = IOMAP_HOLE;
	iomap->offset = XFS_FSB_TO_B(ip->i_mount, offset_fsb);
	iomap->length = XFS_FSB_TO_B(ip->i_mount, end_fsb - offset_fsb);
	iomap->bdev = target->bt_bdev;
	iomap->dax_dev = target->bt_daxdev;
}

static inline xfs_fileoff_t
xfs_iomap_end_fsb(
	struct xfs_mount	*mp,
	loff_t			offset,
	loff_t			count)
{
	ASSERT(offset <= mp->m_super->s_maxbytes);
	return min(XFS_B_TO_FSB(mp, offset + count),
		   XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes));
}

static xfs_extlen_t
xfs_eof_alignment(
	struct xfs_inode	*ip)
{
	struct xfs_mount	*mp = ip->i_mount;
	xfs_extlen_t		align = 0;

	if (!XFS_IS_REALTIME_INODE(ip)) {
		/*
		 * Round up the allocation request to a stripe unit
		 * (m_dalign) boundary if the file size is >= stripe unit
		 * size, and we are allocating past the allocation eof.
		 *
		 * If mounted with the "-o swalloc" option the alignment is
		 * increased from the stripe unit size to the stripe width.
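		 *
		 * Either way the alignment only takes effect once the file
		 * has grown to at least one full alignment unit; below that
		 * we return zero and no EOF alignment is applied.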
		 */
		if (mp->m_swidth && xfs_has_swalloc(mp))
			align = mp->m_swidth;
		else if (mp->m_dalign)
			align = mp->m_dalign;

		if (align && XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, align))
			align = 0;
	}

	return align;
}

/*
 * Check if last_fsb is outside the last extent, and if so grow it to the next
 * stripe unit boundary.
 */
xfs_fileoff_t
xfs_iomap_eof_align_last_fsb(
	struct xfs_inode	*ip,
	xfs_fileoff_t		end_fsb)
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
	xfs_extlen_t		extsz = xfs_get_extsz_hint(ip);
	xfs_extlen_t		align = xfs_eof_alignment(ip);
	struct xfs_bmbt_irec	irec;
	struct xfs_iext_cursor	icur;

	ASSERT(!xfs_need_iread_extents(ifp));

	/*
	 * Always round up the allocation request to the extent hint boundary.
	 */
	if (extsz) {
		if (align)
			align = roundup_64(align, extsz);
		else
			align = extsz;
	}

	if (align) {
		xfs_fileoff_t	aligned_end_fsb = roundup_64(end_fsb, align);

		xfs_iext_last(ifp, &icur);
		if (!xfs_iext_get_extent(ifp, &icur, &irec) ||
		    aligned_end_fsb >= irec.br_startoff + irec.br_blockcount)
			return aligned_end_fsb;
	}

	return end_fsb;
}

int
xfs_iomap_write_direct(
	struct xfs_inode	*ip,
	xfs_fileoff_t		offset_fsb,
	xfs_fileoff_t		count_fsb,
	unsigned int		flags,
	struct xfs_bmbt_irec	*imap,
	u64			*seq)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	xfs_filblks_t		resaligned;
	int			nimaps;
	unsigned int		dblocks, rblocks;
	bool			force = false;
	int			error;
	int			bmapi_flags = XFS_BMAPI_PREALLOC;
	int			nr_exts = XFS_IEXT_ADD_NOSPLIT_CNT;

	ASSERT(count_fsb > 0);

	resaligned = xfs_aligned_fsb_count(offset_fsb, count_fsb,
					   xfs_get_extsz_hint(ip));
	if (unlikely(XFS_IS_REALTIME_INODE(ip))) {
		dblocks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
		rblocks = resaligned;
	} else {
		dblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
		rblocks = 0;
	}

	error = xfs_qm_dqattach(ip);
	if (error)
		return error;

	/*
	 * For DAX, we do not allocate unwritten extents, but instead we zero
	 * the block before we commit the transaction.  Ideally we'd like to do
	 * this outside the transaction context, but if we commit and then crash
	 * we may not have zeroed the blocks and this will be exposed on
	 * recovery of the allocation.  Hence we must zero before commit.
	 *
	 * Further, if we are mapping unwritten extents here, we need to zero
	 * and convert them to written so that we don't need an unwritten extent
	 * callback for DAX.  This also means that we need to be able to dip into
	 * the reserve block pool for bmbt block allocation if there is no space
	 * left but we need to do unwritten extent conversion.
	 */
	if (flags & IOMAP_DAX) {
		bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO;
		if (imap->br_state == XFS_EXT_UNWRITTEN) {
			force = true;
			nr_exts = XFS_IEXT_WRITE_UNWRITTEN_CNT;
			dblocks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
		}
	}

	error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write, dblocks,
			rblocks, force, &tp);
	if (error)
		return error;

	error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK, nr_exts);
	if (error == -EFBIG)
		error = xfs_iext_count_upgrade(tp, ip, nr_exts);
	if (error)
		goto out_trans_cancel;

	/*
	 * From this point onwards we overwrite the imap pointer that the
	 * caller gave to us.
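	 * On success it holds the newly allocated (or converted) mapping; if
	 * xfs_bmapi_write() comes back with no mappings we treat that as
	 * ENOSPC below.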
	 */
	nimaps = 1;
	error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flags, 0,
				imap, &nimaps);
	if (error)
		goto out_trans_cancel;

	/*
	 * Complete the transaction
	 */
	error = xfs_trans_commit(tp);
	if (error)
		goto out_unlock;

	/*
	 * Copy any maps to caller's array and return any error.
	 */
	if (nimaps == 0) {
		error = -ENOSPC;
		goto out_unlock;
	}

	if (unlikely(!xfs_valid_startblock(ip, imap->br_startblock)))
		error = xfs_alert_fsblock_zero(ip, imap);

out_unlock:
	*seq = xfs_iomap_inode_sequence(ip, 0);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;

out_trans_cancel:
	xfs_trans_cancel(tp);
	goto out_unlock;
}

STATIC bool
xfs_quota_need_throttle(
	struct xfs_inode	*ip,
	xfs_dqtype_t		type,
	xfs_fsblock_t		alloc_blocks)
{
	struct xfs_dquot	*dq = xfs_inode_dquot(ip, type);

	if (!dq || !xfs_this_quota_on(ip->i_mount, type))
		return false;

	/* no hi watermark, no throttle */
	if (!dq->q_prealloc_hi_wmark)
		return false;

	/* under the lo watermark, no throttle */
	if (dq->q_blk.reserved + alloc_blocks < dq->q_prealloc_lo_wmark)
		return false;

	return true;
}

STATIC void
xfs_quota_calc_throttle(
	struct xfs_inode	*ip,
	xfs_dqtype_t		type,
	xfs_fsblock_t		*qblocks,
	int			*qshift,
	int64_t			*qfreesp)
{
	struct xfs_dquot	*dq = xfs_inode_dquot(ip, type);
	int64_t			freesp;
	int			shift = 0;

	/* no dq, or over hi wmark, squash the prealloc completely */
	if (!dq || dq->q_blk.reserved >= dq->q_prealloc_hi_wmark) {
		*qblocks = 0;
		*qfreesp = 0;
		return;
	}

	freesp = dq->q_prealloc_hi_wmark - dq->q_blk.reserved;
	if (freesp < dq->q_low_space[XFS_QLOWSP_5_PCNT]) {
		shift = 2;
		if (freesp < dq->q_low_space[XFS_QLOWSP_3_PCNT])
			shift += 2;
		if (freesp < dq->q_low_space[XFS_QLOWSP_1_PCNT])
			shift += 2;
	}

	if (freesp < *qfreesp)
		*qfreesp = freesp;

	/* only overwrite the throttle values if we are more aggressive */
	if ((freesp >> shift) < (*qblocks >> *qshift)) {
		*qblocks = freesp;
		*qshift = shift;
	}
}

/*
 * If we don't have a user specified preallocation size, dynamically increase
 * the preallocation size as the size of the file grows.  Cap the maximum size
 * at a single extent or less if the filesystem is near full.  The closer the
 * filesystem is to being full, the smaller the maximum preallocation.
 */
STATIC xfs_fsblock_t
xfs_iomap_prealloc_size(
	struct xfs_inode	*ip,
	int			whichfork,
	loff_t			offset,
	loff_t			count,
	struct xfs_iext_cursor	*icur)
{
	struct xfs_iext_cursor	ncur = *icur;
	struct xfs_bmbt_irec	prev, got;
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
	int64_t			freesp;
	xfs_fsblock_t		qblocks;
	xfs_fsblock_t		alloc_blocks = 0;
	xfs_extlen_t		plen;
	int			shift = 0;
	int			qshift = 0;

	/*
	 * As an exception we don't do any preallocation at all if the file is
	 * smaller than the minimum preallocation and we are using the default
	 * dynamic preallocation scheme, as it is likely this is the only write
	 * to the file that is going to be done.
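	 * Keeping such tiny, likely write-once files free of speculative
	 * preallocation avoids leaving stray post-EOF blocks behind for them.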
	 */
	if (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_allocsize_blocks))
		return 0;

	/*
	 * Use the minimum preallocation size for small files or if we are
	 * writing right after a hole.
	 */
	if (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign) ||
	    !xfs_iext_prev_extent(ifp, &ncur, &prev) ||
	    prev.br_startoff + prev.br_blockcount < offset_fsb)
		return mp->m_allocsize_blocks;

	/*
	 * Take the size of the preceding data extents as the basis for the
	 * preallocation size.  Note that we don't care if the previous extents
	 * are written or not.
	 */
	plen = prev.br_blockcount;
	while (xfs_iext_prev_extent(ifp, &ncur, &got)) {
		if (plen > XFS_MAX_BMBT_EXTLEN / 2 ||
		    isnullstartblock(got.br_startblock) ||
		    got.br_startoff + got.br_blockcount != prev.br_startoff ||
		    got.br_startblock + got.br_blockcount != prev.br_startblock)
			break;
		plen += got.br_blockcount;
		prev = got;
	}

	/*
	 * If the size of the extents is greater than half the maximum extent
	 * length, then use the current offset as the basis.  This ensures that
	 * for large files the preallocation size always extends to
	 * XFS_MAX_BMBT_EXTLEN rather than falling short due to things like
	 * stripe unit/width alignment of real extents.
	 */
	alloc_blocks = plen * 2;
	if (alloc_blocks > XFS_MAX_BMBT_EXTLEN)
		alloc_blocks = XFS_B_TO_FSB(mp, offset);
	qblocks = alloc_blocks;

	/*
	 * XFS_MAX_BMBT_EXTLEN is not a power of two value but we round the
	 * prealloc down to the nearest power of two value after throttling.
	 * To prevent the round down from unconditionally reducing the maximum
	 * supported prealloc size, we round up first, apply appropriate
	 * throttling, round down and cap the value to XFS_MAX_BMBT_EXTLEN.
	 */
	alloc_blocks = XFS_FILEOFF_MIN(roundup_pow_of_two(XFS_MAX_BMBT_EXTLEN),
				       alloc_blocks);

	freesp = percpu_counter_read_positive(&mp->m_fdblocks);
	if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) {
		shift = 2;
		if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT])
			shift++;
		if (freesp < mp->m_low_space[XFS_LOWSP_3_PCNT])
			shift++;
		if (freesp < mp->m_low_space[XFS_LOWSP_2_PCNT])
			shift++;
		if (freesp < mp->m_low_space[XFS_LOWSP_1_PCNT])
			shift++;
	}

	/*
	 * Check each quota to cap the prealloc size, provide a shift value to
	 * throttle with and adjust amount of available space.
	 */
	if (xfs_quota_need_throttle(ip, XFS_DQTYPE_USER, alloc_blocks))
		xfs_quota_calc_throttle(ip, XFS_DQTYPE_USER, &qblocks, &qshift,
					&freesp);
	if (xfs_quota_need_throttle(ip, XFS_DQTYPE_GROUP, alloc_blocks))
		xfs_quota_calc_throttle(ip, XFS_DQTYPE_GROUP, &qblocks, &qshift,
					&freesp);
	if (xfs_quota_need_throttle(ip, XFS_DQTYPE_PROJ, alloc_blocks))
		xfs_quota_calc_throttle(ip, XFS_DQTYPE_PROJ, &qblocks, &qshift,
					&freesp);

	/*
	 * The final prealloc size is set to the minimum of free space available
	 * in each of the quotas and the overall filesystem.
	 *
	 * The shift throttle value is set to the maximum value as determined by
	 * the global low free space values and per-quota low free space values.
	 */
	alloc_blocks = min(alloc_blocks, qblocks);
	shift = max(shift, qshift);

	if (shift)
		alloc_blocks >>= shift;
	/*
	 * rounddown_pow_of_two() returns an undefined result if we pass in
	 * alloc_blocks = 0.
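	 * alloc_blocks can legitimately be zero here, e.g. when a quota over
	 * its high watermark squashed qblocks to zero above, so guard the
	 * rounddown.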
	 */
	if (alloc_blocks)
		alloc_blocks = rounddown_pow_of_two(alloc_blocks);
	if (alloc_blocks > XFS_MAX_BMBT_EXTLEN)
		alloc_blocks = XFS_MAX_BMBT_EXTLEN;

	/*
	 * If we are still trying to allocate more space than is
	 * available, squash the prealloc hard.  This can happen if we
	 * have a large file on a small filesystem and the above
	 * lowspace thresholds are smaller than XFS_MAX_BMBT_EXTLEN.
	 */
	while (alloc_blocks && alloc_blocks >= freesp)
		alloc_blocks >>= 4;
	if (alloc_blocks < mp->m_allocsize_blocks)
		alloc_blocks = mp->m_allocsize_blocks;
	trace_xfs_iomap_prealloc_size(ip, alloc_blocks, shift,
				      mp->m_allocsize_blocks);
	return alloc_blocks;
}

int
xfs_iomap_write_unwritten(
	xfs_inode_t	*ip,
	xfs_off_t	offset,
	xfs_off_t	count,
	bool		update_isize)
{
	xfs_mount_t	*mp = ip->i_mount;
	xfs_fileoff_t	offset_fsb;
	xfs_filblks_t	count_fsb;
	xfs_filblks_t	numblks_fsb;
	int		nimaps;
	xfs_trans_t	*tp;
	xfs_bmbt_irec_t	imap;
	struct inode	*inode = VFS_I(ip);
	xfs_fsize_t	i_size;
	uint		resblks;
	int		error;

	trace_xfs_unwritten_convert(ip, offset, count);

	offset_fsb = XFS_B_TO_FSBT(mp, offset);
	count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
	count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);

	/*
	 * Reserve enough blocks in this transaction for two complete extent
	 * btree splits.  We may be converting the middle part of an unwritten
	 * extent and in this case we will insert two new extents in the btree
	 * each of which could cause a full split.
	 *
	 * This reservation amount will be used in the first call to
	 * xfs_bmbt_split() to select an AG with enough space to satisfy the
	 * rest of the operation.
	 */
	resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;

	/* Attach dquots so that bmbt splits are accounted correctly. */
	error = xfs_qm_dqattach(ip);
	if (error)
		return error;

	do {
		/*
		 * Set up a transaction to convert the range of extents
		 * from unwritten to real.  Do allocations in a loop until
		 * we have covered the range passed in.
		 *
		 * Note that we can't risk recursing back into the filesystem
		 * here as we might be asked to write out the same inode that we
		 * complete here and might deadlock on the iolock.
		 */
		error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write, resblks,
				0, true, &tp);
		if (error)
			return error;

		error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK,
				XFS_IEXT_WRITE_UNWRITTEN_CNT);
		if (error == -EFBIG)
			error = xfs_iext_count_upgrade(tp, ip,
					XFS_IEXT_WRITE_UNWRITTEN_CNT);
		if (error)
			goto error_on_bmapi_transaction;

		/*
		 * Modify the unwritten extent state of the buffer.
		 */
		nimaps = 1;
		error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
					XFS_BMAPI_CONVERT, resblks, &imap,
					&nimaps);
		if (error)
			goto error_on_bmapi_transaction;

		/*
		 * Log the updated inode size as we go.  We have to be careful
		 * to only log it up to the actual write offset if it is
		 * halfway into a block.
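		 * That is why i_size is clamped to offset + count before it is
		 * compared against the in-core and on-disk sizes below.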
		 */
		i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb);
		if (i_size > offset + count)
			i_size = offset + count;
		if (update_isize && i_size > i_size_read(inode))
			i_size_write(inode, i_size);
		i_size = xfs_new_eof(ip, i_size);
		if (i_size) {
			ip->i_disk_size = i_size;
			xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
		}

		error = xfs_trans_commit(tp);
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		if (error)
			return error;

		if (unlikely(!xfs_valid_startblock(ip, imap.br_startblock)))
			return xfs_alert_fsblock_zero(ip, &imap);

		if ((numblks_fsb = imap.br_blockcount) == 0) {
			/*
			 * The numblks_fsb value should always get
			 * smaller, otherwise the loop is stuck.
			 */
			ASSERT(imap.br_blockcount);
			break;
		}
		offset_fsb += numblks_fsb;
		count_fsb -= numblks_fsb;
	} while (count_fsb > 0);

	return 0;

error_on_bmapi_transaction:
	xfs_trans_cancel(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}

static inline bool
imap_needs_alloc(
	struct inode		*inode,
	unsigned		flags,
	struct xfs_bmbt_irec	*imap,
	int			nimaps)
{
	/* don't allocate blocks when just zeroing */
	if (flags & IOMAP_ZERO)
		return false;
	if (!nimaps ||
	    imap->br_startblock == HOLESTARTBLOCK ||
	    imap->br_startblock == DELAYSTARTBLOCK)
		return true;
	/* we convert unwritten extents before copying the data for DAX */
	if ((flags & IOMAP_DAX) && imap->br_state == XFS_EXT_UNWRITTEN)
		return true;
	return false;
}

static inline bool
imap_needs_cow(
	struct xfs_inode	*ip,
	unsigned int		flags,
	struct xfs_bmbt_irec	*imap,
	int			nimaps)
{
	if (!xfs_is_cow_inode(ip))
		return false;

	/* when zeroing we don't have to COW holes or unwritten extents */
	if (flags & IOMAP_ZERO) {
		if (!nimaps ||
		    imap->br_startblock == HOLESTARTBLOCK ||
		    imap->br_state == XFS_EXT_UNWRITTEN)
			return false;
	}

	return true;
}

static int
xfs_ilock_for_iomap(
	struct xfs_inode	*ip,
	unsigned		flags,
	unsigned		*lockmode)
{
	unsigned int		mode = *lockmode;
	bool			is_write = flags & (IOMAP_WRITE | IOMAP_ZERO);

	/*
	 * COW writes may allocate delalloc space or convert unwritten COW
	 * extents, so we need to make sure to take the lock exclusively here.
	 */
	if (xfs_is_cow_inode(ip) && is_write)
		mode = XFS_ILOCK_EXCL;

	/*
	 * Extents not yet cached require exclusive access, don't block.  This
	 * is an opencoded xfs_ilock_data_map_shared() call but with
	 * non-blocking behaviour.
	 */
	if (xfs_need_iread_extents(&ip->i_df)) {
		if (flags & IOMAP_NOWAIT)
			return -EAGAIN;
		mode = XFS_ILOCK_EXCL;
	}

relock:
	if (flags & IOMAP_NOWAIT) {
		if (!xfs_ilock_nowait(ip, mode))
			return -EAGAIN;
	} else {
		xfs_ilock(ip, mode);
	}

	/*
	 * The reflink iflag could have changed since the earlier unlocked
	 * check, so if we got ILOCK_SHARED for a write but we're now a
	 * reflink inode we have to switch to ILOCK_EXCL and relock.
	 */
	if (mode == XFS_ILOCK_SHARED && is_write && xfs_is_cow_inode(ip)) {
		xfs_iunlock(ip, mode);
		mode = XFS_ILOCK_EXCL;
		goto relock;
	}

	*lockmode = mode;
	return 0;
}

/*
 * Check that the imap we are going to return to the caller spans the entire
 * range that the caller requested for the IO.
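 *
 * NOWAIT and OVERWRITE_ONLY direct I/O use this to bail out with -EAGAIN
 * rather than issue an I/O that could only be partially mapped.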
 */
static bool
imap_spans_range(
	struct xfs_bmbt_irec	*imap,
	xfs_fileoff_t		offset_fsb,
	xfs_fileoff_t		end_fsb)
{
	if (imap->br_startoff > offset_fsb)
		return false;
	if (imap->br_startoff + imap->br_blockcount < end_fsb)
		return false;
	return true;
}

static int
xfs_direct_write_iomap_begin(
	struct inode		*inode,
	loff_t			offset,
	loff_t			length,
	unsigned		flags,
	struct iomap		*iomap,
	struct iomap		*srcmap)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_bmbt_irec	imap, cmap;
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
	xfs_fileoff_t		end_fsb = xfs_iomap_end_fsb(mp, offset, length);
	int			nimaps = 1, error = 0;
	bool			shared = false;
	u16			iomap_flags = 0;
	unsigned int		lockmode = XFS_ILOCK_SHARED;
	u64			seq;

	ASSERT(flags & (IOMAP_WRITE | IOMAP_ZERO));

	if (xfs_is_shutdown(mp))
		return -EIO;

	/*
	 * Writes that span EOF might trigger an IO size update on completion,
	 * so consider them to be dirty for the purposes of O_DSYNC even if
	 * no other metadata changes are pending or have been made here.
	 */
	if (offset + length > i_size_read(inode))
		iomap_flags |= IOMAP_F_DIRTY;

	error = xfs_ilock_for_iomap(ip, flags, &lockmode);
	if (error)
		return error;

	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
			       &nimaps, 0);
	if (error)
		goto out_unlock;

	if (imap_needs_cow(ip, flags, &imap, nimaps)) {
		error = -EAGAIN;
		if (flags & IOMAP_NOWAIT)
			goto out_unlock;

		/* may drop and re-acquire the ilock */
		error = xfs_reflink_allocate_cow(ip, &imap, &cmap, &shared,
				&lockmode,
				(flags & IOMAP_DIRECT) || IS_DAX(inode));
		if (error)
			goto out_unlock;
		if (shared)
			goto out_found_cow;
		end_fsb = imap.br_startoff + imap.br_blockcount;
		length = XFS_FSB_TO_B(mp, end_fsb) - offset;
	}

	if (imap_needs_alloc(inode, flags, &imap, nimaps))
		goto allocate_blocks;

	/*
	 * NOWAIT and OVERWRITE I/O needs to span the entire requested I/O with
	 * a single map so that we avoid partial IO failures due to the rest of
	 * the I/O range not covered by this map triggering an EAGAIN condition
	 * when it is subsequently mapped and aborting the I/O.
	 */
	if (flags & (IOMAP_NOWAIT | IOMAP_OVERWRITE_ONLY)) {
		error = -EAGAIN;
		if (!imap_spans_range(&imap, offset_fsb, end_fsb))
			goto out_unlock;
	}

	/*
	 * For overwrite only I/O, we cannot convert unwritten extents without
	 * requiring sub-block zeroing.  This can only be done under an
	 * exclusive IOLOCK, hence return -EAGAIN if this is not a written
	 * extent to tell the caller to try again.
	 */
	if (flags & IOMAP_OVERWRITE_ONLY) {
		error = -EAGAIN;
		if (imap.br_state != XFS_EXT_NORM &&
		    ((offset | length) & mp->m_blockmask))
			goto out_unlock;
	}

	seq = xfs_iomap_inode_sequence(ip, iomap_flags);
	xfs_iunlock(ip, lockmode);
	trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, iomap_flags, seq);

allocate_blocks:
	error = -EAGAIN;
	if (flags & (IOMAP_NOWAIT | IOMAP_OVERWRITE_ONLY))
		goto out_unlock;

	/*
	 * We cap the maximum length we map to a sane size to keep the chunks
	 * of work done here somewhat symmetric with the work writeback does.
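	 * The cap works out to 1024 pages, i.e. 4MiB with 4k pages.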
	 * This is a completely arbitrary number pulled out of thin air as a
	 * best guess for initial testing.
	 *
	 * Note that the value needs to be less than 32 bits wide until the
	 * lower level functions are updated.
	 */
	length = min_t(loff_t, length, 1024 * PAGE_SIZE);
	end_fsb = xfs_iomap_end_fsb(mp, offset, length);

	if (offset + length > XFS_ISIZE(ip))
		end_fsb = xfs_iomap_eof_align_last_fsb(ip, end_fsb);
	else if (nimaps && imap.br_startblock == HOLESTARTBLOCK)
		end_fsb = min(end_fsb, imap.br_startoff + imap.br_blockcount);
	xfs_iunlock(ip, lockmode);

	error = xfs_iomap_write_direct(ip, offset_fsb, end_fsb - offset_fsb,
			flags, &imap, &seq);
	if (error)
		return error;

	trace_xfs_iomap_alloc(ip, offset, length, XFS_DATA_FORK, &imap);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags,
				 iomap_flags | IOMAP_F_NEW, seq);

out_found_cow:
	length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount);
	trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap);
	if (imap.br_startblock != HOLESTARTBLOCK) {
		seq = xfs_iomap_inode_sequence(ip, 0);
		error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq);
		if (error)
			goto out_unlock;
	}
	seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
	xfs_iunlock(ip, lockmode);
	return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED, seq);

out_unlock:
	if (lockmode)
		xfs_iunlock(ip, lockmode);
	return error;
}

const struct iomap_ops xfs_direct_write_iomap_ops = {
	.iomap_begin		= xfs_direct_write_iomap_begin,
};

static int
xfs_dax_write_iomap_end(
	struct inode		*inode,
	loff_t			pos,
	loff_t			length,
	ssize_t			written,
	unsigned		flags,
	struct iomap		*iomap)
{
	struct xfs_inode	*ip = XFS_I(inode);

	if (!xfs_is_cow_inode(ip))
		return 0;

	if (!written) {
		xfs_reflink_cancel_cow_range(ip, pos, length, true);
		return 0;
	}

	return xfs_reflink_end_cow(ip, pos, written);
}

const struct iomap_ops xfs_dax_write_iomap_ops = {
	.iomap_begin	= xfs_direct_write_iomap_begin,
	.iomap_end	= xfs_dax_write_iomap_end,
};

static int
xfs_buffered_write_iomap_begin(
	struct inode		*inode,
	loff_t			offset,
	loff_t			count,
	unsigned		flags,
	struct iomap		*iomap,
	struct iomap		*srcmap)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
	xfs_fileoff_t		end_fsb = xfs_iomap_end_fsb(mp, offset, count);
	struct xfs_bmbt_irec	imap, cmap;
	struct xfs_iext_cursor	icur, ccur;
	xfs_fsblock_t		prealloc_blocks = 0;
	bool			eof = false, cow_eof = false, shared = false;
	int			allocfork = XFS_DATA_FORK;
	int			error = 0;
	unsigned int		lockmode = XFS_ILOCK_EXCL;
	u64			seq;

	if (xfs_is_shutdown(mp))
		return -EIO;

	/* we can't use delayed allocations when using extent size hints */
	if (xfs_get_extsz_hint(ip))
		return xfs_direct_write_iomap_begin(inode, offset, count,
				flags, iomap, srcmap);

	ASSERT(!XFS_IS_REALTIME_INODE(ip));

	error = xfs_qm_dqattach(ip);
	if (error)
		return error;

	error = xfs_ilock_for_iomap(ip, flags, &lockmode);
	if (error)
		return error;

	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(&ip->i_df)) ||
	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
		error = -EFSCORRUPTED;
		goto out_unlock;
	}

	XFS_STATS_INC(mp, xs_blk_mapw);

	error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
	if (error)
		goto out_unlock;

	/*
	 * Search the data fork first to look up our source mapping.  We
	 * always need the data fork map, as we have to return it to the
	 * iomap code so that the higher level write code can read data in to
	 * perform read-modify-write cycles for unaligned writes.
	 */
	eof = !xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap);
	if (eof)
		imap.br_startoff = end_fsb;	/* fake hole until the end */

	/* We never need to allocate blocks for zeroing a hole. */
	if ((flags & IOMAP_ZERO) && imap.br_startoff > offset_fsb) {
		xfs_hole_to_iomap(ip, iomap, offset_fsb, imap.br_startoff);
		goto out_unlock;
	}

	/*
	 * Search the COW fork extent list even if we did not find a data fork
	 * extent.  This serves two purposes: first this implements the
	 * speculative preallocation using cowextsize, so that we also unshare
	 * blocks adjacent to shared blocks instead of just the shared blocks
	 * themselves.  Second the lookup in the extent list is generally faster
	 * than going out to the shared extent tree.
	 */
	if (xfs_is_cow_inode(ip)) {
		if (!ip->i_cowfp) {
			ASSERT(!xfs_is_reflink_inode(ip));
			xfs_ifork_init_cow(ip);
		}
		cow_eof = !xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb,
				&ccur, &cmap);
		if (!cow_eof && cmap.br_startoff <= offset_fsb) {
			trace_xfs_reflink_cow_found(ip, &cmap);
			goto found_cow;
		}
	}

	if (imap.br_startoff <= offset_fsb) {
		/*
		 * For reflink files we may need a delalloc reservation when
		 * overwriting shared extents.  This includes zeroing of
		 * existing extents that contain data.
		 */
		if (!xfs_is_cow_inode(ip) ||
		    ((flags & IOMAP_ZERO) && imap.br_state != XFS_EXT_NORM)) {
			trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK,
					&imap);
			goto found_imap;
		}

		xfs_trim_extent(&imap, offset_fsb, end_fsb - offset_fsb);

		/* Trim the mapping to the nearest shared extent boundary. */
		error = xfs_bmap_trim_cow(ip, &imap, &shared);
		if (error)
			goto out_unlock;

		/* Not shared?  Just report the (potentially capped) extent. */
		if (!shared) {
			trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK,
					&imap);
			goto found_imap;
		}

		/*
		 * Fork all the shared blocks from our write offset until the
		 * end of the extent.
		 */
		allocfork = XFS_COW_FORK;
		end_fsb = imap.br_startoff + imap.br_blockcount;
	} else {
		/*
		 * We cap the maximum length we map here to MAX_WRITEBACK_PAGES
		 * pages to keep the chunks of work done somewhat symmetric
		 * with the work writeback does.  This is a completely
		 * arbitrary number pulled out of thin air.
		 *
		 * Note that the value needs to be less than 32 bits wide until
		 * the lower level functions are updated.
		 */
		count = min_t(loff_t, count, 1024 * PAGE_SIZE);
		end_fsb = xfs_iomap_end_fsb(mp, offset, count);

		if (xfs_is_always_cow_inode(ip))
			allocfork = XFS_COW_FORK;
	}

	if (eof && offset + count > XFS_ISIZE(ip)) {
		/*
		 * Determine the initial size of the preallocation.
		 * We clean up any extra preallocation when the file is closed.
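		 *
		 * A fixed allocsize mount option takes precedence; otherwise
		 * the size is scaled dynamically from the preceding extents.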
1090 */ 1091 if (xfs_has_allocsize(mp)) 1092 prealloc_blocks = mp->m_allocsize_blocks; 1093 else 1094 prealloc_blocks = xfs_iomap_prealloc_size(ip, allocfork, 1095 offset, count, &icur); 1096 if (prealloc_blocks) { 1097 xfs_extlen_t align; 1098 xfs_off_t end_offset; 1099 xfs_fileoff_t p_end_fsb; 1100 1101 end_offset = XFS_ALLOC_ALIGN(mp, offset + count - 1); 1102 p_end_fsb = XFS_B_TO_FSBT(mp, end_offset) + 1103 prealloc_blocks; 1104 1105 align = xfs_eof_alignment(ip); 1106 if (align) 1107 p_end_fsb = roundup_64(p_end_fsb, align); 1108 1109 p_end_fsb = min(p_end_fsb, 1110 XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes)); 1111 ASSERT(p_end_fsb > offset_fsb); 1112 prealloc_blocks = p_end_fsb - end_fsb; 1113 } 1114 } 1115 1116 retry: 1117 error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb, 1118 end_fsb - offset_fsb, prealloc_blocks, 1119 allocfork == XFS_DATA_FORK ? &imap : &cmap, 1120 allocfork == XFS_DATA_FORK ? &icur : &ccur, 1121 allocfork == XFS_DATA_FORK ? eof : cow_eof); 1122 switch (error) { 1123 case 0: 1124 break; 1125 case -ENOSPC: 1126 case -EDQUOT: 1127 /* retry without any preallocation */ 1128 trace_xfs_delalloc_enospc(ip, offset, count); 1129 if (prealloc_blocks) { 1130 prealloc_blocks = 0; 1131 goto retry; 1132 } 1133 fallthrough; 1134 default: 1135 goto out_unlock; 1136 } 1137 1138 if (allocfork == XFS_COW_FORK) { 1139 trace_xfs_iomap_alloc(ip, offset, count, allocfork, &cmap); 1140 goto found_cow; 1141 } 1142 1143 /* 1144 * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch 1145 * them out if the write happens to fail. 1146 */ 1147 seq = xfs_iomap_inode_sequence(ip, IOMAP_F_NEW); 1148 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1149 trace_xfs_iomap_alloc(ip, offset, count, allocfork, &imap); 1150 return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_NEW, seq); 1151 1152 found_imap: 1153 seq = xfs_iomap_inode_sequence(ip, 0); 1154 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1155 return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq); 1156 1157 found_cow: 1158 seq = xfs_iomap_inode_sequence(ip, 0); 1159 if (imap.br_startoff <= offset_fsb) { 1160 error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq); 1161 if (error) 1162 goto out_unlock; 1163 seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED); 1164 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1165 return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 1166 IOMAP_F_SHARED, seq); 1167 } 1168 1169 xfs_trim_extent(&cmap, offset_fsb, imap.br_startoff - offset_fsb); 1170 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1171 return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 0, seq); 1172 1173 out_unlock: 1174 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1175 return error; 1176 } 1177 1178 static int 1179 xfs_buffered_write_delalloc_punch( 1180 struct inode *inode, 1181 loff_t offset, 1182 loff_t length) 1183 { 1184 return xfs_bmap_punch_delalloc_range(XFS_I(inode), offset, 1185 offset + length); 1186 } 1187 1188 static int 1189 xfs_buffered_write_iomap_end( 1190 struct inode *inode, 1191 loff_t offset, 1192 loff_t length, 1193 ssize_t written, 1194 unsigned flags, 1195 struct iomap *iomap) 1196 { 1197 1198 struct xfs_mount *mp = XFS_M(inode->i_sb); 1199 int error; 1200 1201 error = iomap_file_buffered_write_punch_delalloc(inode, iomap, offset, 1202 length, written, &xfs_buffered_write_delalloc_punch); 1203 if (error && !xfs_is_shutdown(mp)) { 1204 xfs_alert(mp, "%s: unable to clean up ino 0x%llx", 1205 __func__, XFS_I(inode)->i_ino); 1206 return error; 1207 } 1208 return 0; 1209 } 1210 1211 const struct iomap_ops xfs_buffered_write_iomap_ops = { 1212 .iomap_begin = 
const struct iomap_ops xfs_buffered_write_iomap_ops = {
	.iomap_begin		= xfs_buffered_write_iomap_begin,
	.iomap_end		= xfs_buffered_write_iomap_end,
};

/*
 * iomap_page_mkwrite() will never fail in a way that requires delalloc extents
 * that it allocated to be revoked.  Hence we do not need an .iomap_end method
 * for this operation.
 */
const struct iomap_ops xfs_page_mkwrite_iomap_ops = {
	.iomap_begin		= xfs_buffered_write_iomap_begin,
};

static int
xfs_read_iomap_begin(
	struct inode		*inode,
	loff_t			offset,
	loff_t			length,
	unsigned		flags,
	struct iomap		*iomap,
	struct iomap		*srcmap)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_bmbt_irec	imap;
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
	xfs_fileoff_t		end_fsb = xfs_iomap_end_fsb(mp, offset, length);
	int			nimaps = 1, error = 0;
	bool			shared = false;
	unsigned int		lockmode = XFS_ILOCK_SHARED;
	u64			seq;

	ASSERT(!(flags & (IOMAP_WRITE | IOMAP_ZERO)));

	if (xfs_is_shutdown(mp))
		return -EIO;

	error = xfs_ilock_for_iomap(ip, flags, &lockmode);
	if (error)
		return error;
	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
			       &nimaps, 0);
	if (!error && ((flags & IOMAP_REPORT) || IS_DAX(inode)))
		error = xfs_reflink_trim_around_shared(ip, &imap, &shared);
	seq = xfs_iomap_inode_sequence(ip, shared ? IOMAP_F_SHARED : 0);
	xfs_iunlock(ip, lockmode);

	if (error)
		return error;
	trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags,
				 shared ? IOMAP_F_SHARED : 0, seq);
}

const struct iomap_ops xfs_read_iomap_ops = {
	.iomap_begin		= xfs_read_iomap_begin,
};

static int
xfs_seek_iomap_begin(
	struct inode		*inode,
	loff_t			offset,
	loff_t			length,
	unsigned		flags,
	struct iomap		*iomap,
	struct iomap		*srcmap)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + length);
	xfs_fileoff_t		cow_fsb = NULLFILEOFF, data_fsb = NULLFILEOFF;
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	imap, cmap;
	int			error = 0;
	unsigned		lockmode;
	u64			seq;

	if (xfs_is_shutdown(mp))
		return -EIO;

	lockmode = xfs_ilock_data_map_shared(ip);
	error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
	if (error)
		goto out_unlock;

	if (xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap)) {
		/*
		 * If we found a data extent we are done.
		 */
		if (imap.br_startoff <= offset_fsb)
			goto done;
		data_fsb = imap.br_startoff;
	} else {
		/*
		 * Fake a hole until the end of the file.
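		 * (The lookup found no data extent at or beyond offset_fsb.)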
		 */
		data_fsb = xfs_iomap_end_fsb(mp, offset, length);
	}

	/*
	 * If a COW fork extent covers the hole, report it - capped to the next
	 * data fork extent:
	 */
	if (xfs_inode_has_cow_data(ip) &&
	    xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &cmap))
		cow_fsb = cmap.br_startoff;
	if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) {
		if (data_fsb < cow_fsb + cmap.br_blockcount)
			end_fsb = min(end_fsb, data_fsb);
		xfs_trim_extent(&cmap, offset_fsb, end_fsb);
		seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
		error = xfs_bmbt_to_iomap(ip, iomap, &cmap, flags,
				IOMAP_F_SHARED, seq);
		/*
		 * This is a COW extent, so we must probe the page cache
		 * because there could be dirty page cache being backed
		 * by this extent.
		 */
		iomap->type = IOMAP_UNWRITTEN;
		goto out_unlock;
	}

	/*
	 * Else report a hole, capped to the next found data or COW extent.
	 */
	if (cow_fsb != NULLFILEOFF && cow_fsb < data_fsb)
		imap.br_blockcount = cow_fsb - offset_fsb;
	else
		imap.br_blockcount = data_fsb - offset_fsb;
	imap.br_startoff = offset_fsb;
	imap.br_startblock = HOLESTARTBLOCK;
	imap.br_state = XFS_EXT_NORM;
done:
	seq = xfs_iomap_inode_sequence(ip, 0);
	xfs_trim_extent(&imap, offset_fsb, end_fsb);
	error = xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq);
out_unlock:
	xfs_iunlock(ip, lockmode);
	return error;
}

const struct iomap_ops xfs_seek_iomap_ops = {
	.iomap_begin		= xfs_seek_iomap_begin,
};

static int
xfs_xattr_iomap_begin(
	struct inode		*inode,
	loff_t			offset,
	loff_t			length,
	unsigned		flags,
	struct iomap		*iomap,
	struct iomap		*srcmap)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + length);
	struct xfs_bmbt_irec	imap;
	int			nimaps = 1, error = 0;
	unsigned		lockmode;
	int			seq;

	if (xfs_is_shutdown(mp))
		return -EIO;

	lockmode = xfs_ilock_attr_map_shared(ip);

	/* if there is no attribute fork or no extents, return ENOENT */
	if (!xfs_inode_has_attr_fork(ip) || !ip->i_af.if_nextents) {
		error = -ENOENT;
		goto out_unlock;
	}

	ASSERT(ip->i_af.if_format != XFS_DINODE_FMT_LOCAL);
	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
			       &nimaps, XFS_BMAPI_ATTRFORK);
out_unlock:
	seq = xfs_iomap_inode_sequence(ip, IOMAP_F_XATTR);
	xfs_iunlock(ip, lockmode);

	if (error)
		return error;
	ASSERT(nimaps);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_XATTR, seq);
}

const struct iomap_ops xfs_xattr_iomap_ops = {
	.iomap_begin		= xfs_xattr_iomap_begin,
};

int
xfs_zero_range(
	struct xfs_inode	*ip,
	loff_t			pos,
	loff_t			len,
	bool			*did_zero)
{
	struct inode		*inode = VFS_I(ip);

	if (IS_DAX(inode))
		return dax_zero_range(inode, pos, len, did_zero,
				      &xfs_dax_write_iomap_ops);
	return iomap_zero_range(inode, pos, len, did_zero,
				&xfs_buffered_write_iomap_ops);
}

int
xfs_truncate_page(
	struct xfs_inode	*ip,
	loff_t			pos,
	bool			*did_zero)
{
	struct inode		*inode = VFS_I(ip);

	if (IS_DAX(inode))
		return dax_truncate_page(inode, pos, did_zero,
					 &xfs_dax_write_iomap_ops);
	return iomap_truncate_page(inode, pos, did_zero,
				   &xfs_buffered_write_iomap_ops);
}