// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * Copyright (c) 2016-2018 Christoph Hellwig.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_bmap_btree.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trans.h"
#include "xfs_trans_space.h"
#include "xfs_inode_item.h"
#include "xfs_iomap.h"
#include "xfs_trace.h"
#include "xfs_quota.h"
#include "xfs_dquot_item.h"
#include "xfs_dquot.h"
#include "xfs_reflink.h"

#define XFS_ALLOC_ALIGN(mp, off) \
	(((off) >> mp->m_allocsize_log) << mp->m_allocsize_log)

static int
xfs_alert_fsblock_zero(
	xfs_inode_t	*ip,
	xfs_bmbt_irec_t	*imap)
{
	xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,
			"Access to block zero in inode %llu "
			"start_block: %llx start_off: %llx "
			"blkcnt: %llx extent-state: %x",
		(unsigned long long)ip->i_ino,
		(unsigned long long)imap->br_startblock,
		(unsigned long long)imap->br_startoff,
		(unsigned long long)imap->br_blockcount,
		imap->br_state);
	return -EFSCORRUPTED;
}

u64
xfs_iomap_inode_sequence(
	struct xfs_inode	*ip,
	u16	iomap_flags)
{
	u64	cookie = 0;

	if (iomap_flags & IOMAP_F_XATTR)
		return READ_ONCE(ip->i_af.if_seq);
	if ((iomap_flags & IOMAP_F_SHARED) && ip->i_cowfp)
		cookie = (u64)READ_ONCE(ip->i_cowfp->if_seq) << 32;
	return cookie | READ_ONCE(ip->i_df.if_seq);
}
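
/*
 * Note on the cookie layout: the COW fork sequence number (if any) goes in
 * the upper 32 bits and the data fork sequence number in the lower 32 bits;
 * xattr mappings just use the attr fork sequence number.  Any change to the
 * relevant fork's extent list bumps its sequence number, so a cached iomap
 * carrying a stale cookie fails the xfs_iomap_valid() check below and the
 * caller has to remap.
 */
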
/*
 * Check that the iomap passed to us is still valid for the given offset and
 * length.
 */
static bool
xfs_iomap_valid(
	struct inode		*inode,
	const struct iomap	*iomap)
{
	struct xfs_inode	*ip = XFS_I(inode);

	if (iomap->validity_cookie !=
			xfs_iomap_inode_sequence(ip, iomap->flags)) {
		trace_xfs_iomap_invalid(ip, iomap);
		return false;
	}

	XFS_ERRORTAG_DELAY(ip->i_mount, XFS_ERRTAG_WRITE_DELAY_MS);
	return true;
}

static const struct iomap_folio_ops xfs_iomap_folio_ops = {
	.iomap_valid		= xfs_iomap_valid,
};

int
xfs_bmbt_to_iomap(
	struct xfs_inode	*ip,
	struct iomap		*iomap,
	struct xfs_bmbt_irec	*imap,
	unsigned int		mapping_flags,
	u16			iomap_flags,
	u64			sequence_cookie)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);

	if (unlikely(!xfs_valid_startblock(ip, imap->br_startblock)))
		return xfs_alert_fsblock_zero(ip, imap);

	if (imap->br_startblock == HOLESTARTBLOCK) {
		iomap->addr = IOMAP_NULL_ADDR;
		iomap->type = IOMAP_HOLE;
	} else if (imap->br_startblock == DELAYSTARTBLOCK ||
		   isnullstartblock(imap->br_startblock)) {
		iomap->addr = IOMAP_NULL_ADDR;
		iomap->type = IOMAP_DELALLOC;
	} else {
		iomap->addr = BBTOB(xfs_fsb_to_db(ip, imap->br_startblock));
		if (mapping_flags & IOMAP_DAX)
			iomap->addr += target->bt_dax_part_off;

		if (imap->br_state == XFS_EXT_UNWRITTEN)
			iomap->type = IOMAP_UNWRITTEN;
		else
			iomap->type = IOMAP_MAPPED;

	}
	iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff);
	iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
	if (mapping_flags & IOMAP_DAX)
		iomap->dax_dev = target->bt_daxdev;
	else
		iomap->bdev = target->bt_bdev;
	iomap->flags = iomap_flags;

	if (xfs_ipincount(ip) &&
	    (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
		iomap->flags |= IOMAP_F_DIRTY;

	iomap->validity_cookie = sequence_cookie;
	iomap->folio_ops = &xfs_iomap_folio_ops;
	return 0;
}

static void
xfs_hole_to_iomap(
	struct xfs_inode	*ip,
	struct iomap		*iomap,
	xfs_fileoff_t		offset_fsb,
	xfs_fileoff_t		end_fsb)
{
	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);

	iomap->addr = IOMAP_NULL_ADDR;
	iomap->type = IOMAP_HOLE;
	iomap->offset = XFS_FSB_TO_B(ip->i_mount, offset_fsb);
	iomap->length = XFS_FSB_TO_B(ip->i_mount, end_fsb - offset_fsb);
	iomap->bdev = target->bt_bdev;
	iomap->dax_dev = target->bt_daxdev;
}

static inline xfs_fileoff_t
xfs_iomap_end_fsb(
	struct xfs_mount	*mp,
	loff_t			offset,
	loff_t			count)
{
	ASSERT(offset <= mp->m_super->s_maxbytes);
	return min(XFS_B_TO_FSB(mp, offset + count),
		   XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes));
}

static xfs_extlen_t
xfs_eof_alignment(
	struct xfs_inode	*ip)
{
	struct xfs_mount	*mp = ip->i_mount;
	xfs_extlen_t		align = 0;

	if (!XFS_IS_REALTIME_INODE(ip)) {
		/*
		 * Round up the allocation request to a stripe unit
		 * (m_dalign) boundary if the file size is >= stripe unit
		 * size, and we are allocating past the allocation eof.
		 *
		 * If mounted with the "-o swalloc" option the alignment is
		 * increased from the stripe unit size to the stripe width.
		 */
		if (mp->m_swidth && xfs_has_swalloc(mp))
			align = mp->m_swidth;
		else if (mp->m_dalign)
			align = mp->m_dalign;

		if (align && XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, align))
			align = 0;
	}

	return align;
}

/*
 * Check if last_fsb is outside the last extent, and if so grow it to the next
 * stripe unit boundary.
 */
xfs_fileoff_t
xfs_iomap_eof_align_last_fsb(
	struct xfs_inode	*ip,
	xfs_fileoff_t		end_fsb)
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
	xfs_extlen_t		extsz = xfs_get_extsz_hint(ip);
	xfs_extlen_t		align = xfs_eof_alignment(ip);
	struct xfs_bmbt_irec	irec;
	struct xfs_iext_cursor	icur;

	ASSERT(!xfs_need_iread_extents(ifp));

	/*
	 * Always round up the allocation request to the extent hint boundary.
	 */
	if (extsz) {
		if (align)
			align = roundup_64(align, extsz);
		else
			align = extsz;
	}

	if (align) {
		xfs_fileoff_t	aligned_end_fsb = roundup_64(end_fsb, align);

		xfs_iext_last(ifp, &icur);
		if (!xfs_iext_get_extent(ifp, &icur, &irec) ||
		    aligned_end_fsb >= irec.br_startoff + irec.br_blockcount)
			return aligned_end_fsb;
	}

	return end_fsb;
}

int
xfs_iomap_write_direct(
	struct xfs_inode	*ip,
	xfs_fileoff_t		offset_fsb,
	xfs_fileoff_t		count_fsb,
	unsigned int		flags,
	struct xfs_bmbt_irec	*imap,
	u64			*seq)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	xfs_filblks_t		resaligned;
	int			nimaps;
	unsigned int		dblocks, rblocks;
	bool			force = false;
	int			error;
	int			bmapi_flags = XFS_BMAPI_PREALLOC;
	int			nr_exts = XFS_IEXT_ADD_NOSPLIT_CNT;

	ASSERT(count_fsb > 0);

	resaligned = xfs_aligned_fsb_count(offset_fsb, count_fsb,
					   xfs_get_extsz_hint(ip));
	if (unlikely(XFS_IS_REALTIME_INODE(ip))) {
		dblocks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
		rblocks = resaligned;
	} else {
		dblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
		rblocks = 0;
	}

	error = xfs_qm_dqattach(ip);
	if (error)
		return error;

	/*
	 * For DAX, we do not allocate unwritten extents, but instead we zero
	 * the block before we commit the transaction.  Ideally we'd like to do
	 * this outside the transaction context, but if we commit and then crash
	 * we may not have zeroed the blocks and this will be exposed on
	 * recovery of the allocation.  Hence we must zero before commit.
	 *
	 * Further, if we are mapping unwritten extents here, we need to zero
	 * and convert them to written so that we don't need an unwritten extent
	 * callback for DAX.  This also means that we need to be able to dip into
	 * the reserve block pool for bmbt block allocation if there is no space
	 * left but we need to do unwritten extent conversion.
	 */
	if (flags & IOMAP_DAX) {
		bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO;
		if (imap->br_state == XFS_EXT_UNWRITTEN) {
			force = true;
			nr_exts = XFS_IEXT_WRITE_UNWRITTEN_CNT;
			dblocks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
		}
	}

	error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write, dblocks,
			rblocks, force, &tp);
	if (error)
		return error;

	error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK, nr_exts);
	if (error == -EFBIG)
		error = xfs_iext_count_upgrade(tp, ip, nr_exts);
	if (error)
		goto out_trans_cancel;

	/*
	 * From this point onwards we overwrite the imap pointer that the
	 * caller gave to us.
	 */
	nimaps = 1;
	error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flags, 0,
				imap, &nimaps);
	if (error)
		goto out_trans_cancel;

	/*
	 * Complete the transaction
	 */
	error = xfs_trans_commit(tp);
	if (error)
		goto out_unlock;

	/*
	 * Copy any maps to caller's array and return any error.
	 */
	if (nimaps == 0) {
		error = -ENOSPC;
		goto out_unlock;
	}

	if (unlikely(!xfs_valid_startblock(ip, imap->br_startblock)))
		error = xfs_alert_fsblock_zero(ip, imap);

out_unlock:
	*seq = xfs_iomap_inode_sequence(ip, 0);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;

out_trans_cancel:
	xfs_trans_cancel(tp);
	goto out_unlock;
}

STATIC bool
xfs_quota_need_throttle(
	struct xfs_inode	*ip,
	xfs_dqtype_t		type,
	xfs_fsblock_t		alloc_blocks)
{
	struct xfs_dquot	*dq = xfs_inode_dquot(ip, type);

	if (!dq || !xfs_this_quota_on(ip->i_mount, type))
		return false;

	/* no hi watermark, no throttle */
	if (!dq->q_prealloc_hi_wmark)
		return false;

	/* under the lo watermark, no throttle */
	if (dq->q_blk.reserved + alloc_blocks < dq->q_prealloc_lo_wmark)
		return false;

	return true;
}

STATIC void
xfs_quota_calc_throttle(
	struct xfs_inode	*ip,
	xfs_dqtype_t		type,
	xfs_fsblock_t		*qblocks,
	int			*qshift,
	int64_t			*qfreesp)
{
	struct xfs_dquot	*dq = xfs_inode_dquot(ip, type);
	int64_t			freesp;
	int			shift = 0;

	/* no dq, or over hi wmark, squash the prealloc completely */
	if (!dq || dq->q_blk.reserved >= dq->q_prealloc_hi_wmark) {
		*qblocks = 0;
		*qfreesp = 0;
		return;
	}

	freesp = dq->q_prealloc_hi_wmark - dq->q_blk.reserved;
	if (freesp < dq->q_low_space[XFS_QLOWSP_5_PCNT]) {
		shift = 2;
		if (freesp < dq->q_low_space[XFS_QLOWSP_3_PCNT])
			shift += 2;
		if (freesp < dq->q_low_space[XFS_QLOWSP_1_PCNT])
			shift += 2;
	}

	if (freesp < *qfreesp)
		*qfreesp = freesp;

	/* only overwrite the throttle values if we are more aggressive */
	if ((freesp >> shift) < (*qblocks >> *qshift)) {
		*qblocks = freesp;
		*qshift = shift;
	}
}
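
/*
 * Illustrative example of the throttle shifts (assumed numbers only): the
 * quota checks above can raise qshift to at most 6 (2 below the 5% low space
 * threshold, plus 2 below 3% and 2 below 1%), and the global free space
 * checks in xfs_iomap_prealloc_size() below can likewise raise the shift to
 * at most 6.  The larger of the two is applied as "alloc_blocks >>= shift",
 * so a fully throttled preallocation is cut to 1/64th of its unthrottled
 * size before being rounded down to a power of two.
 */
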
/*
 * If we don't have a user specified preallocation size, dynamically increase
 * the preallocation size as the size of the file grows.  Cap the maximum size
 * at a single extent or less if the filesystem is near full.  The closer the
 * filesystem is to being full, the smaller the maximum preallocation.
 */
STATIC xfs_fsblock_t
xfs_iomap_prealloc_size(
	struct xfs_inode	*ip,
	int			whichfork,
	loff_t			offset,
	loff_t			count,
	struct xfs_iext_cursor	*icur)
{
	struct xfs_iext_cursor	ncur = *icur;
	struct xfs_bmbt_irec	prev, got;
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
	int64_t			freesp;
	xfs_fsblock_t		qblocks;
	xfs_fsblock_t		alloc_blocks = 0;
	xfs_extlen_t		plen;
	int			shift = 0;
	int			qshift = 0;

	/*
	 * As an exception we don't do any preallocation at all if the file is
	 * smaller than the minimum preallocation and we are using the default
	 * dynamic preallocation scheme, as it is likely this is the only write
	 * to the file that is going to be done.
	 */
	if (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_allocsize_blocks))
		return 0;

	/*
	 * Use the minimum preallocation size for small files or if we are
	 * writing right after a hole.
	 */
	if (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign) ||
	    !xfs_iext_prev_extent(ifp, &ncur, &prev) ||
	    prev.br_startoff + prev.br_blockcount < offset_fsb)
		return mp->m_allocsize_blocks;

	/*
	 * Take the size of the preceding data extents as the basis for the
	 * preallocation size.  Note that we don't care if the previous extents
	 * are written or not.
	 */
	plen = prev.br_blockcount;
	while (xfs_iext_prev_extent(ifp, &ncur, &got)) {
		if (plen > XFS_MAX_BMBT_EXTLEN / 2 ||
		    isnullstartblock(got.br_startblock) ||
		    got.br_startoff + got.br_blockcount != prev.br_startoff ||
		    got.br_startblock + got.br_blockcount != prev.br_startblock)
			break;
		plen += got.br_blockcount;
		prev = got;
	}

	/*
	 * If the size of the extents is greater than half the maximum extent
	 * length, then use the current offset as the basis.  This ensures that
	 * for large files the preallocation size always extends to
	 * XFS_MAX_BMBT_EXTLEN rather than falling short due to things like
	 * stripe unit/width alignment of real extents.
	 */
	alloc_blocks = plen * 2;
	if (alloc_blocks > XFS_MAX_BMBT_EXTLEN)
		alloc_blocks = XFS_B_TO_FSB(mp, offset);
	qblocks = alloc_blocks;

	/*
	 * XFS_MAX_BMBT_EXTLEN is not a power of two value but we round the
	 * prealloc down to the nearest power of two value after throttling.
	 * To prevent the round down from unconditionally reducing the maximum
	 * supported prealloc size, we round up first, apply appropriate
	 * throttling, round down and cap the value to XFS_MAX_BMBT_EXTLEN.
	 */
	alloc_blocks = XFS_FILEOFF_MIN(roundup_pow_of_two(XFS_MAX_BMBT_EXTLEN),
				       alloc_blocks);

	freesp = percpu_counter_read_positive(&mp->m_fdblocks);
	if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) {
		shift = 2;
		if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT])
			shift++;
		if (freesp < mp->m_low_space[XFS_LOWSP_3_PCNT])
			shift++;
		if (freesp < mp->m_low_space[XFS_LOWSP_2_PCNT])
			shift++;
		if (freesp < mp->m_low_space[XFS_LOWSP_1_PCNT])
			shift++;
	}

	/*
	 * Check each quota to cap the prealloc size, provide a shift value to
	 * throttle with and adjust amount of available space.
	 */
	if (xfs_quota_need_throttle(ip, XFS_DQTYPE_USER, alloc_blocks))
		xfs_quota_calc_throttle(ip, XFS_DQTYPE_USER, &qblocks, &qshift,
					&freesp);
	if (xfs_quota_need_throttle(ip, XFS_DQTYPE_GROUP, alloc_blocks))
		xfs_quota_calc_throttle(ip, XFS_DQTYPE_GROUP, &qblocks, &qshift,
					&freesp);
	if (xfs_quota_need_throttle(ip, XFS_DQTYPE_PROJ, alloc_blocks))
		xfs_quota_calc_throttle(ip, XFS_DQTYPE_PROJ, &qblocks, &qshift,
					&freesp);

	/*
	 * The final prealloc size is set to the minimum of free space available
	 * in each of the quotas and the overall filesystem.
	 *
	 * The shift throttle value is set to the maximum value as determined by
	 * the global low free space values and per-quota low free space values.
	 */
	alloc_blocks = min(alloc_blocks, qblocks);
	shift = max(shift, qshift);

	if (shift)
		alloc_blocks >>= shift;
	/*
	 * rounddown_pow_of_two() returns an undefined result if we pass in
	 * alloc_blocks = 0.
	 */
	if (alloc_blocks)
		alloc_blocks = rounddown_pow_of_two(alloc_blocks);
	if (alloc_blocks > XFS_MAX_BMBT_EXTLEN)
		alloc_blocks = XFS_MAX_BMBT_EXTLEN;

	/*
	 * If we are still trying to allocate more space than is
	 * available, squash the prealloc hard.  This can happen if we
	 * have a large file on a small filesystem and the above
	 * lowspace thresholds are smaller than XFS_MAX_BMBT_EXTLEN.
	 */
	while (alloc_blocks && alloc_blocks >= freesp)
		alloc_blocks >>= 4;
	if (alloc_blocks < mp->m_allocsize_blocks)
		alloc_blocks = mp->m_allocsize_blocks;
	trace_xfs_iomap_prealloc_size(ip, alloc_blocks, shift,
				      mp->m_allocsize_blocks);
	return alloc_blocks;
}

int
xfs_iomap_write_unwritten(
	xfs_inode_t	*ip,
	xfs_off_t	offset,
	xfs_off_t	count,
	bool		update_isize)
{
	xfs_mount_t	*mp = ip->i_mount;
	xfs_fileoff_t	offset_fsb;
	xfs_filblks_t	count_fsb;
	xfs_filblks_t	numblks_fsb;
	int		nimaps;
	xfs_trans_t	*tp;
	xfs_bmbt_irec_t	imap;
	struct inode	*inode = VFS_I(ip);
	xfs_fsize_t	i_size;
	uint		resblks;
	int		error;

	trace_xfs_unwritten_convert(ip, offset, count);

	offset_fsb = XFS_B_TO_FSBT(mp, offset);
	count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
	count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);

	/*
	 * Reserve enough blocks in this transaction for two complete extent
	 * btree splits.  We may be converting the middle part of an unwritten
	 * extent and in this case we will insert two new extents in the btree
	 * each of which could cause a full split.
	 *
	 * This reservation amount will be used in the first call to
	 * xfs_bmbt_split() to select an AG with enough space to satisfy the
	 * rest of the operation.
	 */
	resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;

	/* Attach dquots so that bmbt splits are accounted correctly. */
	error = xfs_qm_dqattach(ip);
	if (error)
		return error;

	do {
		/*
		 * Set up a transaction to convert the range of extents
		 * from unwritten to real.  Do allocations in a loop until
		 * we have covered the range passed in.
		 *
		 * Note that we can't risk recursing back into the filesystem
		 * here as we might be asked to write out the same inode that we
		 * complete here and might deadlock on the iolock.
		 */
		error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write, resblks,
				0, true, &tp);
		if (error)
			return error;

		error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK,
				XFS_IEXT_WRITE_UNWRITTEN_CNT);
		if (error == -EFBIG)
			error = xfs_iext_count_upgrade(tp, ip,
					XFS_IEXT_WRITE_UNWRITTEN_CNT);
		if (error)
			goto error_on_bmapi_transaction;

		/*
		 * Modify the unwritten extent state of the buffer.
		 */
		nimaps = 1;
		error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
					XFS_BMAPI_CONVERT, resblks, &imap,
					&nimaps);
		if (error)
			goto error_on_bmapi_transaction;

		/*
		 * Log the updated inode size as we go.  We have to be careful
		 * to only log it up to the actual write offset if it is
		 * halfway into a block.
		 */
		i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb);
		if (i_size > offset + count)
			i_size = offset + count;
		if (update_isize && i_size > i_size_read(inode))
			i_size_write(inode, i_size);
		i_size = xfs_new_eof(ip, i_size);
		if (i_size) {
			ip->i_disk_size = i_size;
			xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
		}

		error = xfs_trans_commit(tp);
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		if (error)
			return error;

		if (unlikely(!xfs_valid_startblock(ip, imap.br_startblock)))
			return xfs_alert_fsblock_zero(ip, &imap);

		if ((numblks_fsb = imap.br_blockcount) == 0) {
			/*
			 * The numblks_fsb value should always get
			 * smaller, otherwise the loop is stuck.
			 */
			ASSERT(imap.br_blockcount);
			break;
		}
		offset_fsb += numblks_fsb;
		count_fsb -= numblks_fsb;
	} while (count_fsb > 0);

	return 0;

error_on_bmapi_transaction:
	xfs_trans_cancel(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}

static inline bool
imap_needs_alloc(
	struct inode		*inode,
	unsigned		flags,
	struct xfs_bmbt_irec	*imap,
	int			nimaps)
{
	/* don't allocate blocks when just zeroing */
	if (flags & IOMAP_ZERO)
		return false;
	if (!nimaps ||
	    imap->br_startblock == HOLESTARTBLOCK ||
	    imap->br_startblock == DELAYSTARTBLOCK)
		return true;
	/* we convert unwritten extents before copying the data for DAX */
	if ((flags & IOMAP_DAX) && imap->br_state == XFS_EXT_UNWRITTEN)
		return true;
	return false;
}

static inline bool
imap_needs_cow(
	struct xfs_inode	*ip,
	unsigned int		flags,
	struct xfs_bmbt_irec	*imap,
	int			nimaps)
{
	if (!xfs_is_cow_inode(ip))
		return false;

	/* when zeroing we don't have to COW holes or unwritten extents */
	if (flags & IOMAP_ZERO) {
		if (!nimaps ||
		    imap->br_startblock == HOLESTARTBLOCK ||
		    imap->br_state == XFS_EXT_UNWRITTEN)
			return false;
	}

	return true;
}
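
/*
 * Pick and take the inode lock mode needed to map the given range: shared is
 * sufficient for plain lookups, but reading in the extent list or preparing
 * a COW write requires the lock exclusively.  With IOMAP_NOWAIT we only
 * trylock and return -EAGAIN instead of sleeping.
 */
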
static int
xfs_ilock_for_iomap(
	struct xfs_inode	*ip,
	unsigned		flags,
	unsigned		*lockmode)
{
	unsigned int		mode = *lockmode;
	bool			is_write = flags & (IOMAP_WRITE | IOMAP_ZERO);

	/*
	 * COW writes may allocate delalloc space or convert unwritten COW
	 * extents, so we need to make sure to take the lock exclusively here.
	 */
	if (xfs_is_cow_inode(ip) && is_write)
		mode = XFS_ILOCK_EXCL;

	/*
	 * Extents not yet cached require exclusive access, don't block.  This
	 * is an opencoded xfs_ilock_data_map_shared() call but with
	 * non-blocking behaviour.
	 */
	if (xfs_need_iread_extents(&ip->i_df)) {
		if (flags & IOMAP_NOWAIT)
			return -EAGAIN;
		mode = XFS_ILOCK_EXCL;
	}

relock:
	if (flags & IOMAP_NOWAIT) {
		if (!xfs_ilock_nowait(ip, mode))
			return -EAGAIN;
	} else {
		xfs_ilock(ip, mode);
	}

	/*
	 * The reflink iflag could have changed since the earlier unlocked
	 * check, so if we got ILOCK_SHARED for a write but we're now a
	 * reflink inode we have to switch to ILOCK_EXCL and relock.
	 */
	if (mode == XFS_ILOCK_SHARED && is_write && xfs_is_cow_inode(ip)) {
		xfs_iunlock(ip, mode);
		mode = XFS_ILOCK_EXCL;
		goto relock;
	}

	*lockmode = mode;
	return 0;
}

/*
 * Check that the imap we are going to return to the caller spans the entire
 * range that the caller requested for the IO.
 */
static bool
imap_spans_range(
	struct xfs_bmbt_irec	*imap,
	xfs_fileoff_t		offset_fsb,
	xfs_fileoff_t		end_fsb)
{
	if (imap->br_startoff > offset_fsb)
		return false;
	if (imap->br_startoff + imap->br_blockcount < end_fsb)
		return false;
	return true;
}

static int
xfs_direct_write_iomap_begin(
	struct inode		*inode,
	loff_t			offset,
	loff_t			length,
	unsigned		flags,
	struct iomap		*iomap,
	struct iomap		*srcmap)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_bmbt_irec	imap, cmap;
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
	xfs_fileoff_t		end_fsb = xfs_iomap_end_fsb(mp, offset, length);
	int			nimaps = 1, error = 0;
	bool			shared = false;
	u16			iomap_flags = 0;
	unsigned int		lockmode = XFS_ILOCK_SHARED;
	u64			seq;

	ASSERT(flags & (IOMAP_WRITE | IOMAP_ZERO));

	if (xfs_is_shutdown(mp))
		return -EIO;

	/*
	 * Writes that span EOF might trigger an IO size update on completion,
	 * so consider them to be dirty for the purposes of O_DSYNC even if
	 * there are no other metadata changes pending or made here.
	 */
	if (offset + length > i_size_read(inode))
		iomap_flags |= IOMAP_F_DIRTY;

	error = xfs_ilock_for_iomap(ip, flags, &lockmode);
	if (error)
		return error;

	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
			       &nimaps, 0);
	if (error)
		goto out_unlock;

	if (imap_needs_cow(ip, flags, &imap, nimaps)) {
		error = -EAGAIN;
		if (flags & IOMAP_NOWAIT)
			goto out_unlock;

		/* may drop and re-acquire the ilock */
		error = xfs_reflink_allocate_cow(ip, &imap, &cmap, &shared,
				&lockmode,
				(flags & IOMAP_DIRECT) || IS_DAX(inode));
		if (error)
			goto out_unlock;
		if (shared)
			goto out_found_cow;
		end_fsb = imap.br_startoff + imap.br_blockcount;
		length = XFS_FSB_TO_B(mp, end_fsb) - offset;
	}

	if (imap_needs_alloc(inode, flags, &imap, nimaps))
		goto allocate_blocks;

	/*
	 * NOWAIT and OVERWRITE I/O needs to span the entire requested I/O with
	 * a single map so that we avoid partial IO failures due to the rest of
	 * the I/O range not covered by this map triggering an EAGAIN condition
	 * when it is subsequently mapped and aborting the I/O.
	 */
	if (flags & (IOMAP_NOWAIT | IOMAP_OVERWRITE_ONLY)) {
		error = -EAGAIN;
		if (!imap_spans_range(&imap, offset_fsb, end_fsb))
			goto out_unlock;
	}

	/*
	 * For overwrite only I/O, we cannot convert unwritten extents without
	 * requiring sub-block zeroing.  This can only be done under an
	 * exclusive IOLOCK, hence return -EAGAIN if this is not a written
	 * extent to tell the caller to try again.
	 */
	if (flags & IOMAP_OVERWRITE_ONLY) {
		error = -EAGAIN;
		if (imap.br_state != XFS_EXT_NORM &&
		    ((offset | length) & mp->m_blockmask))
			goto out_unlock;
	}

	seq = xfs_iomap_inode_sequence(ip, iomap_flags);
	xfs_iunlock(ip, lockmode);
	trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, iomap_flags, seq);

allocate_blocks:
	error = -EAGAIN;
	if (flags & (IOMAP_NOWAIT | IOMAP_OVERWRITE_ONLY))
		goto out_unlock;

	/*
	 * We cap the maximum length we map to a sane size to keep the chunks
	 * of work done somewhat symmetric with the work that writeback does.
	 * This is a completely arbitrary number pulled out of thin air as a
	 * best guess for initial testing.
	 *
	 * Note that the value needs to be less than 32 bits wide until the
	 * lower level functions are updated.
	 */
	length = min_t(loff_t, length, 1024 * PAGE_SIZE);
	end_fsb = xfs_iomap_end_fsb(mp, offset, length);

	if (offset + length > XFS_ISIZE(ip))
		end_fsb = xfs_iomap_eof_align_last_fsb(ip, end_fsb);
	else if (nimaps && imap.br_startblock == HOLESTARTBLOCK)
		end_fsb = min(end_fsb, imap.br_startoff + imap.br_blockcount);
	xfs_iunlock(ip, lockmode);

	error = xfs_iomap_write_direct(ip, offset_fsb, end_fsb - offset_fsb,
			flags, &imap, &seq);
	if (error)
		return error;

	trace_xfs_iomap_alloc(ip, offset, length, XFS_DATA_FORK, &imap);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags,
				 iomap_flags | IOMAP_F_NEW, seq);

out_found_cow:
	length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount);
	trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap);
	if (imap.br_startblock != HOLESTARTBLOCK) {
		seq = xfs_iomap_inode_sequence(ip, 0);
		error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq);
		if (error)
			goto out_unlock;
	}
	seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
	xfs_iunlock(ip, lockmode);
	return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED, seq);

out_unlock:
	if (lockmode)
		xfs_iunlock(ip, lockmode);
	return error;
}

const struct iomap_ops xfs_direct_write_iomap_ops = {
	.iomap_begin		= xfs_direct_write_iomap_begin,
};

static int
xfs_dax_write_iomap_end(
	struct inode		*inode,
	loff_t			pos,
	loff_t			length,
	ssize_t			written,
	unsigned		flags,
	struct iomap		*iomap)
{
	struct xfs_inode	*ip = XFS_I(inode);

	if (!xfs_is_cow_inode(ip))
		return 0;

	if (!written) {
		xfs_reflink_cancel_cow_range(ip, pos, length, true);
		return 0;
	}

	return xfs_reflink_end_cow(ip, pos, written);
}

const struct iomap_ops xfs_dax_write_iomap_ops = {
	.iomap_begin	= xfs_direct_write_iomap_begin,
	.iomap_end	= xfs_dax_write_iomap_end,
};
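
/*
 * Buffered writes normally reserve delayed allocation (delalloc) blocks in
 * the data or COW fork here; the reservation is converted to real extents
 * later, at writeback time.  Inodes with an extent size hint cannot use
 * delalloc and fall back to the direct write path above.
 */
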
static int
xfs_buffered_write_iomap_begin(
	struct inode		*inode,
	loff_t			offset,
	loff_t			count,
	unsigned		flags,
	struct iomap		*iomap,
	struct iomap		*srcmap)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
	xfs_fileoff_t		end_fsb = xfs_iomap_end_fsb(mp, offset, count);
	struct xfs_bmbt_irec	imap, cmap;
	struct xfs_iext_cursor	icur, ccur;
	xfs_fsblock_t		prealloc_blocks = 0;
	bool			eof = false, cow_eof = false, shared = false;
	int			allocfork = XFS_DATA_FORK;
	int			error = 0;
	unsigned int		lockmode = XFS_ILOCK_EXCL;
	u64			seq;

	if (xfs_is_shutdown(mp))
		return -EIO;

	/* we can't use delayed allocations when using extent size hints */
	if (xfs_get_extsz_hint(ip))
		return xfs_direct_write_iomap_begin(inode, offset, count,
				flags, iomap, srcmap);

	ASSERT(!XFS_IS_REALTIME_INODE(ip));

	error = xfs_qm_dqattach(ip);
	if (error)
		return error;

	error = xfs_ilock_for_iomap(ip, flags, &lockmode);
	if (error)
		return error;

	if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(&ip->i_df)) ||
	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
		error = -EFSCORRUPTED;
		goto out_unlock;
	}

	XFS_STATS_INC(mp, xs_blk_mapw);

	error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
	if (error)
		goto out_unlock;

	/*
	 * Search the data fork first to look up our source mapping.  We
	 * always need the data fork map, as we have to return it to the
	 * iomap code so that the higher level write code can read data in to
	 * perform read-modify-write cycles for unaligned writes.
	 */
	eof = !xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap);
	if (eof)
		imap.br_startoff = end_fsb; /* fake hole until the end */

	/* We never need to allocate blocks for zeroing or unsharing a hole. */
	if ((flags & (IOMAP_UNSHARE | IOMAP_ZERO)) &&
	    imap.br_startoff > offset_fsb) {
		xfs_hole_to_iomap(ip, iomap, offset_fsb, imap.br_startoff);
		goto out_unlock;
	}

	/*
	 * Search the COW fork extent list even if we did not find a data fork
	 * extent.  This serves two purposes: first this implements the
	 * speculative preallocation using cowextsize, so that we also unshare
	 * blocks adjacent to shared blocks instead of just the shared blocks
	 * themselves.  Second the lookup in the extent list is generally faster
	 * than going out to the shared extent tree.
	 */
	if (xfs_is_cow_inode(ip)) {
		if (!ip->i_cowfp) {
			ASSERT(!xfs_is_reflink_inode(ip));
			xfs_ifork_init_cow(ip);
		}
		cow_eof = !xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb,
				&ccur, &cmap);
		if (!cow_eof && cmap.br_startoff <= offset_fsb) {
			trace_xfs_reflink_cow_found(ip, &cmap);
			goto found_cow;
		}
	}

	if (imap.br_startoff <= offset_fsb) {
		/*
		 * For reflink files we may need a delalloc reservation when
		 * overwriting shared extents.  This includes zeroing of
		 * existing extents that contain data.
		 */
		if (!xfs_is_cow_inode(ip) ||
		    ((flags & IOMAP_ZERO) && imap.br_state != XFS_EXT_NORM)) {
			trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK,
					&imap);
			goto found_imap;
		}

		xfs_trim_extent(&imap, offset_fsb, end_fsb - offset_fsb);

		/* Trim the mapping to the nearest shared extent boundary. */
		error = xfs_bmap_trim_cow(ip, &imap, &shared);
		if (error)
			goto out_unlock;

		/* Not shared?  Just report the (potentially capped) extent. */
		if (!shared) {
			trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK,
					&imap);
			goto found_imap;
		}

		/*
		 * Fork all the shared blocks from our write offset until the
		 * end of the extent.
		 */
		allocfork = XFS_COW_FORK;
		end_fsb = imap.br_startoff + imap.br_blockcount;
	} else {
		/*
		 * We cap the maximum length we map here to MAX_WRITEBACK_PAGES
		 * pages to keep the chunks of work done here somewhat symmetric
		 * with the work writeback does.  This is a completely arbitrary
		 * number pulled out of thin air.
		 *
		 * Note that the value needs to be less than 32 bits wide until
		 * the lower level functions are updated.
		 */
		count = min_t(loff_t, count, 1024 * PAGE_SIZE);
		end_fsb = xfs_iomap_end_fsb(mp, offset, count);

		if (xfs_is_always_cow_inode(ip))
			allocfork = XFS_COW_FORK;
	}

	if (eof && offset + count > XFS_ISIZE(ip)) {
		/*
		 * Determine the initial size of the preallocation.
		 * We clean up any extra preallocation when the file is closed.
		 */
		if (xfs_has_allocsize(mp))
			prealloc_blocks = mp->m_allocsize_blocks;
		else if (allocfork == XFS_DATA_FORK)
			prealloc_blocks = xfs_iomap_prealloc_size(ip, allocfork,
						offset, count, &icur);
		else
			prealloc_blocks = xfs_iomap_prealloc_size(ip, allocfork,
						offset, count, &ccur);
		if (prealloc_blocks) {
			xfs_extlen_t	align;
			xfs_off_t	end_offset;
			xfs_fileoff_t	p_end_fsb;

			end_offset = XFS_ALLOC_ALIGN(mp, offset + count - 1);
			p_end_fsb = XFS_B_TO_FSBT(mp, end_offset) +
					prealloc_blocks;

			align = xfs_eof_alignment(ip);
			if (align)
				p_end_fsb = roundup_64(p_end_fsb, align);

			p_end_fsb = min(p_end_fsb,
				XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes));
			ASSERT(p_end_fsb > offset_fsb);
			prealloc_blocks = p_end_fsb - end_fsb;
		}
	}

retry:
	error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb,
			end_fsb - offset_fsb, prealloc_blocks,
			allocfork == XFS_DATA_FORK ? &imap : &cmap,
			allocfork == XFS_DATA_FORK ? &icur : &ccur,
			allocfork == XFS_DATA_FORK ? eof : cow_eof);
	switch (error) {
	case 0:
		break;
	case -ENOSPC:
	case -EDQUOT:
		/* retry without any preallocation */
		trace_xfs_delalloc_enospc(ip, offset, count);
		if (prealloc_blocks) {
			prealloc_blocks = 0;
			goto retry;
		}
		fallthrough;
	default:
		goto out_unlock;
	}

	if (allocfork == XFS_COW_FORK) {
		trace_xfs_iomap_alloc(ip, offset, count, allocfork, &cmap);
		goto found_cow;
	}

	/*
	 * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
	 * them out if the write happens to fail.
	 */
	seq = xfs_iomap_inode_sequence(ip, IOMAP_F_NEW);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	trace_xfs_iomap_alloc(ip, offset, count, allocfork, &imap);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_NEW, seq);

found_imap:
	seq = xfs_iomap_inode_sequence(ip, 0);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq);

found_cow:
	seq = xfs_iomap_inode_sequence(ip, 0);
	if (imap.br_startoff <= offset_fsb) {
		error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq);
		if (error)
			goto out_unlock;
		seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags,
					 IOMAP_F_SHARED, seq);
	}

	xfs_trim_extent(&cmap, offset_fsb, imap.br_startoff - offset_fsb);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 0, seq);

out_unlock:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}

static int
xfs_buffered_write_delalloc_punch(
	struct inode		*inode,
	loff_t			offset,
	loff_t			length)
{
	return xfs_bmap_punch_delalloc_range(XFS_I(inode), offset,
			offset + length);
}

static int
xfs_buffered_write_iomap_end(
	struct inode		*inode,
	loff_t			offset,
	loff_t			length,
	ssize_t			written,
	unsigned		flags,
	struct iomap		*iomap)
{
	struct xfs_mount	*mp = XFS_M(inode->i_sb);
	int			error;

	error = iomap_file_buffered_write_punch_delalloc(inode, iomap, offset,
			length, written, &xfs_buffered_write_delalloc_punch);
	if (error && !xfs_is_shutdown(mp)) {
		xfs_alert(mp, "%s: unable to clean up ino 0x%llx",
			__func__, XFS_I(inode)->i_ino);
		return error;
	}
	return 0;
}

const struct iomap_ops xfs_buffered_write_iomap_ops = {
	.iomap_begin		= xfs_buffered_write_iomap_begin,
	.iomap_end		= xfs_buffered_write_iomap_end,
};

/*
 * iomap_page_mkwrite() will never fail in a way that requires delalloc extents
 * that it allocated to be revoked.  Hence we do not need an .iomap_end method
 * for this operation.
 */
const struct iomap_ops xfs_page_mkwrite_iomap_ops = {
	.iomap_begin		= xfs_buffered_write_iomap_begin,
};

static int
xfs_read_iomap_begin(
	struct inode		*inode,
	loff_t			offset,
	loff_t			length,
	unsigned		flags,
	struct iomap		*iomap,
	struct iomap		*srcmap)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_bmbt_irec	imap;
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
	xfs_fileoff_t		end_fsb = xfs_iomap_end_fsb(mp, offset, length);
	int			nimaps = 1, error = 0;
	bool			shared = false;
	unsigned int		lockmode = XFS_ILOCK_SHARED;
	u64			seq;

	ASSERT(!(flags & (IOMAP_WRITE | IOMAP_ZERO)));

	if (xfs_is_shutdown(mp))
		return -EIO;

	error = xfs_ilock_for_iomap(ip, flags, &lockmode);
	if (error)
		return error;
	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
			       &nimaps, 0);
	if (!error && ((flags & IOMAP_REPORT) || IS_DAX(inode)))
		error = xfs_reflink_trim_around_shared(ip, &imap, &shared);
	seq = xfs_iomap_inode_sequence(ip, shared ? IOMAP_F_SHARED : 0);
	xfs_iunlock(ip, lockmode);

	if (error)
		return error;
	trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags,
				 shared ? IOMAP_F_SHARED : 0, seq);
}

const struct iomap_ops xfs_read_iomap_ops = {
	.iomap_begin		= xfs_read_iomap_begin,
};

static int
xfs_seek_iomap_begin(
	struct inode		*inode,
	loff_t			offset,
	loff_t			length,
	unsigned		flags,
	struct iomap		*iomap,
	struct iomap		*srcmap)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + length);
	xfs_fileoff_t		cow_fsb = NULLFILEOFF, data_fsb = NULLFILEOFF;
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	imap, cmap;
	int			error = 0;
	unsigned		lockmode;
	u64			seq;

	if (xfs_is_shutdown(mp))
		return -EIO;

	lockmode = xfs_ilock_data_map_shared(ip);
	error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
	if (error)
		goto out_unlock;

	if (xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap)) {
		/*
		 * If we found a data extent we are done.
		 */
		if (imap.br_startoff <= offset_fsb)
			goto done;
		data_fsb = imap.br_startoff;
	} else {
		/*
		 * Fake a hole until the end of the file.
		 */
		data_fsb = xfs_iomap_end_fsb(mp, offset, length);
	}

	/*
	 * If a COW fork extent covers the hole, report it - capped to the next
	 * data fork extent:
	 */
	if (xfs_inode_has_cow_data(ip) &&
	    xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &cmap))
		cow_fsb = cmap.br_startoff;
	if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) {
		if (data_fsb < cow_fsb + cmap.br_blockcount)
			end_fsb = min(end_fsb, data_fsb);
		xfs_trim_extent(&cmap, offset_fsb, end_fsb);
		seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
		error = xfs_bmbt_to_iomap(ip, iomap, &cmap, flags,
				IOMAP_F_SHARED, seq);
		/*
		 * This is a COW extent, so we must probe the page cache
		 * because there could be dirty page cache being backed
		 * by this extent.
		 */
		iomap->type = IOMAP_UNWRITTEN;
		goto out_unlock;
	}

	/*
	 * Else report a hole, capped to the next found data or COW extent.
	 */
	if (cow_fsb != NULLFILEOFF && cow_fsb < data_fsb)
		imap.br_blockcount = cow_fsb - offset_fsb;
	else
		imap.br_blockcount = data_fsb - offset_fsb;
	imap.br_startoff = offset_fsb;
	imap.br_startblock = HOLESTARTBLOCK;
	imap.br_state = XFS_EXT_NORM;
done:
	seq = xfs_iomap_inode_sequence(ip, 0);
	xfs_trim_extent(&imap, offset_fsb, end_fsb);
	error = xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq);
out_unlock:
	xfs_iunlock(ip, lockmode);
	return error;
}

const struct iomap_ops xfs_seek_iomap_ops = {
	.iomap_begin		= xfs_seek_iomap_begin,
};

static int
xfs_xattr_iomap_begin(
	struct inode		*inode,
	loff_t			offset,
	loff_t			length,
	unsigned		flags,
	struct iomap		*iomap,
	struct iomap		*srcmap)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + length);
	struct xfs_bmbt_irec	imap;
	int			nimaps = 1, error = 0;
	unsigned		lockmode;
	int			seq;

	if (xfs_is_shutdown(mp))
		return -EIO;

	lockmode = xfs_ilock_attr_map_shared(ip);

	/* if there is no attribute fork or no extents, return ENOENT */
	if (!xfs_inode_has_attr_fork(ip) || !ip->i_af.if_nextents) {
		error = -ENOENT;
		goto out_unlock;
	}

	ASSERT(ip->i_af.if_format != XFS_DINODE_FMT_LOCAL);
	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
			       &nimaps, XFS_BMAPI_ATTRFORK);
out_unlock:
	seq = xfs_iomap_inode_sequence(ip, IOMAP_F_XATTR);
	xfs_iunlock(ip, lockmode);

	if (error)
		return error;
	ASSERT(nimaps);
	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_XATTR, seq);
}

const struct iomap_ops xfs_xattr_iomap_ops = {
	.iomap_begin		= xfs_xattr_iomap_begin,
};

int
xfs_zero_range(
	struct xfs_inode	*ip,
	loff_t			pos,
	loff_t			len,
	bool			*did_zero)
{
	struct inode		*inode = VFS_I(ip);

	if (IS_DAX(inode))
		return dax_zero_range(inode, pos, len, did_zero,
				      &xfs_dax_write_iomap_ops);
	return iomap_zero_range(inode, pos, len, did_zero,
				&xfs_buffered_write_iomap_ops);
}

int
xfs_truncate_page(
	struct xfs_inode	*ip,
	loff_t			pos,
	bool			*did_zero)
{
	struct inode		*inode = VFS_I(ip);

	if (IS_DAX(inode))
		return dax_truncate_page(inode, pos, did_zero,
					 &xfs_dax_write_iomap_ops);
	return iomap_truncate_page(inode, pos, did_zero,
				   &xfs_buffered_write_iomap_ops);
}