1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (c) 2020-2024 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <djwong@kernel.org> 5 */ 6 #include "xfs_platform.h" 7 #include "xfs_fs.h" 8 #include "xfs_format.h" 9 #include "xfs_log_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_bit.h" 12 #include "xfs_shared.h" 13 #include "xfs_mount.h" 14 #include "xfs_defer.h" 15 #include "xfs_inode.h" 16 #include "xfs_trans.h" 17 #include "xfs_trans_priv.h" 18 #include "xfs_exchmaps_item.h" 19 #include "xfs_exchmaps.h" 20 #include "xfs_log.h" 21 #include "xfs_bmap.h" 22 #include "xfs_icache.h" 23 #include "xfs_bmap_btree.h" 24 #include "xfs_trans_space.h" 25 #include "xfs_error.h" 26 #include "xfs_log_priv.h" 27 #include "xfs_log_recover.h" 28 #include "xfs_exchrange.h" 29 #include "xfs_trace.h" 30 31 struct kmem_cache *xfs_xmi_cache; 32 struct kmem_cache *xfs_xmd_cache; 33 34 static const struct xfs_item_ops xfs_xmi_item_ops; 35 36 static inline struct xfs_xmi_log_item *XMI_ITEM(struct xfs_log_item *lip) 37 { 38 return container_of(lip, struct xfs_xmi_log_item, xmi_item); 39 } 40 41 STATIC void 42 xfs_xmi_item_free( 43 struct xfs_xmi_log_item *xmi_lip) 44 { 45 kvfree(xmi_lip->xmi_item.li_lv_shadow); 46 kmem_cache_free(xfs_xmi_cache, xmi_lip); 47 } 48 49 /* 50 * Freeing the XMI requires that we remove it from the AIL if it has already 51 * been placed there. However, the XMI may not yet have been placed in the AIL 52 * when called by xfs_xmi_release() from XMD processing due to the ordering of 53 * committed vs unpin operations in bulk insert operations. Hence the reference 54 * count to ensure only the last caller frees the XMI. 55 */ 56 STATIC void 57 xfs_xmi_release( 58 struct xfs_xmi_log_item *xmi_lip) 59 { 60 ASSERT(atomic_read(&xmi_lip->xmi_refcount) > 0); 61 if (atomic_dec_and_test(&xmi_lip->xmi_refcount)) { 62 xfs_trans_ail_delete(&xmi_lip->xmi_item, 0); 63 xfs_xmi_item_free(xmi_lip); 64 } 65 } 66 67 68 STATIC void 69 xfs_xmi_item_size( 70 struct xfs_log_item *lip, 71 int *nvecs, 72 int *nbytes) 73 { 74 *nvecs += 1; 75 *nbytes += sizeof(struct xfs_xmi_log_format); 76 } 77 78 /* 79 * This is called to fill in the vector of log iovecs for the given xmi log 80 * item. We use only 1 iovec, and we point that at the xmi_log_format structure 81 * embedded in the xmi item. 82 */ 83 STATIC void 84 xfs_xmi_item_format( 85 struct xfs_log_item *lip, 86 struct xlog_format_buf *lfb) 87 { 88 struct xfs_xmi_log_item *xmi_lip = XMI_ITEM(lip); 89 90 xmi_lip->xmi_format.xmi_type = XFS_LI_XMI; 91 xmi_lip->xmi_format.xmi_size = 1; 92 93 xlog_format_copy(lfb, XLOG_REG_TYPE_XMI_FORMAT, &xmi_lip->xmi_format, 94 sizeof(struct xfs_xmi_log_format)); 95 } 96 97 /* 98 * The unpin operation is the last place an XMI is manipulated in the log. It 99 * is either inserted in the AIL or aborted in the event of a log I/O error. In 100 * either case, the XMI transaction has been successfully committed to make it 101 * this far. Therefore, we expect whoever committed the XMI to either construct 102 * and commit the XMD or drop the XMD's reference in the event of error. Simply 103 * drop the log's XMI reference now that the log is done with it. 104 */ 105 STATIC void 106 xfs_xmi_item_unpin( 107 struct xfs_log_item *lip, 108 int remove) 109 { 110 struct xfs_xmi_log_item *xmi_lip = XMI_ITEM(lip); 111 112 xfs_xmi_release(xmi_lip); 113 } 114 115 /* 116 * The XMI has been either committed or aborted if the transaction has been 117 * cancelled. If the transaction was cancelled, an XMD isn't going to be 118 * constructed and thus we free the XMI here directly. 119 */ 120 STATIC void 121 xfs_xmi_item_release( 122 struct xfs_log_item *lip) 123 { 124 xfs_xmi_release(XMI_ITEM(lip)); 125 } 126 127 /* Allocate and initialize an xmi item. */ 128 STATIC struct xfs_xmi_log_item * 129 xfs_xmi_init( 130 struct xfs_mount *mp) 131 132 { 133 struct xfs_xmi_log_item *xmi_lip; 134 135 xmi_lip = kmem_cache_zalloc(xfs_xmi_cache, GFP_KERNEL | __GFP_NOFAIL); 136 137 xfs_log_item_init(mp, &xmi_lip->xmi_item, XFS_LI_XMI, &xfs_xmi_item_ops); 138 xmi_lip->xmi_format.xmi_id = (uintptr_t)(void *)xmi_lip; 139 atomic_set(&xmi_lip->xmi_refcount, 2); 140 141 return xmi_lip; 142 } 143 144 static inline struct xfs_xmd_log_item *XMD_ITEM(struct xfs_log_item *lip) 145 { 146 return container_of(lip, struct xfs_xmd_log_item, xmd_item); 147 } 148 149 STATIC void 150 xfs_xmd_item_size( 151 struct xfs_log_item *lip, 152 int *nvecs, 153 int *nbytes) 154 { 155 *nvecs += 1; 156 *nbytes += sizeof(struct xfs_xmd_log_format); 157 } 158 159 /* 160 * This is called to fill in the vector of log iovecs for the given xmd log 161 * item. We use only 1 iovec, and we point that at the xmd_log_format structure 162 * embedded in the xmd item. 163 */ 164 STATIC void 165 xfs_xmd_item_format( 166 struct xfs_log_item *lip, 167 struct xlog_format_buf *lfb) 168 { 169 struct xfs_xmd_log_item *xmd_lip = XMD_ITEM(lip); 170 171 xmd_lip->xmd_format.xmd_type = XFS_LI_XMD; 172 xmd_lip->xmd_format.xmd_size = 1; 173 174 xlog_format_copy(lfb, XLOG_REG_TYPE_XMD_FORMAT, &xmd_lip->xmd_format, 175 sizeof(struct xfs_xmd_log_format)); 176 } 177 178 /* 179 * The XMD is either committed or aborted if the transaction is cancelled. If 180 * the transaction is cancelled, drop our reference to the XMI and free the 181 * XMD. 182 */ 183 STATIC void 184 xfs_xmd_item_release( 185 struct xfs_log_item *lip) 186 { 187 struct xfs_xmd_log_item *xmd_lip = XMD_ITEM(lip); 188 189 xfs_xmi_release(xmd_lip->xmd_intent_log_item); 190 kvfree(xmd_lip->xmd_item.li_lv_shadow); 191 kmem_cache_free(xfs_xmd_cache, xmd_lip); 192 } 193 194 static struct xfs_log_item * 195 xfs_xmd_item_intent( 196 struct xfs_log_item *lip) 197 { 198 return &XMD_ITEM(lip)->xmd_intent_log_item->xmi_item; 199 } 200 201 static const struct xfs_item_ops xfs_xmd_item_ops = { 202 .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED | 203 XFS_ITEM_INTENT_DONE, 204 .iop_size = xfs_xmd_item_size, 205 .iop_format = xfs_xmd_item_format, 206 .iop_release = xfs_xmd_item_release, 207 .iop_intent = xfs_xmd_item_intent, 208 }; 209 210 /* Log file mapping exchange information in the intent item. */ 211 STATIC struct xfs_log_item * 212 xfs_exchmaps_create_intent( 213 struct xfs_trans *tp, 214 struct list_head *items, 215 unsigned int count, 216 bool sort) 217 { 218 struct xfs_xmi_log_item *xmi_lip; 219 struct xfs_exchmaps_intent *xmi; 220 struct xfs_xmi_log_format *xlf; 221 222 ASSERT(count == 1); 223 224 xmi = list_first_entry_or_null(items, struct xfs_exchmaps_intent, 225 xmi_list); 226 227 xmi_lip = xfs_xmi_init(tp->t_mountp); 228 xlf = &xmi_lip->xmi_format; 229 230 xlf->xmi_inode1 = xmi->xmi_ip1->i_ino; 231 xlf->xmi_igen1 = VFS_I(xmi->xmi_ip1)->i_generation; 232 xlf->xmi_inode2 = xmi->xmi_ip2->i_ino; 233 xlf->xmi_igen2 = VFS_I(xmi->xmi_ip2)->i_generation; 234 xlf->xmi_startoff1 = xmi->xmi_startoff1; 235 xlf->xmi_startoff2 = xmi->xmi_startoff2; 236 xlf->xmi_blockcount = xmi->xmi_blockcount; 237 xlf->xmi_isize1 = xmi->xmi_isize1; 238 xlf->xmi_isize2 = xmi->xmi_isize2; 239 xlf->xmi_flags = xmi->xmi_flags & XFS_EXCHMAPS_LOGGED_FLAGS; 240 241 return &xmi_lip->xmi_item; 242 } 243 244 STATIC struct xfs_log_item * 245 xfs_exchmaps_create_done( 246 struct xfs_trans *tp, 247 struct xfs_log_item *intent, 248 unsigned int count) 249 { 250 struct xfs_xmi_log_item *xmi_lip = XMI_ITEM(intent); 251 struct xfs_xmd_log_item *xmd_lip; 252 253 xmd_lip = kmem_cache_zalloc(xfs_xmd_cache, GFP_KERNEL | __GFP_NOFAIL); 254 xfs_log_item_init(tp->t_mountp, &xmd_lip->xmd_item, XFS_LI_XMD, 255 &xfs_xmd_item_ops); 256 xmd_lip->xmd_intent_log_item = xmi_lip; 257 xmd_lip->xmd_format.xmd_xmi_id = xmi_lip->xmi_format.xmi_id; 258 259 return &xmd_lip->xmd_item; 260 } 261 262 /* Add this deferred XMI to the transaction. */ 263 void 264 xfs_exchmaps_defer_add( 265 struct xfs_trans *tp, 266 struct xfs_exchmaps_intent *xmi) 267 { 268 trace_xfs_exchmaps_defer(tp->t_mountp, xmi); 269 270 xfs_defer_add(tp, &xmi->xmi_list, &xfs_exchmaps_defer_type); 271 } 272 273 static inline struct xfs_exchmaps_intent *xmi_entry(const struct list_head *e) 274 { 275 return list_entry(e, struct xfs_exchmaps_intent, xmi_list); 276 } 277 278 /* Cancel a deferred file mapping exchange. */ 279 STATIC void 280 xfs_exchmaps_cancel_item( 281 struct list_head *item) 282 { 283 struct xfs_exchmaps_intent *xmi = xmi_entry(item); 284 285 kmem_cache_free(xfs_exchmaps_intent_cache, xmi); 286 } 287 288 /* Process a deferred file mapping exchange. */ 289 STATIC int 290 xfs_exchmaps_finish_item( 291 struct xfs_trans *tp, 292 struct xfs_log_item *done, 293 struct list_head *item, 294 struct xfs_btree_cur **state) 295 { 296 struct xfs_exchmaps_intent *xmi = xmi_entry(item); 297 int error; 298 299 /* 300 * Exchange one more mappings between two files. If there's still more 301 * work to do, we want to requeue ourselves after all other pending 302 * deferred operations have finished. This includes all of the dfops 303 * that we queued directly as well as any new ones created in the 304 * process of finishing the others. Doing so prevents us from queuing 305 * a large number of XMI log items in kernel memory, which in turn 306 * prevents us from pinning the tail of the log (while logging those 307 * new XMI items) until the first XMI items can be processed. 308 */ 309 error = xfs_exchmaps_finish_one(tp, xmi); 310 if (error != -EAGAIN) 311 xfs_exchmaps_cancel_item(item); 312 return error; 313 } 314 315 /* Abort all pending XMIs. */ 316 STATIC void 317 xfs_exchmaps_abort_intent( 318 struct xfs_log_item *intent) 319 { 320 xfs_xmi_release(XMI_ITEM(intent)); 321 } 322 323 /* Is this recovered XMI ok? */ 324 static inline bool 325 xfs_xmi_validate( 326 struct xfs_mount *mp, 327 struct xfs_xmi_log_item *xmi_lip) 328 { 329 struct xfs_xmi_log_format *xlf = &xmi_lip->xmi_format; 330 331 if (!xfs_has_exchange_range(mp)) 332 return false; 333 334 if (xmi_lip->xmi_format.__pad != 0) 335 return false; 336 337 if (xlf->xmi_flags & ~XFS_EXCHMAPS_LOGGED_FLAGS) 338 return false; 339 340 if (!xfs_verify_ino(mp, xlf->xmi_inode1) || 341 !xfs_verify_ino(mp, xlf->xmi_inode2)) 342 return false; 343 344 if (!xfs_verify_fileext(mp, xlf->xmi_startoff1, xlf->xmi_blockcount)) 345 return false; 346 347 return xfs_verify_fileext(mp, xlf->xmi_startoff2, xlf->xmi_blockcount); 348 } 349 350 /* 351 * Use the recovered log state to create a new request, estimate resource 352 * requirements, and create a new incore intent state. 353 */ 354 STATIC struct xfs_exchmaps_intent * 355 xfs_xmi_item_recover_intent( 356 struct xfs_mount *mp, 357 struct xfs_defer_pending *dfp, 358 const struct xfs_xmi_log_format *xlf, 359 struct xfs_exchmaps_req *req, 360 struct xfs_inode **ipp1, 361 struct xfs_inode **ipp2) 362 { 363 struct xfs_inode *ip1, *ip2; 364 struct xfs_exchmaps_intent *xmi; 365 int error; 366 367 /* 368 * Grab both inodes and set IRECOVERY to prevent trimming of post-eof 369 * mappings and freeing of unlinked inodes until we're totally done 370 * processing files. The ondisk format of this new log item contains 371 * file handle information, which is why recovery for other items do 372 * not check the inode generation number. 373 */ 374 error = xlog_recover_iget_handle(mp, xlf->xmi_inode1, xlf->xmi_igen1, 375 &ip1); 376 if (error) { 377 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, xlf, 378 sizeof(*xlf)); 379 return ERR_PTR(error); 380 } 381 382 error = xlog_recover_iget_handle(mp, xlf->xmi_inode2, xlf->xmi_igen2, 383 &ip2); 384 if (error) { 385 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, xlf, 386 sizeof(*xlf)); 387 goto err_rele1; 388 } 389 390 req->ip1 = ip1; 391 req->ip2 = ip2; 392 req->startoff1 = xlf->xmi_startoff1; 393 req->startoff2 = xlf->xmi_startoff2; 394 req->blockcount = xlf->xmi_blockcount; 395 req->flags = xlf->xmi_flags & XFS_EXCHMAPS_PARAMS; 396 397 xfs_exchrange_ilock(NULL, ip1, ip2); 398 error = xfs_exchmaps_estimate(req); 399 xfs_exchrange_iunlock(ip1, ip2); 400 if (error) 401 goto err_rele2; 402 403 *ipp1 = ip1; 404 *ipp2 = ip2; 405 xmi = xfs_exchmaps_init_intent(req); 406 xfs_defer_add_item(dfp, &xmi->xmi_list); 407 return xmi; 408 409 err_rele2: 410 xfs_irele(ip2); 411 err_rele1: 412 xfs_irele(ip1); 413 req->ip2 = req->ip1 = NULL; 414 return ERR_PTR(error); 415 } 416 417 /* Process a file mapping exchange item that was recovered from the log. */ 418 STATIC int 419 xfs_exchmaps_recover_work( 420 struct xfs_defer_pending *dfp, 421 struct list_head *capture_list) 422 { 423 struct xfs_exchmaps_req req = { .flags = 0 }; 424 struct xfs_trans_res resv; 425 struct xfs_exchmaps_intent *xmi; 426 struct xfs_log_item *lip = dfp->dfp_intent; 427 struct xfs_xmi_log_item *xmi_lip = XMI_ITEM(lip); 428 struct xfs_mount *mp = lip->li_log->l_mp; 429 struct xfs_trans *tp; 430 struct xfs_inode *ip1, *ip2; 431 int error = 0; 432 433 if (!xfs_xmi_validate(mp, xmi_lip)) { 434 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, 435 &xmi_lip->xmi_format, 436 sizeof(xmi_lip->xmi_format)); 437 return -EFSCORRUPTED; 438 } 439 440 xmi = xfs_xmi_item_recover_intent(mp, dfp, &xmi_lip->xmi_format, &req, 441 &ip1, &ip2); 442 if (IS_ERR(xmi)) 443 return PTR_ERR(xmi); 444 445 trace_xfs_exchmaps_recover(mp, xmi); 446 447 resv = xlog_recover_resv(&M_RES(mp)->tr_write); 448 error = xfs_trans_alloc(mp, &resv, req.resblks, 0, 0, &tp); 449 if (error) 450 goto err_rele; 451 452 xfs_exchrange_ilock(tp, ip1, ip2); 453 454 xfs_exchmaps_ensure_reflink(tp, xmi); 455 xfs_exchmaps_upgrade_extent_counts(tp, xmi); 456 error = xlog_recover_finish_intent(tp, dfp); 457 if (error == -EFSCORRUPTED) 458 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, 459 &xmi_lip->xmi_format, 460 sizeof(xmi_lip->xmi_format)); 461 if (error) 462 goto err_cancel; 463 464 /* 465 * Commit transaction, which frees the transaction and saves the inodes 466 * for later replay activities. 467 */ 468 error = xfs_defer_ops_capture_and_commit(tp, capture_list); 469 goto err_unlock; 470 471 err_cancel: 472 xfs_trans_cancel(tp); 473 err_unlock: 474 xfs_exchrange_iunlock(ip1, ip2); 475 err_rele: 476 xfs_irele(ip2); 477 xfs_irele(ip1); 478 return error; 479 } 480 481 /* Relog an intent item to push the log tail forward. */ 482 static struct xfs_log_item * 483 xfs_exchmaps_relog_intent( 484 struct xfs_trans *tp, 485 struct xfs_log_item *intent, 486 struct xfs_log_item *done_item) 487 { 488 struct xfs_xmi_log_item *xmi_lip; 489 struct xfs_xmi_log_format *old_xlf, *new_xlf; 490 491 old_xlf = &XMI_ITEM(intent)->xmi_format; 492 493 xmi_lip = xfs_xmi_init(tp->t_mountp); 494 new_xlf = &xmi_lip->xmi_format; 495 496 new_xlf->xmi_inode1 = old_xlf->xmi_inode1; 497 new_xlf->xmi_inode2 = old_xlf->xmi_inode2; 498 new_xlf->xmi_igen1 = old_xlf->xmi_igen1; 499 new_xlf->xmi_igen2 = old_xlf->xmi_igen2; 500 new_xlf->xmi_startoff1 = old_xlf->xmi_startoff1; 501 new_xlf->xmi_startoff2 = old_xlf->xmi_startoff2; 502 new_xlf->xmi_blockcount = old_xlf->xmi_blockcount; 503 new_xlf->xmi_flags = old_xlf->xmi_flags; 504 new_xlf->xmi_isize1 = old_xlf->xmi_isize1; 505 new_xlf->xmi_isize2 = old_xlf->xmi_isize2; 506 507 return &xmi_lip->xmi_item; 508 } 509 510 const struct xfs_defer_op_type xfs_exchmaps_defer_type = { 511 .name = "exchmaps", 512 .max_items = 1, 513 .create_intent = xfs_exchmaps_create_intent, 514 .abort_intent = xfs_exchmaps_abort_intent, 515 .create_done = xfs_exchmaps_create_done, 516 .finish_item = xfs_exchmaps_finish_item, 517 .cancel_item = xfs_exchmaps_cancel_item, 518 .recover_work = xfs_exchmaps_recover_work, 519 .relog_intent = xfs_exchmaps_relog_intent, 520 }; 521 522 STATIC bool 523 xfs_xmi_item_match( 524 struct xfs_log_item *lip, 525 uint64_t intent_id) 526 { 527 return XMI_ITEM(lip)->xmi_format.xmi_id == intent_id; 528 } 529 530 static const struct xfs_item_ops xfs_xmi_item_ops = { 531 .flags = XFS_ITEM_INTENT, 532 .iop_size = xfs_xmi_item_size, 533 .iop_format = xfs_xmi_item_format, 534 .iop_unpin = xfs_xmi_item_unpin, 535 .iop_release = xfs_xmi_item_release, 536 .iop_match = xfs_xmi_item_match, 537 }; 538 539 /* 540 * This routine is called to create an in-core file mapping exchange item from 541 * the xmi format structure which was logged on disk. It allocates an in-core 542 * xmi, copies the exchange information from the format structure into it, and 543 * adds the xmi to the AIL with the given LSN. 544 */ 545 STATIC int 546 xlog_recover_xmi_commit_pass2( 547 struct xlog *log, 548 struct list_head *buffer_list, 549 struct xlog_recover_item *item, 550 xfs_lsn_t lsn) 551 { 552 struct xfs_mount *mp = log->l_mp; 553 struct xfs_xmi_log_item *xmi_lip; 554 struct xfs_xmi_log_format *xmi_formatp; 555 size_t len; 556 557 len = sizeof(struct xfs_xmi_log_format); 558 if (item->ri_buf[0].iov_len != len) { 559 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); 560 return -EFSCORRUPTED; 561 } 562 563 xmi_formatp = item->ri_buf[0].iov_base; 564 if (xmi_formatp->__pad != 0) { 565 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); 566 return -EFSCORRUPTED; 567 } 568 569 xmi_lip = xfs_xmi_init(mp); 570 memcpy(&xmi_lip->xmi_format, xmi_formatp, len); 571 572 xlog_recover_intent_item(log, &xmi_lip->xmi_item, lsn, 573 &xfs_exchmaps_defer_type); 574 return 0; 575 } 576 577 const struct xlog_recover_item_ops xlog_xmi_item_ops = { 578 .item_type = XFS_LI_XMI, 579 .commit_pass2 = xlog_recover_xmi_commit_pass2, 580 }; 581 582 /* 583 * This routine is called when an XMD format structure is found in a committed 584 * transaction in the log. Its purpose is to cancel the corresponding XMI if it 585 * was still in the log. To do this it searches the AIL for the XMI with an id 586 * equal to that in the XMD format structure. If we find it we drop the XMD 587 * reference, which removes the XMI from the AIL and frees it. 588 */ 589 STATIC int 590 xlog_recover_xmd_commit_pass2( 591 struct xlog *log, 592 struct list_head *buffer_list, 593 struct xlog_recover_item *item, 594 xfs_lsn_t lsn) 595 { 596 struct xfs_xmd_log_format *xmd_formatp; 597 598 xmd_formatp = item->ri_buf[0].iov_base; 599 if (item->ri_buf[0].iov_len != sizeof(struct xfs_xmd_log_format)) { 600 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); 601 return -EFSCORRUPTED; 602 } 603 604 xlog_recover_release_intent(log, XFS_LI_XMI, xmd_formatp->xmd_xmi_id); 605 return 0; 606 } 607 608 const struct xlog_recover_item_ops xlog_xmd_item_ops = { 609 .item_type = XFS_LI_XMD, 610 .commit_pass2 = xlog_recover_xmd_commit_pass2, 611 }; 612