// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2020-2024 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_shared.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_exchmaps_item.h"
#include "xfs_exchmaps.h"
#include "xfs_log.h"
#include "xfs_bmap.h"
#include "xfs_icache.h"
#include "xfs_bmap_btree.h"
#include "xfs_trans_space.h"
#include "xfs_error.h"
#include "xfs_log_priv.h"
#include "xfs_log_recover.h"
#include "xfs_exchrange.h"
#include "xfs_trace.h"

struct kmem_cache	*xfs_xmi_cache;
struct kmem_cache	*xfs_xmd_cache;

static const struct xfs_item_ops xfs_xmi_item_ops;
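
/*
 * Overview of file mapping exchange log items:
 *
 * The XMI (intent) item is logged before any mappings are exchanged between
 * the two files, and the matching XMD (done) item is logged once the
 * deferred work has finished.  If the system goes down after an XMI commits
 * but before the matching XMD makes it to the log, recovery reconstructs
 * the exchange request from the intent and finishes the work.
 */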

static inline struct xfs_xmi_log_item *XMI_ITEM(struct xfs_log_item *lip)
{
	return container_of(lip, struct xfs_xmi_log_item, xmi_item);
}

STATIC void
xfs_xmi_item_free(
	struct xfs_xmi_log_item	*xmi_lip)
{
	kvfree(xmi_lip->xmi_item.li_lv_shadow);
	kmem_cache_free(xfs_xmi_cache, xmi_lip);
}

/*
 * Freeing the XMI requires that we remove it from the AIL if it has already
 * been placed there. However, the XMI may not yet have been placed in the AIL
 * when called by xfs_xmi_release() from XMD processing due to the ordering of
 * committed vs unpin operations in bulk insert operations. Hence the reference
 * count to ensure only the last caller frees the XMI.
 */
STATIC void
xfs_xmi_release(
	struct xfs_xmi_log_item	*xmi_lip)
{
	ASSERT(atomic_read(&xmi_lip->xmi_refcount) > 0);
	if (atomic_dec_and_test(&xmi_lip->xmi_refcount)) {
		xfs_trans_ail_delete(&xmi_lip->xmi_item, 0);
		xfs_xmi_item_free(xmi_lip);
	}
}

/* Report the number of log iovecs and the space needed to log an XMI. */
STATIC void
xfs_xmi_item_size(
	struct xfs_log_item	*lip,
	int			*nvecs,
	int			*nbytes)
{
	*nvecs += 1;
	*nbytes += sizeof(struct xfs_xmi_log_format);
}

/*
 * This is called to fill in the vector of log iovecs for the given xmi log
 * item. We use only 1 iovec, and we point that at the xmi_log_format structure
 * embedded in the xmi item.
 */
STATIC void
xfs_xmi_item_format(
	struct xfs_log_item	*lip,
	struct xfs_log_vec	*lv)
{
	struct xfs_xmi_log_item	*xmi_lip = XMI_ITEM(lip);
	struct xfs_log_iovec	*vecp = NULL;

	xmi_lip->xmi_format.xmi_type = XFS_LI_XMI;
	xmi_lip->xmi_format.xmi_size = 1;

	xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_XMI_FORMAT,
			&xmi_lip->xmi_format,
			sizeof(struct xfs_xmi_log_format));
}

/*
 * The unpin operation is the last place an XMI is manipulated in the log. It
 * is either inserted in the AIL or aborted in the event of a log I/O error.
 * In either case, the XMI transaction has been successfully committed to make
 * it this far. Therefore, we expect whoever committed the XMI to either
 * construct and commit the XMD or drop the XMD's reference in the event of
 * error. Simply drop the log's XMI reference now that the log is done with
 * it.
 */
STATIC void
xfs_xmi_item_unpin(
	struct xfs_log_item	*lip,
	int			remove)
{
	struct xfs_xmi_log_item	*xmi_lip = XMI_ITEM(lip);

	xfs_xmi_release(xmi_lip);
}

/*
 * By the time this release function is called, the XMI transaction has
 * either committed or been cancelled. If the transaction was cancelled, an
 * XMD isn't going to be constructed, so drop the XMI's reference here
 * directly.
 */
STATIC void
xfs_xmi_item_release(
	struct xfs_log_item	*lip)
{
	xfs_xmi_release(XMI_ITEM(lip));
}

/* Allocate and initialize an xmi item. */
STATIC struct xfs_xmi_log_item *
xfs_xmi_init(
	struct xfs_mount	*mp)
{
	struct xfs_xmi_log_item	*xmi_lip;

	xmi_lip = kmem_cache_zalloc(xfs_xmi_cache, GFP_KERNEL | __GFP_NOFAIL);

	xfs_log_item_init(mp, &xmi_lip->xmi_item, XFS_LI_XMI, &xfs_xmi_item_ops);
	xmi_lip->xmi_format.xmi_id = (uintptr_t)(void *)xmi_lip;
	/*
	 * Two references: one dropped when the intent is released or
	 * unpinned, and one dropped by XMD processing.
	 */
	atomic_set(&xmi_lip->xmi_refcount, 2);

	return xmi_lip;
}

static inline struct xfs_xmd_log_item *XMD_ITEM(struct xfs_log_item *lip)
{
	return container_of(lip, struct xfs_xmd_log_item, xmd_item);
}

/* Report the number of log iovecs and the space needed to log an XMD. */
STATIC void
xfs_xmd_item_size(
	struct xfs_log_item	*lip,
	int			*nvecs,
	int			*nbytes)
{
	*nvecs += 1;
	*nbytes += sizeof(struct xfs_xmd_log_format);
}

/*
 * This is called to fill in the vector of log iovecs for the given xmd log
 * item. We use only 1 iovec, and we point that at the xmd_log_format structure
 * embedded in the xmd item.
 */
STATIC void
xfs_xmd_item_format(
	struct xfs_log_item	*lip,
	struct xfs_log_vec	*lv)
{
	struct xfs_xmd_log_item	*xmd_lip = XMD_ITEM(lip);
	struct xfs_log_iovec	*vecp = NULL;

	xmd_lip->xmd_format.xmd_type = XFS_LI_XMD;
	xmd_lip->xmd_format.xmd_size = 1;

	xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_XMD_FORMAT,
			&xmd_lip->xmd_format,
			sizeof(struct xfs_xmd_log_format));
}

/*
 * The XMD is either committed or aborted if the transaction is cancelled. If
 * the transaction is cancelled, drop our reference to the XMI and free the
 * XMD.
 */
STATIC void
xfs_xmd_item_release(
	struct xfs_log_item	*lip)
{
	struct xfs_xmd_log_item	*xmd_lip = XMD_ITEM(lip);

	xfs_xmi_release(xmd_lip->xmd_intent_log_item);
	kvfree(xmd_lip->xmd_item.li_lv_shadow);
	kmem_cache_free(xfs_xmd_cache, xmd_lip);
}

/* Return the XMI that this XMD completes. */
static struct xfs_log_item *
xfs_xmd_item_intent(
	struct xfs_log_item	*lip)
{
	return &XMD_ITEM(lip)->xmd_intent_log_item->xmi_item;
}

static const struct xfs_item_ops xfs_xmd_item_ops = {
	.flags		= XFS_ITEM_RELEASE_WHEN_COMMITTED |
			  XFS_ITEM_INTENT_DONE,
	.iop_size	= xfs_xmd_item_size,
	.iop_format	= xfs_xmd_item_format,
	.iop_release	= xfs_xmd_item_release,
	.iop_intent	= xfs_xmd_item_intent,
};
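
/*
 * The deferred-operation glue for mapping exchanges follows.  As a rough
 * sketch (the real callers live elsewhere in the exchange-range code), a
 * caller queues work under a transaction like this:
 *
 *	xmi = xfs_exchmaps_init_intent(req);
 *	xfs_exchmaps_defer_add(tp, xmi);
 *
 * The defer machinery then invokes ->create_intent to log the XMI,
 * ->finish_item to do the actual work, and ->create_done to log the XMD.
 */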

/* Log file mapping exchange information in the intent item. */
STATIC struct xfs_log_item *
xfs_exchmaps_create_intent(
	struct xfs_trans		*tp,
	struct list_head		*items,
	unsigned int			count,
	bool				sort)
{
	struct xfs_xmi_log_item		*xmi_lip;
	struct xfs_exchmaps_intent	*xmi;
	struct xfs_xmi_log_format	*xlf;

	ASSERT(count == 1);

	xmi = list_first_entry_or_null(items, struct xfs_exchmaps_intent,
			xmi_list);

	xmi_lip = xfs_xmi_init(tp->t_mountp);
	xlf = &xmi_lip->xmi_format;

	xlf->xmi_inode1 = xmi->xmi_ip1->i_ino;
	xlf->xmi_igen1 = VFS_I(xmi->xmi_ip1)->i_generation;
	xlf->xmi_inode2 = xmi->xmi_ip2->i_ino;
	xlf->xmi_igen2 = VFS_I(xmi->xmi_ip2)->i_generation;
	xlf->xmi_startoff1 = xmi->xmi_startoff1;
	xlf->xmi_startoff2 = xmi->xmi_startoff2;
	xlf->xmi_blockcount = xmi->xmi_blockcount;
	xlf->xmi_isize1 = xmi->xmi_isize1;
	xlf->xmi_isize2 = xmi->xmi_isize2;
	xlf->xmi_flags = xmi->xmi_flags & XFS_EXCHMAPS_LOGGED_FLAGS;

	return &xmi_lip->xmi_item;
}

/* Create a done item to pair with the file mapping exchange intent. */
STATIC struct xfs_log_item *
xfs_exchmaps_create_done(
	struct xfs_trans		*tp,
	struct xfs_log_item		*intent,
	unsigned int			count)
{
	struct xfs_xmi_log_item		*xmi_lip = XMI_ITEM(intent);
	struct xfs_xmd_log_item		*xmd_lip;

	xmd_lip = kmem_cache_zalloc(xfs_xmd_cache, GFP_KERNEL | __GFP_NOFAIL);
	xfs_log_item_init(tp->t_mountp, &xmd_lip->xmd_item, XFS_LI_XMD,
			  &xfs_xmd_item_ops);
	xmd_lip->xmd_intent_log_item = xmi_lip;
	xmd_lip->xmd_format.xmd_xmi_id = xmi_lip->xmi_format.xmi_id;

	return &xmd_lip->xmd_item;
}

/* Add this deferred XMI to the transaction. */
void
xfs_exchmaps_defer_add(
	struct xfs_trans		*tp,
	struct xfs_exchmaps_intent	*xmi)
{
	trace_xfs_exchmaps_defer(tp->t_mountp, xmi);

	xfs_defer_add(tp, &xmi->xmi_list, &xfs_exchmaps_defer_type);
}

static inline struct xfs_exchmaps_intent *xmi_entry(const struct list_head *e)
{
	return list_entry(e, struct xfs_exchmaps_intent, xmi_list);
}

/* Cancel a deferred file mapping exchange. */
STATIC void
xfs_exchmaps_cancel_item(
	struct list_head		*item)
{
	struct xfs_exchmaps_intent	*xmi = xmi_entry(item);

	kmem_cache_free(xfs_exchmaps_intent_cache, xmi);
}

/* Process a deferred file mapping exchange. */
STATIC int
xfs_exchmaps_finish_item(
	struct xfs_trans		*tp,
	struct xfs_log_item		*done,
	struct list_head		*item,
	struct xfs_btree_cur		**state)
{
	struct xfs_exchmaps_intent	*xmi = xmi_entry(item);
	int				error;

	/*
	 * Exchange one more mapping between the two files. If there's still
	 * more work to do, we want to requeue ourselves after all other
	 * pending deferred operations have finished. This includes all of the
	 * dfops that we queued directly as well as any new ones created in
	 * the process of finishing the others. Doing so prevents us from
	 * queuing a large number of XMI log items in kernel memory, which in
	 * turn prevents us from pinning the tail of the log (while logging
	 * those new XMI items) until the first XMI items can be processed.
	 */
	error = xfs_exchmaps_finish_one(tp, xmi);
	if (error != -EAGAIN)
		xfs_exchmaps_cancel_item(item);
	return error;
}

/* Abort all pending XMIs. */
STATIC void
xfs_exchmaps_abort_intent(
	struct xfs_log_item		*intent)
{
	xfs_xmi_release(XMI_ITEM(intent));
}
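
/*
 * The log recovery pieces follow.  A recovered intent was read back from
 * disk, so its contents have to be treated as untrusted input and validated
 * before any of them are used.
 */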

/* Is this recovered XMI ok? */
static inline bool
xfs_xmi_validate(
	struct xfs_mount		*mp,
	struct xfs_xmi_log_item		*xmi_lip)
{
	struct xfs_xmi_log_format	*xlf = &xmi_lip->xmi_format;

	if (!xfs_has_exchange_range(mp))
		return false;

	if (xmi_lip->xmi_format.__pad != 0)
		return false;

	if (xlf->xmi_flags & ~XFS_EXCHMAPS_LOGGED_FLAGS)
		return false;

	if (!xfs_verify_ino(mp, xlf->xmi_inode1) ||
	    !xfs_verify_ino(mp, xlf->xmi_inode2))
		return false;

	if (!xfs_verify_fileext(mp, xlf->xmi_startoff1, xlf->xmi_blockcount))
		return false;

	return xfs_verify_fileext(mp, xlf->xmi_startoff2, xlf->xmi_blockcount);
}

/*
 * Use the recovered log state to create a new request, estimate resource
 * requirements, and create a new incore intent state.
 */
STATIC struct xfs_exchmaps_intent *
xfs_xmi_item_recover_intent(
	struct xfs_mount		*mp,
	struct xfs_defer_pending	*dfp,
	const struct xfs_xmi_log_format	*xlf,
	struct xfs_exchmaps_req		*req,
	struct xfs_inode		**ipp1,
	struct xfs_inode		**ipp2)
{
	struct xfs_inode		*ip1, *ip2;
	struct xfs_exchmaps_intent	*xmi;
	int				error;

	/*
	 * Grab both inodes and set IRECOVERY to prevent trimming of post-eof
	 * mappings and freeing of unlinked inodes until we're totally done
	 * processing files. The ondisk format of this new log item contains
	 * file handle information, which is why recovery for other items does
	 * not check the inode generation number.
	 */
	error = xlog_recover_iget_handle(mp, xlf->xmi_inode1, xlf->xmi_igen1,
			&ip1);
	if (error) {
		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, xlf,
				sizeof(*xlf));
		return ERR_PTR(error);
	}

	error = xlog_recover_iget_handle(mp, xlf->xmi_inode2, xlf->xmi_igen2,
			&ip2);
	if (error) {
		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, xlf,
				sizeof(*xlf));
		goto err_rele1;
	}

	req->ip1 = ip1;
	req->ip2 = ip2;
	req->startoff1 = xlf->xmi_startoff1;
	req->startoff2 = xlf->xmi_startoff2;
	req->blockcount = xlf->xmi_blockcount;
	req->flags = xlf->xmi_flags & XFS_EXCHMAPS_PARAMS;

	xfs_exchrange_ilock(NULL, ip1, ip2);
	error = xfs_exchmaps_estimate(req);
	xfs_exchrange_iunlock(ip1, ip2);
	if (error)
		goto err_rele2;

	*ipp1 = ip1;
	*ipp2 = ip2;
	xmi = xfs_exchmaps_init_intent(req);
	xfs_defer_add_item(dfp, &xmi->xmi_list);
	return xmi;

err_rele2:
	xfs_irele(ip2);
err_rele1:
	xfs_irele(ip1);
	req->ip2 = req->ip1 = NULL;
	return ERR_PTR(error);
}
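
/*
 * The recovery work function below proceeds roughly as follows: validate
 * the recovered XMI, re-grab both inodes and rebuild the request, allocate
 * a transaction and take the same locks as the original caller, finish as
 * much of the exchange as one transaction permits, then capture any
 * remaining deferred work together with the inodes and commit.
 */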

/* Process a file mapping exchange item that was recovered from the log. */
STATIC int
xfs_exchmaps_recover_work(
	struct xfs_defer_pending	*dfp,
	struct list_head		*capture_list)
{
	struct xfs_exchmaps_req		req = { .flags = 0 };
	struct xfs_trans_res		resv;
	struct xfs_exchmaps_intent	*xmi;
	struct xfs_log_item		*lip = dfp->dfp_intent;
	struct xfs_xmi_log_item		*xmi_lip = XMI_ITEM(lip);
	struct xfs_mount		*mp = lip->li_log->l_mp;
	struct xfs_trans		*tp;
	struct xfs_inode		*ip1, *ip2;
	int				error = 0;

	if (!xfs_xmi_validate(mp, xmi_lip)) {
		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
				&xmi_lip->xmi_format,
				sizeof(xmi_lip->xmi_format));
		return -EFSCORRUPTED;
	}

	xmi = xfs_xmi_item_recover_intent(mp, dfp, &xmi_lip->xmi_format, &req,
			&ip1, &ip2);
	if (IS_ERR(xmi))
		return PTR_ERR(xmi);

	trace_xfs_exchmaps_recover(mp, xmi);

	resv = xlog_recover_resv(&M_RES(mp)->tr_write);
	error = xfs_trans_alloc(mp, &resv, req.resblks, 0, 0, &tp);
	if (error)
		goto err_rele;

	xfs_exchrange_ilock(tp, ip1, ip2);

	xfs_exchmaps_ensure_reflink(tp, xmi);
	xfs_exchmaps_upgrade_extent_counts(tp, xmi);
	error = xlog_recover_finish_intent(tp, dfp);
	if (error == -EFSCORRUPTED)
		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
				&xmi_lip->xmi_format,
				sizeof(xmi_lip->xmi_format));
	if (error)
		goto err_cancel;

	/*
	 * Commit the transaction, which frees it and saves the inodes for
	 * later replay activities.
	 */
	error = xfs_defer_ops_capture_and_commit(tp, capture_list);
	goto err_unlock;

err_cancel:
	xfs_trans_cancel(tp);
err_unlock:
	xfs_exchrange_iunlock(ip1, ip2);
err_rele:
	xfs_irele(ip2);
	xfs_irele(ip1);
	return error;
}
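
/*
 * A large exchange might not fit in a single transaction, in which case
 * xfs_exchmaps_finish_item() returns -EAGAIN and the remaining work is
 * requeued.  To keep such a long-lived intent from pinning the log tail,
 * the defer machinery can also ask us to re-log it closer to the log head,
 * which is what the function below does.
 */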

/* Relog an intent item to push the log tail forward. */
static struct xfs_log_item *
xfs_exchmaps_relog_intent(
	struct xfs_trans		*tp,
	struct xfs_log_item		*intent,
	struct xfs_log_item		*done_item)
{
	struct xfs_xmi_log_item		*xmi_lip;
	struct xfs_xmi_log_format	*old_xlf, *new_xlf;

	old_xlf = &XMI_ITEM(intent)->xmi_format;

	xmi_lip = xfs_xmi_init(tp->t_mountp);
	new_xlf = &xmi_lip->xmi_format;

	new_xlf->xmi_inode1	= old_xlf->xmi_inode1;
	new_xlf->xmi_inode2	= old_xlf->xmi_inode2;
	new_xlf->xmi_igen1	= old_xlf->xmi_igen1;
	new_xlf->xmi_igen2	= old_xlf->xmi_igen2;
	new_xlf->xmi_startoff1	= old_xlf->xmi_startoff1;
	new_xlf->xmi_startoff2	= old_xlf->xmi_startoff2;
	new_xlf->xmi_blockcount	= old_xlf->xmi_blockcount;
	new_xlf->xmi_flags	= old_xlf->xmi_flags;
	new_xlf->xmi_isize1	= old_xlf->xmi_isize1;
	new_xlf->xmi_isize2	= old_xlf->xmi_isize2;

	return &xmi_lip->xmi_item;
}

const struct xfs_defer_op_type xfs_exchmaps_defer_type = {
	.name		= "exchmaps",
	.max_items	= 1,
	.create_intent	= xfs_exchmaps_create_intent,
	.abort_intent	= xfs_exchmaps_abort_intent,
	.create_done	= xfs_exchmaps_create_done,
	.finish_item	= xfs_exchmaps_finish_item,
	.cancel_item	= xfs_exchmaps_cancel_item,
	.recover_work	= xfs_exchmaps_recover_work,
	.relog_intent	= xfs_exchmaps_relog_intent,
};

/* Match xmi items by intent id so that recovery can pair XMIs with XMDs. */
STATIC bool
xfs_xmi_item_match(
	struct xfs_log_item	*lip,
	uint64_t		intent_id)
{
	return XMI_ITEM(lip)->xmi_format.xmi_id == intent_id;
}

static const struct xfs_item_ops xfs_xmi_item_ops = {
	.flags		= XFS_ITEM_INTENT,
	.iop_size	= xfs_xmi_item_size,
	.iop_format	= xfs_xmi_item_format,
	.iop_unpin	= xfs_xmi_item_unpin,
	.iop_release	= xfs_xmi_item_release,
	.iop_match	= xfs_xmi_item_match,
};

/*
 * This routine is called to create an in-core file mapping exchange item from
 * the xmi format structure which was logged on disk. It allocates an in-core
 * xmi, copies the exchange information from the format structure into it, and
 * adds the xmi to the AIL with the given LSN.
 */
STATIC int
xlog_recover_xmi_commit_pass2(
	struct xlog			*log,
	struct list_head		*buffer_list,
	struct xlog_recover_item	*item,
	xfs_lsn_t			lsn)
{
	struct xfs_mount		*mp = log->l_mp;
	struct xfs_xmi_log_item		*xmi_lip;
	struct xfs_xmi_log_format	*xmi_formatp;
	size_t				len;

	len = sizeof(struct xfs_xmi_log_format);
	if (item->ri_buf[0].i_len != len) {
		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
		return -EFSCORRUPTED;
	}

	xmi_formatp = item->ri_buf[0].i_addr;
	if (xmi_formatp->__pad != 0) {
		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
		return -EFSCORRUPTED;
	}

	xmi_lip = xfs_xmi_init(mp);
	memcpy(&xmi_lip->xmi_format, xmi_formatp, len);

	xlog_recover_intent_item(log, &xmi_lip->xmi_item, lsn,
			&xfs_exchmaps_defer_type);
	return 0;
}

const struct xlog_recover_item_ops xlog_xmi_item_ops = {
	.item_type	= XFS_LI_XMI,
	.commit_pass2	= xlog_recover_xmi_commit_pass2,
};
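
/*
 * An XMI that is never matched by an XMD signals interrupted work;
 * xlog_recover_intent_item() above queues it so that
 * xfs_exchmaps_recover_work() can finish the exchange.  An XMI that is
 * matched by an XMD needs no further processing: either the exchange
 * finished, or the intent was re-logged as a newer XMI.
 */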

/*
 * This routine is called when an XMD format structure is found in a committed
 * transaction in the log. Its purpose is to cancel the corresponding XMI if
 * it is still in the log. To do this it searches the AIL for the XMI with an
 * id equal to that in the XMD format structure. If we find it, we drop the
 * reference that the XMD would have held on the XMI, which removes the XMI
 * from the AIL and frees it.
 */
STATIC int
xlog_recover_xmd_commit_pass2(
	struct xlog			*log,
	struct list_head		*buffer_list,
	struct xlog_recover_item	*item,
	xfs_lsn_t			lsn)
{
	struct xfs_xmd_log_format	*xmd_formatp;

	xmd_formatp = item->ri_buf[0].i_addr;
	if (item->ri_buf[0].i_len != sizeof(struct xfs_xmd_log_format)) {
		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
		return -EFSCORRUPTED;
	}

	xlog_recover_release_intent(log, XFS_LI_XMI, xmd_formatp->xmd_xmi_id);
	return 0;
}

const struct xlog_recover_item_ops xlog_xmd_item_ops = {
	.item_type	= XFS_LI_XMD,
	.commit_pass2	= xlog_recover_xmd_commit_pass2,
};