1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * Copyright (C) 2016 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_format.h" 9 #include "xfs_log_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_bit.h" 12 #include "xfs_shared.h" 13 #include "xfs_mount.h" 14 #include "xfs_defer.h" 15 #include "xfs_trans.h" 16 #include "xfs_trans_priv.h" 17 #include "xfs_refcount_item.h" 18 #include "xfs_log.h" 19 #include "xfs_refcount.h" 20 #include "xfs_error.h" 21 #include "xfs_log_priv.h" 22 #include "xfs_log_recover.h" 23 #include "xfs_ag.h" 24 #include "xfs_btree.h" 25 #include "xfs_trace.h" 26 27 struct kmem_cache *xfs_cui_cache; 28 struct kmem_cache *xfs_cud_cache; 29 30 static const struct xfs_item_ops xfs_cui_item_ops; 31 32 static inline struct xfs_cui_log_item *CUI_ITEM(struct xfs_log_item *lip) 33 { 34 return container_of(lip, struct xfs_cui_log_item, cui_item); 35 } 36 37 STATIC void 38 xfs_cui_item_free( 39 struct xfs_cui_log_item *cuip) 40 { 41 kvfree(cuip->cui_item.li_lv_shadow); 42 if (cuip->cui_format.cui_nextents > XFS_CUI_MAX_FAST_EXTENTS) 43 kfree(cuip); 44 else 45 kmem_cache_free(xfs_cui_cache, cuip); 46 } 47 48 /* 49 * Freeing the CUI requires that we remove it from the AIL if it has already 50 * been placed there. However, the CUI may not yet have been placed in the AIL 51 * when called by xfs_cui_release() from CUD processing due to the ordering of 52 * committed vs unpin operations in bulk insert operations. Hence the reference 53 * count to ensure only the last caller frees the CUI. 54 */ 55 STATIC void 56 xfs_cui_release( 57 struct xfs_cui_log_item *cuip) 58 { 59 ASSERT(atomic_read(&cuip->cui_refcount) > 0); 60 if (!atomic_dec_and_test(&cuip->cui_refcount)) 61 return; 62 63 xfs_trans_ail_delete(&cuip->cui_item, 0); 64 xfs_cui_item_free(cuip); 65 } 66 67 68 STATIC void 69 xfs_cui_item_size( 70 struct xfs_log_item *lip, 71 int *nvecs, 72 int *nbytes) 73 { 74 struct xfs_cui_log_item *cuip = CUI_ITEM(lip); 75 76 *nvecs += 1; 77 *nbytes += xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents); 78 } 79 80 /* 81 * This is called to fill in the vector of log iovecs for the 82 * given cui log item. We use only 1 iovec, and we point that 83 * at the cui_log_format structure embedded in the cui item. 84 * It is at this point that we assert that all of the extent 85 * slots in the cui item have been filled. 86 */ 87 STATIC void 88 xfs_cui_item_format( 89 struct xfs_log_item *lip, 90 struct xfs_log_vec *lv) 91 { 92 struct xfs_cui_log_item *cuip = CUI_ITEM(lip); 93 struct xfs_log_iovec *vecp = NULL; 94 95 ASSERT(atomic_read(&cuip->cui_next_extent) == 96 cuip->cui_format.cui_nextents); 97 98 cuip->cui_format.cui_type = XFS_LI_CUI; 99 cuip->cui_format.cui_size = 1; 100 101 xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUI_FORMAT, &cuip->cui_format, 102 xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents)); 103 } 104 105 /* 106 * The unpin operation is the last place an CUI is manipulated in the log. It is 107 * either inserted in the AIL or aborted in the event of a log I/O error. In 108 * either case, the CUI transaction has been successfully committed to make it 109 * this far. Therefore, we expect whoever committed the CUI to either construct 110 * and commit the CUD or drop the CUD's reference in the event of error. Simply 111 * drop the log's CUI reference now that the log is done with it. 112 */ 113 STATIC void 114 xfs_cui_item_unpin( 115 struct xfs_log_item *lip, 116 int remove) 117 { 118 struct xfs_cui_log_item *cuip = CUI_ITEM(lip); 119 120 xfs_cui_release(cuip); 121 } 122 123 /* 124 * The CUI has been either committed or aborted if the transaction has been 125 * cancelled. If the transaction was cancelled, an CUD isn't going to be 126 * constructed and thus we free the CUI here directly. 127 */ 128 STATIC void 129 xfs_cui_item_release( 130 struct xfs_log_item *lip) 131 { 132 xfs_cui_release(CUI_ITEM(lip)); 133 } 134 135 /* 136 * Allocate and initialize an cui item with the given number of extents. 137 */ 138 STATIC struct xfs_cui_log_item * 139 xfs_cui_init( 140 struct xfs_mount *mp, 141 uint nextents) 142 143 { 144 struct xfs_cui_log_item *cuip; 145 146 ASSERT(nextents > 0); 147 if (nextents > XFS_CUI_MAX_FAST_EXTENTS) 148 cuip = kzalloc(xfs_cui_log_item_sizeof(nextents), 149 GFP_KERNEL | __GFP_NOFAIL); 150 else 151 cuip = kmem_cache_zalloc(xfs_cui_cache, 152 GFP_KERNEL | __GFP_NOFAIL); 153 154 xfs_log_item_init(mp, &cuip->cui_item, XFS_LI_CUI, &xfs_cui_item_ops); 155 cuip->cui_format.cui_nextents = nextents; 156 cuip->cui_format.cui_id = (uintptr_t)(void *)cuip; 157 atomic_set(&cuip->cui_next_extent, 0); 158 atomic_set(&cuip->cui_refcount, 2); 159 160 return cuip; 161 } 162 163 static inline struct xfs_cud_log_item *CUD_ITEM(struct xfs_log_item *lip) 164 { 165 return container_of(lip, struct xfs_cud_log_item, cud_item); 166 } 167 168 STATIC void 169 xfs_cud_item_size( 170 struct xfs_log_item *lip, 171 int *nvecs, 172 int *nbytes) 173 { 174 *nvecs += 1; 175 *nbytes += sizeof(struct xfs_cud_log_format); 176 } 177 178 /* 179 * This is called to fill in the vector of log iovecs for the 180 * given cud log item. We use only 1 iovec, and we point that 181 * at the cud_log_format structure embedded in the cud item. 182 * It is at this point that we assert that all of the extent 183 * slots in the cud item have been filled. 184 */ 185 STATIC void 186 xfs_cud_item_format( 187 struct xfs_log_item *lip, 188 struct xfs_log_vec *lv) 189 { 190 struct xfs_cud_log_item *cudp = CUD_ITEM(lip); 191 struct xfs_log_iovec *vecp = NULL; 192 193 cudp->cud_format.cud_type = XFS_LI_CUD; 194 cudp->cud_format.cud_size = 1; 195 196 xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUD_FORMAT, &cudp->cud_format, 197 sizeof(struct xfs_cud_log_format)); 198 } 199 200 /* 201 * The CUD is either committed or aborted if the transaction is cancelled. If 202 * the transaction is cancelled, drop our reference to the CUI and free the 203 * CUD. 204 */ 205 STATIC void 206 xfs_cud_item_release( 207 struct xfs_log_item *lip) 208 { 209 struct xfs_cud_log_item *cudp = CUD_ITEM(lip); 210 211 xfs_cui_release(cudp->cud_cuip); 212 kvfree(cudp->cud_item.li_lv_shadow); 213 kmem_cache_free(xfs_cud_cache, cudp); 214 } 215 216 static struct xfs_log_item * 217 xfs_cud_item_intent( 218 struct xfs_log_item *lip) 219 { 220 return &CUD_ITEM(lip)->cud_cuip->cui_item; 221 } 222 223 static const struct xfs_item_ops xfs_cud_item_ops = { 224 .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED | 225 XFS_ITEM_INTENT_DONE, 226 .iop_size = xfs_cud_item_size, 227 .iop_format = xfs_cud_item_format, 228 .iop_release = xfs_cud_item_release, 229 .iop_intent = xfs_cud_item_intent, 230 }; 231 232 static inline struct xfs_refcount_intent *ci_entry(const struct list_head *e) 233 { 234 return list_entry(e, struct xfs_refcount_intent, ri_list); 235 } 236 237 /* Sort refcount intents by AG. */ 238 static int 239 xfs_refcount_update_diff_items( 240 void *priv, 241 const struct list_head *a, 242 const struct list_head *b) 243 { 244 struct xfs_refcount_intent *ra = ci_entry(a); 245 struct xfs_refcount_intent *rb = ci_entry(b); 246 247 return ra->ri_pag->pag_agno - rb->ri_pag->pag_agno; 248 } 249 250 /* Log refcount updates in the intent item. */ 251 STATIC void 252 xfs_refcount_update_log_item( 253 struct xfs_trans *tp, 254 struct xfs_cui_log_item *cuip, 255 struct xfs_refcount_intent *ri) 256 { 257 uint next_extent; 258 struct xfs_phys_extent *pmap; 259 260 /* 261 * atomic_inc_return gives us the value after the increment; 262 * we want to use it as an array index so we need to subtract 1 from 263 * it. 264 */ 265 next_extent = atomic_inc_return(&cuip->cui_next_extent) - 1; 266 ASSERT(next_extent < cuip->cui_format.cui_nextents); 267 pmap = &cuip->cui_format.cui_extents[next_extent]; 268 pmap->pe_startblock = ri->ri_startblock; 269 pmap->pe_len = ri->ri_blockcount; 270 271 pmap->pe_flags = 0; 272 switch (ri->ri_type) { 273 case XFS_REFCOUNT_INCREASE: 274 case XFS_REFCOUNT_DECREASE: 275 case XFS_REFCOUNT_ALLOC_COW: 276 case XFS_REFCOUNT_FREE_COW: 277 pmap->pe_flags |= ri->ri_type; 278 break; 279 default: 280 ASSERT(0); 281 } 282 } 283 284 static struct xfs_log_item * 285 xfs_refcount_update_create_intent( 286 struct xfs_trans *tp, 287 struct list_head *items, 288 unsigned int count, 289 bool sort) 290 { 291 struct xfs_mount *mp = tp->t_mountp; 292 struct xfs_cui_log_item *cuip = xfs_cui_init(mp, count); 293 struct xfs_refcount_intent *ri; 294 295 ASSERT(count > 0); 296 297 if (sort) 298 list_sort(mp, items, xfs_refcount_update_diff_items); 299 list_for_each_entry(ri, items, ri_list) 300 xfs_refcount_update_log_item(tp, cuip, ri); 301 return &cuip->cui_item; 302 } 303 304 /* Get an CUD so we can process all the deferred refcount updates. */ 305 static struct xfs_log_item * 306 xfs_refcount_update_create_done( 307 struct xfs_trans *tp, 308 struct xfs_log_item *intent, 309 unsigned int count) 310 { 311 struct xfs_cui_log_item *cuip = CUI_ITEM(intent); 312 struct xfs_cud_log_item *cudp; 313 314 cudp = kmem_cache_zalloc(xfs_cud_cache, GFP_KERNEL | __GFP_NOFAIL); 315 xfs_log_item_init(tp->t_mountp, &cudp->cud_item, XFS_LI_CUD, 316 &xfs_cud_item_ops); 317 cudp->cud_cuip = cuip; 318 cudp->cud_format.cud_cui_id = cuip->cui_format.cui_id; 319 320 return &cudp->cud_item; 321 } 322 323 /* Add this deferred CUI to the transaction. */ 324 void 325 xfs_refcount_defer_add( 326 struct xfs_trans *tp, 327 struct xfs_refcount_intent *ri) 328 { 329 struct xfs_mount *mp = tp->t_mountp; 330 331 trace_xfs_refcount_defer(mp, ri); 332 333 ri->ri_pag = xfs_perag_intent_get(mp, ri->ri_startblock); 334 xfs_defer_add(tp, &ri->ri_list, &xfs_refcount_update_defer_type); 335 } 336 337 /* Cancel a deferred refcount update. */ 338 STATIC void 339 xfs_refcount_update_cancel_item( 340 struct list_head *item) 341 { 342 struct xfs_refcount_intent *ri = ci_entry(item); 343 344 xfs_perag_intent_put(ri->ri_pag); 345 kmem_cache_free(xfs_refcount_intent_cache, ri); 346 } 347 348 /* Process a deferred refcount update. */ 349 STATIC int 350 xfs_refcount_update_finish_item( 351 struct xfs_trans *tp, 352 struct xfs_log_item *done, 353 struct list_head *item, 354 struct xfs_btree_cur **state) 355 { 356 struct xfs_refcount_intent *ri = ci_entry(item); 357 int error; 358 359 /* Did we run out of reservation? Requeue what we didn't finish. */ 360 error = xfs_refcount_finish_one(tp, ri, state); 361 if (!error && ri->ri_blockcount > 0) { 362 ASSERT(ri->ri_type == XFS_REFCOUNT_INCREASE || 363 ri->ri_type == XFS_REFCOUNT_DECREASE); 364 return -EAGAIN; 365 } 366 367 xfs_refcount_update_cancel_item(item); 368 return error; 369 } 370 371 /* Clean up after calling xfs_refcount_finish_one. */ 372 STATIC void 373 xfs_refcount_finish_one_cleanup( 374 struct xfs_trans *tp, 375 struct xfs_btree_cur *rcur, 376 int error) 377 { 378 struct xfs_buf *agbp; 379 380 if (rcur == NULL) 381 return; 382 agbp = rcur->bc_ag.agbp; 383 xfs_btree_del_cursor(rcur, error); 384 if (error) 385 xfs_trans_brelse(tp, agbp); 386 } 387 388 /* Abort all pending CUIs. */ 389 STATIC void 390 xfs_refcount_update_abort_intent( 391 struct xfs_log_item *intent) 392 { 393 xfs_cui_release(CUI_ITEM(intent)); 394 } 395 396 /* Is this recovered CUI ok? */ 397 static inline bool 398 xfs_cui_validate_phys( 399 struct xfs_mount *mp, 400 struct xfs_phys_extent *pmap) 401 { 402 if (!xfs_has_reflink(mp)) 403 return false; 404 405 if (pmap->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS) 406 return false; 407 408 switch (pmap->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK) { 409 case XFS_REFCOUNT_INCREASE: 410 case XFS_REFCOUNT_DECREASE: 411 case XFS_REFCOUNT_ALLOC_COW: 412 case XFS_REFCOUNT_FREE_COW: 413 break; 414 default: 415 return false; 416 } 417 418 return xfs_verify_fsbext(mp, pmap->pe_startblock, pmap->pe_len); 419 } 420 421 static inline void 422 xfs_cui_recover_work( 423 struct xfs_mount *mp, 424 struct xfs_defer_pending *dfp, 425 struct xfs_phys_extent *pmap) 426 { 427 struct xfs_refcount_intent *ri; 428 429 ri = kmem_cache_alloc(xfs_refcount_intent_cache, 430 GFP_KERNEL | __GFP_NOFAIL); 431 ri->ri_type = pmap->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK; 432 ri->ri_startblock = pmap->pe_startblock; 433 ri->ri_blockcount = pmap->pe_len; 434 ri->ri_pag = xfs_perag_intent_get(mp, pmap->pe_startblock); 435 436 xfs_defer_add_item(dfp, &ri->ri_list); 437 } 438 439 /* 440 * Process a refcount update intent item that was recovered from the log. 441 * We need to update the refcountbt. 442 */ 443 STATIC int 444 xfs_refcount_recover_work( 445 struct xfs_defer_pending *dfp, 446 struct list_head *capture_list) 447 { 448 struct xfs_trans_res resv; 449 struct xfs_log_item *lip = dfp->dfp_intent; 450 struct xfs_cui_log_item *cuip = CUI_ITEM(lip); 451 struct xfs_trans *tp; 452 struct xfs_mount *mp = lip->li_log->l_mp; 453 int i; 454 int error = 0; 455 456 /* 457 * First check the validity of the extents described by the 458 * CUI. If any are bad, then assume that all are bad and 459 * just toss the CUI. 460 */ 461 for (i = 0; i < cuip->cui_format.cui_nextents; i++) { 462 if (!xfs_cui_validate_phys(mp, 463 &cuip->cui_format.cui_extents[i])) { 464 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, 465 &cuip->cui_format, 466 sizeof(cuip->cui_format)); 467 return -EFSCORRUPTED; 468 } 469 470 xfs_cui_recover_work(mp, dfp, &cuip->cui_format.cui_extents[i]); 471 } 472 473 /* 474 * Under normal operation, refcount updates are deferred, so we 475 * wouldn't be adding them directly to a transaction. All 476 * refcount updates manage reservation usage internally and 477 * dynamically by deferring work that won't fit in the 478 * transaction. Normally, any work that needs to be deferred 479 * gets attached to the same defer_ops that scheduled the 480 * refcount update. However, we're in log recovery here, so we 481 * use the passed in defer_ops and to finish up any work that 482 * doesn't fit. We need to reserve enough blocks to handle a 483 * full btree split on either end of the refcount range. 484 */ 485 resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate); 486 error = xfs_trans_alloc(mp, &resv, mp->m_refc_maxlevels * 2, 0, 487 XFS_TRANS_RESERVE, &tp); 488 if (error) 489 return error; 490 491 error = xlog_recover_finish_intent(tp, dfp); 492 if (error == -EFSCORRUPTED) 493 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, 494 &cuip->cui_format, 495 sizeof(cuip->cui_format)); 496 if (error) 497 goto abort_error; 498 499 return xfs_defer_ops_capture_and_commit(tp, capture_list); 500 501 abort_error: 502 xfs_trans_cancel(tp); 503 return error; 504 } 505 506 /* Relog an intent item to push the log tail forward. */ 507 static struct xfs_log_item * 508 xfs_refcount_relog_intent( 509 struct xfs_trans *tp, 510 struct xfs_log_item *intent, 511 struct xfs_log_item *done_item) 512 { 513 struct xfs_cui_log_item *cuip; 514 struct xfs_phys_extent *pmap; 515 unsigned int count; 516 517 count = CUI_ITEM(intent)->cui_format.cui_nextents; 518 pmap = CUI_ITEM(intent)->cui_format.cui_extents; 519 520 cuip = xfs_cui_init(tp->t_mountp, count); 521 memcpy(cuip->cui_format.cui_extents, pmap, count * sizeof(*pmap)); 522 atomic_set(&cuip->cui_next_extent, count); 523 524 return &cuip->cui_item; 525 } 526 527 const struct xfs_defer_op_type xfs_refcount_update_defer_type = { 528 .name = "refcount", 529 .max_items = XFS_CUI_MAX_FAST_EXTENTS, 530 .create_intent = xfs_refcount_update_create_intent, 531 .abort_intent = xfs_refcount_update_abort_intent, 532 .create_done = xfs_refcount_update_create_done, 533 .finish_item = xfs_refcount_update_finish_item, 534 .finish_cleanup = xfs_refcount_finish_one_cleanup, 535 .cancel_item = xfs_refcount_update_cancel_item, 536 .recover_work = xfs_refcount_recover_work, 537 .relog_intent = xfs_refcount_relog_intent, 538 }; 539 540 STATIC bool 541 xfs_cui_item_match( 542 struct xfs_log_item *lip, 543 uint64_t intent_id) 544 { 545 return CUI_ITEM(lip)->cui_format.cui_id == intent_id; 546 } 547 548 static const struct xfs_item_ops xfs_cui_item_ops = { 549 .flags = XFS_ITEM_INTENT, 550 .iop_size = xfs_cui_item_size, 551 .iop_format = xfs_cui_item_format, 552 .iop_unpin = xfs_cui_item_unpin, 553 .iop_release = xfs_cui_item_release, 554 .iop_match = xfs_cui_item_match, 555 }; 556 557 static inline void 558 xfs_cui_copy_format( 559 struct xfs_cui_log_format *dst, 560 const struct xfs_cui_log_format *src) 561 { 562 unsigned int i; 563 564 memcpy(dst, src, offsetof(struct xfs_cui_log_format, cui_extents)); 565 566 for (i = 0; i < src->cui_nextents; i++) 567 memcpy(&dst->cui_extents[i], &src->cui_extents[i], 568 sizeof(struct xfs_phys_extent)); 569 } 570 571 /* 572 * This routine is called to create an in-core extent refcount update 573 * item from the cui format structure which was logged on disk. 574 * It allocates an in-core cui, copies the extents from the format 575 * structure into it, and adds the cui to the AIL with the given 576 * LSN. 577 */ 578 STATIC int 579 xlog_recover_cui_commit_pass2( 580 struct xlog *log, 581 struct list_head *buffer_list, 582 struct xlog_recover_item *item, 583 xfs_lsn_t lsn) 584 { 585 struct xfs_mount *mp = log->l_mp; 586 struct xfs_cui_log_item *cuip; 587 struct xfs_cui_log_format *cui_formatp; 588 size_t len; 589 590 cui_formatp = item->ri_buf[0].i_addr; 591 592 if (item->ri_buf[0].i_len < xfs_cui_log_format_sizeof(0)) { 593 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, 594 item->ri_buf[0].i_addr, item->ri_buf[0].i_len); 595 return -EFSCORRUPTED; 596 } 597 598 len = xfs_cui_log_format_sizeof(cui_formatp->cui_nextents); 599 if (item->ri_buf[0].i_len != len) { 600 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, 601 item->ri_buf[0].i_addr, item->ri_buf[0].i_len); 602 return -EFSCORRUPTED; 603 } 604 605 cuip = xfs_cui_init(mp, cui_formatp->cui_nextents); 606 xfs_cui_copy_format(&cuip->cui_format, cui_formatp); 607 atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents); 608 609 xlog_recover_intent_item(log, &cuip->cui_item, lsn, 610 &xfs_refcount_update_defer_type); 611 return 0; 612 } 613 614 const struct xlog_recover_item_ops xlog_cui_item_ops = { 615 .item_type = XFS_LI_CUI, 616 .commit_pass2 = xlog_recover_cui_commit_pass2, 617 }; 618 619 /* 620 * This routine is called when an CUD format structure is found in a committed 621 * transaction in the log. Its purpose is to cancel the corresponding CUI if it 622 * was still in the log. To do this it searches the AIL for the CUI with an id 623 * equal to that in the CUD format structure. If we find it we drop the CUD 624 * reference, which removes the CUI from the AIL and frees it. 625 */ 626 STATIC int 627 xlog_recover_cud_commit_pass2( 628 struct xlog *log, 629 struct list_head *buffer_list, 630 struct xlog_recover_item *item, 631 xfs_lsn_t lsn) 632 { 633 struct xfs_cud_log_format *cud_formatp; 634 635 cud_formatp = item->ri_buf[0].i_addr; 636 if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) { 637 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, 638 item->ri_buf[0].i_addr, item->ri_buf[0].i_len); 639 return -EFSCORRUPTED; 640 } 641 642 xlog_recover_release_intent(log, XFS_LI_CUI, cud_formatp->cud_cui_id); 643 return 0; 644 } 645 646 const struct xlog_recover_item_ops xlog_cud_item_ops = { 647 .item_type = XFS_LI_CUD, 648 .commit_pass2 = xlog_recover_cud_commit_pass2, 649 }; 650