1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * Copyright (C) 2016 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_format.h" 9 #include "xfs_log_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_bit.h" 12 #include "xfs_shared.h" 13 #include "xfs_mount.h" 14 #include "xfs_defer.h" 15 #include "xfs_trans.h" 16 #include "xfs_trans_priv.h" 17 #include "xfs_refcount_item.h" 18 #include "xfs_log.h" 19 #include "xfs_refcount.h" 20 #include "xfs_error.h" 21 #include "xfs_log_priv.h" 22 #include "xfs_log_recover.h" 23 #include "xfs_ag.h" 24 #include "xfs_btree.h" 25 #include "xfs_trace.h" 26 27 struct kmem_cache *xfs_cui_cache; 28 struct kmem_cache *xfs_cud_cache; 29 30 static const struct xfs_item_ops xfs_cui_item_ops; 31 32 static inline struct xfs_cui_log_item *CUI_ITEM(struct xfs_log_item *lip) 33 { 34 return container_of(lip, struct xfs_cui_log_item, cui_item); 35 } 36 37 STATIC void 38 xfs_cui_item_free( 39 struct xfs_cui_log_item *cuip) 40 { 41 kvfree(cuip->cui_item.li_lv_shadow); 42 if (cuip->cui_format.cui_nextents > XFS_CUI_MAX_FAST_EXTENTS) 43 kfree(cuip); 44 else 45 kmem_cache_free(xfs_cui_cache, cuip); 46 } 47 48 /* 49 * Freeing the CUI requires that we remove it from the AIL if it has already 50 * been placed there. However, the CUI may not yet have been placed in the AIL 51 * when called by xfs_cui_release() from CUD processing due to the ordering of 52 * committed vs unpin operations in bulk insert operations. Hence the reference 53 * count to ensure only the last caller frees the CUI. 54 */ 55 STATIC void 56 xfs_cui_release( 57 struct xfs_cui_log_item *cuip) 58 { 59 ASSERT(atomic_read(&cuip->cui_refcount) > 0); 60 if (!atomic_dec_and_test(&cuip->cui_refcount)) 61 return; 62 63 xfs_trans_ail_delete(&cuip->cui_item, 0); 64 xfs_cui_item_free(cuip); 65 } 66 67 68 STATIC void 69 xfs_cui_item_size( 70 struct xfs_log_item *lip, 71 int *nvecs, 72 int *nbytes) 73 { 74 struct xfs_cui_log_item *cuip = CUI_ITEM(lip); 75 76 *nvecs += 1; 77 *nbytes += xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents); 78 } 79 80 /* 81 * This is called to fill in the vector of log iovecs for the 82 * given cui log item. We use only 1 iovec, and we point that 83 * at the cui_log_format structure embedded in the cui item. 84 * It is at this point that we assert that all of the extent 85 * slots in the cui item have been filled. 86 */ 87 STATIC void 88 xfs_cui_item_format( 89 struct xfs_log_item *lip, 90 struct xfs_log_vec *lv) 91 { 92 struct xfs_cui_log_item *cuip = CUI_ITEM(lip); 93 struct xfs_log_iovec *vecp = NULL; 94 95 ASSERT(atomic_read(&cuip->cui_next_extent) == 96 cuip->cui_format.cui_nextents); 97 98 cuip->cui_format.cui_type = XFS_LI_CUI; 99 cuip->cui_format.cui_size = 1; 100 101 xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUI_FORMAT, &cuip->cui_format, 102 xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents)); 103 } 104 105 /* 106 * The unpin operation is the last place an CUI is manipulated in the log. It is 107 * either inserted in the AIL or aborted in the event of a log I/O error. In 108 * either case, the CUI transaction has been successfully committed to make it 109 * this far. Therefore, we expect whoever committed the CUI to either construct 110 * and commit the CUD or drop the CUD's reference in the event of error. Simply 111 * drop the log's CUI reference now that the log is done with it. 112 */ 113 STATIC void 114 xfs_cui_item_unpin( 115 struct xfs_log_item *lip, 116 int remove) 117 { 118 struct xfs_cui_log_item *cuip = CUI_ITEM(lip); 119 120 xfs_cui_release(cuip); 121 } 122 123 /* 124 * The CUI has been either committed or aborted if the transaction has been 125 * cancelled. If the transaction was cancelled, an CUD isn't going to be 126 * constructed and thus we free the CUI here directly. 127 */ 128 STATIC void 129 xfs_cui_item_release( 130 struct xfs_log_item *lip) 131 { 132 xfs_cui_release(CUI_ITEM(lip)); 133 } 134 135 /* 136 * Allocate and initialize an cui item with the given number of extents. 137 */ 138 STATIC struct xfs_cui_log_item * 139 xfs_cui_init( 140 struct xfs_mount *mp, 141 uint nextents) 142 143 { 144 struct xfs_cui_log_item *cuip; 145 146 ASSERT(nextents > 0); 147 if (nextents > XFS_CUI_MAX_FAST_EXTENTS) 148 cuip = kzalloc(xfs_cui_log_item_sizeof(nextents), 149 GFP_KERNEL | __GFP_NOFAIL); 150 else 151 cuip = kmem_cache_zalloc(xfs_cui_cache, 152 GFP_KERNEL | __GFP_NOFAIL); 153 154 xfs_log_item_init(mp, &cuip->cui_item, XFS_LI_CUI, &xfs_cui_item_ops); 155 cuip->cui_format.cui_nextents = nextents; 156 cuip->cui_format.cui_id = (uintptr_t)(void *)cuip; 157 atomic_set(&cuip->cui_next_extent, 0); 158 atomic_set(&cuip->cui_refcount, 2); 159 160 return cuip; 161 } 162 163 static inline struct xfs_cud_log_item *CUD_ITEM(struct xfs_log_item *lip) 164 { 165 return container_of(lip, struct xfs_cud_log_item, cud_item); 166 } 167 168 STATIC void 169 xfs_cud_item_size( 170 struct xfs_log_item *lip, 171 int *nvecs, 172 int *nbytes) 173 { 174 *nvecs += 1; 175 *nbytes += sizeof(struct xfs_cud_log_format); 176 } 177 178 /* 179 * This is called to fill in the vector of log iovecs for the 180 * given cud log item. We use only 1 iovec, and we point that 181 * at the cud_log_format structure embedded in the cud item. 182 * It is at this point that we assert that all of the extent 183 * slots in the cud item have been filled. 184 */ 185 STATIC void 186 xfs_cud_item_format( 187 struct xfs_log_item *lip, 188 struct xfs_log_vec *lv) 189 { 190 struct xfs_cud_log_item *cudp = CUD_ITEM(lip); 191 struct xfs_log_iovec *vecp = NULL; 192 193 cudp->cud_format.cud_type = XFS_LI_CUD; 194 cudp->cud_format.cud_size = 1; 195 196 xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUD_FORMAT, &cudp->cud_format, 197 sizeof(struct xfs_cud_log_format)); 198 } 199 200 /* 201 * The CUD is either committed or aborted if the transaction is cancelled. If 202 * the transaction is cancelled, drop our reference to the CUI and free the 203 * CUD. 204 */ 205 STATIC void 206 xfs_cud_item_release( 207 struct xfs_log_item *lip) 208 { 209 struct xfs_cud_log_item *cudp = CUD_ITEM(lip); 210 211 xfs_cui_release(cudp->cud_cuip); 212 kvfree(cudp->cud_item.li_lv_shadow); 213 kmem_cache_free(xfs_cud_cache, cudp); 214 } 215 216 static struct xfs_log_item * 217 xfs_cud_item_intent( 218 struct xfs_log_item *lip) 219 { 220 return &CUD_ITEM(lip)->cud_cuip->cui_item; 221 } 222 223 static const struct xfs_item_ops xfs_cud_item_ops = { 224 .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED | 225 XFS_ITEM_INTENT_DONE, 226 .iop_size = xfs_cud_item_size, 227 .iop_format = xfs_cud_item_format, 228 .iop_release = xfs_cud_item_release, 229 .iop_intent = xfs_cud_item_intent, 230 }; 231 232 static inline struct xfs_refcount_intent *ci_entry(const struct list_head *e) 233 { 234 return list_entry(e, struct xfs_refcount_intent, ri_list); 235 } 236 237 /* Sort refcount intents by AG. */ 238 static int 239 xfs_refcount_update_diff_items( 240 void *priv, 241 const struct list_head *a, 242 const struct list_head *b) 243 { 244 struct xfs_refcount_intent *ra = ci_entry(a); 245 struct xfs_refcount_intent *rb = ci_entry(b); 246 247 return ra->ri_group->xg_gno - rb->ri_group->xg_gno; 248 } 249 250 /* Log refcount updates in the intent item. */ 251 STATIC void 252 xfs_refcount_update_log_item( 253 struct xfs_trans *tp, 254 struct xfs_cui_log_item *cuip, 255 struct xfs_refcount_intent *ri) 256 { 257 uint next_extent; 258 struct xfs_phys_extent *pmap; 259 260 /* 261 * atomic_inc_return gives us the value after the increment; 262 * we want to use it as an array index so we need to subtract 1 from 263 * it. 264 */ 265 next_extent = atomic_inc_return(&cuip->cui_next_extent) - 1; 266 ASSERT(next_extent < cuip->cui_format.cui_nextents); 267 pmap = &cuip->cui_format.cui_extents[next_extent]; 268 pmap->pe_startblock = ri->ri_startblock; 269 pmap->pe_len = ri->ri_blockcount; 270 271 pmap->pe_flags = 0; 272 switch (ri->ri_type) { 273 case XFS_REFCOUNT_INCREASE: 274 case XFS_REFCOUNT_DECREASE: 275 case XFS_REFCOUNT_ALLOC_COW: 276 case XFS_REFCOUNT_FREE_COW: 277 pmap->pe_flags |= ri->ri_type; 278 break; 279 default: 280 ASSERT(0); 281 } 282 } 283 284 static struct xfs_log_item * 285 xfs_refcount_update_create_intent( 286 struct xfs_trans *tp, 287 struct list_head *items, 288 unsigned int count, 289 bool sort) 290 { 291 struct xfs_mount *mp = tp->t_mountp; 292 struct xfs_cui_log_item *cuip = xfs_cui_init(mp, count); 293 struct xfs_refcount_intent *ri; 294 295 ASSERT(count > 0); 296 297 if (sort) 298 list_sort(mp, items, xfs_refcount_update_diff_items); 299 list_for_each_entry(ri, items, ri_list) 300 xfs_refcount_update_log_item(tp, cuip, ri); 301 return &cuip->cui_item; 302 } 303 304 /* Get an CUD so we can process all the deferred refcount updates. */ 305 static struct xfs_log_item * 306 xfs_refcount_update_create_done( 307 struct xfs_trans *tp, 308 struct xfs_log_item *intent, 309 unsigned int count) 310 { 311 struct xfs_cui_log_item *cuip = CUI_ITEM(intent); 312 struct xfs_cud_log_item *cudp; 313 314 cudp = kmem_cache_zalloc(xfs_cud_cache, GFP_KERNEL | __GFP_NOFAIL); 315 xfs_log_item_init(tp->t_mountp, &cudp->cud_item, XFS_LI_CUD, 316 &xfs_cud_item_ops); 317 cudp->cud_cuip = cuip; 318 cudp->cud_format.cud_cui_id = cuip->cui_format.cui_id; 319 320 return &cudp->cud_item; 321 } 322 323 /* Add this deferred CUI to the transaction. */ 324 void 325 xfs_refcount_defer_add( 326 struct xfs_trans *tp, 327 struct xfs_refcount_intent *ri) 328 { 329 struct xfs_mount *mp = tp->t_mountp; 330 331 trace_xfs_refcount_defer(mp, ri); 332 333 ri->ri_group = xfs_group_intent_get(mp, ri->ri_startblock, XG_TYPE_AG); 334 xfs_defer_add(tp, &ri->ri_list, &xfs_refcount_update_defer_type); 335 } 336 337 /* Cancel a deferred refcount update. */ 338 STATIC void 339 xfs_refcount_update_cancel_item( 340 struct list_head *item) 341 { 342 struct xfs_refcount_intent *ri = ci_entry(item); 343 344 xfs_group_intent_put(ri->ri_group); 345 kmem_cache_free(xfs_refcount_intent_cache, ri); 346 } 347 348 /* Process a deferred refcount update. */ 349 STATIC int 350 xfs_refcount_update_finish_item( 351 struct xfs_trans *tp, 352 struct xfs_log_item *done, 353 struct list_head *item, 354 struct xfs_btree_cur **state) 355 { 356 struct xfs_refcount_intent *ri = ci_entry(item); 357 int error; 358 359 /* Did we run out of reservation? Requeue what we didn't finish. */ 360 error = xfs_refcount_finish_one(tp, ri, state); 361 if (!error && ri->ri_blockcount > 0) { 362 ASSERT(ri->ri_type == XFS_REFCOUNT_INCREASE || 363 ri->ri_type == XFS_REFCOUNT_DECREASE); 364 return -EAGAIN; 365 } 366 367 xfs_refcount_update_cancel_item(item); 368 return error; 369 } 370 371 /* Clean up after calling xfs_refcount_finish_one. */ 372 STATIC void 373 xfs_refcount_finish_one_cleanup( 374 struct xfs_trans *tp, 375 struct xfs_btree_cur *rcur, 376 int error) 377 { 378 struct xfs_buf *agbp; 379 380 if (rcur == NULL) 381 return; 382 agbp = rcur->bc_ag.agbp; 383 xfs_btree_del_cursor(rcur, error); 384 if (error) 385 xfs_trans_brelse(tp, agbp); 386 } 387 388 /* Abort all pending CUIs. */ 389 STATIC void 390 xfs_refcount_update_abort_intent( 391 struct xfs_log_item *intent) 392 { 393 xfs_cui_release(CUI_ITEM(intent)); 394 } 395 396 /* Is this recovered CUI ok? */ 397 static inline bool 398 xfs_cui_validate_phys( 399 struct xfs_mount *mp, 400 struct xfs_phys_extent *pmap) 401 { 402 if (!xfs_has_reflink(mp)) 403 return false; 404 405 if (pmap->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS) 406 return false; 407 408 switch (pmap->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK) { 409 case XFS_REFCOUNT_INCREASE: 410 case XFS_REFCOUNT_DECREASE: 411 case XFS_REFCOUNT_ALLOC_COW: 412 case XFS_REFCOUNT_FREE_COW: 413 break; 414 default: 415 return false; 416 } 417 418 return xfs_verify_fsbext(mp, pmap->pe_startblock, pmap->pe_len); 419 } 420 421 static inline void 422 xfs_cui_recover_work( 423 struct xfs_mount *mp, 424 struct xfs_defer_pending *dfp, 425 struct xfs_phys_extent *pmap) 426 { 427 struct xfs_refcount_intent *ri; 428 429 ri = kmem_cache_alloc(xfs_refcount_intent_cache, 430 GFP_KERNEL | __GFP_NOFAIL); 431 ri->ri_type = pmap->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK; 432 ri->ri_startblock = pmap->pe_startblock; 433 ri->ri_blockcount = pmap->pe_len; 434 ri->ri_group = xfs_group_intent_get(mp, pmap->pe_startblock, 435 XG_TYPE_AG); 436 437 xfs_defer_add_item(dfp, &ri->ri_list); 438 } 439 440 /* 441 * Process a refcount update intent item that was recovered from the log. 442 * We need to update the refcountbt. 443 */ 444 STATIC int 445 xfs_refcount_recover_work( 446 struct xfs_defer_pending *dfp, 447 struct list_head *capture_list) 448 { 449 struct xfs_trans_res resv; 450 struct xfs_log_item *lip = dfp->dfp_intent; 451 struct xfs_cui_log_item *cuip = CUI_ITEM(lip); 452 struct xfs_trans *tp; 453 struct xfs_mount *mp = lip->li_log->l_mp; 454 int i; 455 int error = 0; 456 457 /* 458 * First check the validity of the extents described by the 459 * CUI. If any are bad, then assume that all are bad and 460 * just toss the CUI. 461 */ 462 for (i = 0; i < cuip->cui_format.cui_nextents; i++) { 463 if (!xfs_cui_validate_phys(mp, 464 &cuip->cui_format.cui_extents[i])) { 465 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, 466 &cuip->cui_format, 467 sizeof(cuip->cui_format)); 468 return -EFSCORRUPTED; 469 } 470 471 xfs_cui_recover_work(mp, dfp, &cuip->cui_format.cui_extents[i]); 472 } 473 474 /* 475 * Under normal operation, refcount updates are deferred, so we 476 * wouldn't be adding them directly to a transaction. All 477 * refcount updates manage reservation usage internally and 478 * dynamically by deferring work that won't fit in the 479 * transaction. Normally, any work that needs to be deferred 480 * gets attached to the same defer_ops that scheduled the 481 * refcount update. However, we're in log recovery here, so we 482 * use the passed in defer_ops and to finish up any work that 483 * doesn't fit. We need to reserve enough blocks to handle a 484 * full btree split on either end of the refcount range. 485 */ 486 resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate); 487 error = xfs_trans_alloc(mp, &resv, mp->m_refc_maxlevels * 2, 0, 488 XFS_TRANS_RESERVE, &tp); 489 if (error) 490 return error; 491 492 error = xlog_recover_finish_intent(tp, dfp); 493 if (error == -EFSCORRUPTED) 494 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, 495 &cuip->cui_format, 496 sizeof(cuip->cui_format)); 497 if (error) 498 goto abort_error; 499 500 return xfs_defer_ops_capture_and_commit(tp, capture_list); 501 502 abort_error: 503 xfs_trans_cancel(tp); 504 return error; 505 } 506 507 /* Relog an intent item to push the log tail forward. */ 508 static struct xfs_log_item * 509 xfs_refcount_relog_intent( 510 struct xfs_trans *tp, 511 struct xfs_log_item *intent, 512 struct xfs_log_item *done_item) 513 { 514 struct xfs_cui_log_item *cuip; 515 struct xfs_phys_extent *pmap; 516 unsigned int count; 517 518 count = CUI_ITEM(intent)->cui_format.cui_nextents; 519 pmap = CUI_ITEM(intent)->cui_format.cui_extents; 520 521 cuip = xfs_cui_init(tp->t_mountp, count); 522 memcpy(cuip->cui_format.cui_extents, pmap, count * sizeof(*pmap)); 523 atomic_set(&cuip->cui_next_extent, count); 524 525 return &cuip->cui_item; 526 } 527 528 const struct xfs_defer_op_type xfs_refcount_update_defer_type = { 529 .name = "refcount", 530 .max_items = XFS_CUI_MAX_FAST_EXTENTS, 531 .create_intent = xfs_refcount_update_create_intent, 532 .abort_intent = xfs_refcount_update_abort_intent, 533 .create_done = xfs_refcount_update_create_done, 534 .finish_item = xfs_refcount_update_finish_item, 535 .finish_cleanup = xfs_refcount_finish_one_cleanup, 536 .cancel_item = xfs_refcount_update_cancel_item, 537 .recover_work = xfs_refcount_recover_work, 538 .relog_intent = xfs_refcount_relog_intent, 539 }; 540 541 STATIC bool 542 xfs_cui_item_match( 543 struct xfs_log_item *lip, 544 uint64_t intent_id) 545 { 546 return CUI_ITEM(lip)->cui_format.cui_id == intent_id; 547 } 548 549 static const struct xfs_item_ops xfs_cui_item_ops = { 550 .flags = XFS_ITEM_INTENT, 551 .iop_size = xfs_cui_item_size, 552 .iop_format = xfs_cui_item_format, 553 .iop_unpin = xfs_cui_item_unpin, 554 .iop_release = xfs_cui_item_release, 555 .iop_match = xfs_cui_item_match, 556 }; 557 558 static inline void 559 xfs_cui_copy_format( 560 struct xfs_cui_log_format *dst, 561 const struct xfs_cui_log_format *src) 562 { 563 unsigned int i; 564 565 memcpy(dst, src, offsetof(struct xfs_cui_log_format, cui_extents)); 566 567 for (i = 0; i < src->cui_nextents; i++) 568 memcpy(&dst->cui_extents[i], &src->cui_extents[i], 569 sizeof(struct xfs_phys_extent)); 570 } 571 572 /* 573 * This routine is called to create an in-core extent refcount update 574 * item from the cui format structure which was logged on disk. 575 * It allocates an in-core cui, copies the extents from the format 576 * structure into it, and adds the cui to the AIL with the given 577 * LSN. 578 */ 579 STATIC int 580 xlog_recover_cui_commit_pass2( 581 struct xlog *log, 582 struct list_head *buffer_list, 583 struct xlog_recover_item *item, 584 xfs_lsn_t lsn) 585 { 586 struct xfs_mount *mp = log->l_mp; 587 struct xfs_cui_log_item *cuip; 588 struct xfs_cui_log_format *cui_formatp; 589 size_t len; 590 591 cui_formatp = item->ri_buf[0].i_addr; 592 593 if (item->ri_buf[0].i_len < xfs_cui_log_format_sizeof(0)) { 594 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, 595 item->ri_buf[0].i_addr, item->ri_buf[0].i_len); 596 return -EFSCORRUPTED; 597 } 598 599 len = xfs_cui_log_format_sizeof(cui_formatp->cui_nextents); 600 if (item->ri_buf[0].i_len != len) { 601 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, 602 item->ri_buf[0].i_addr, item->ri_buf[0].i_len); 603 return -EFSCORRUPTED; 604 } 605 606 cuip = xfs_cui_init(mp, cui_formatp->cui_nextents); 607 xfs_cui_copy_format(&cuip->cui_format, cui_formatp); 608 atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents); 609 610 xlog_recover_intent_item(log, &cuip->cui_item, lsn, 611 &xfs_refcount_update_defer_type); 612 return 0; 613 } 614 615 const struct xlog_recover_item_ops xlog_cui_item_ops = { 616 .item_type = XFS_LI_CUI, 617 .commit_pass2 = xlog_recover_cui_commit_pass2, 618 }; 619 620 /* 621 * This routine is called when an CUD format structure is found in a committed 622 * transaction in the log. Its purpose is to cancel the corresponding CUI if it 623 * was still in the log. To do this it searches the AIL for the CUI with an id 624 * equal to that in the CUD format structure. If we find it we drop the CUD 625 * reference, which removes the CUI from the AIL and frees it. 626 */ 627 STATIC int 628 xlog_recover_cud_commit_pass2( 629 struct xlog *log, 630 struct list_head *buffer_list, 631 struct xlog_recover_item *item, 632 xfs_lsn_t lsn) 633 { 634 struct xfs_cud_log_format *cud_formatp; 635 636 cud_formatp = item->ri_buf[0].i_addr; 637 if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) { 638 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, 639 item->ri_buf[0].i_addr, item->ri_buf[0].i_len); 640 return -EFSCORRUPTED; 641 } 642 643 xlog_recover_release_intent(log, XFS_LI_CUI, cud_formatp->cud_cui_id); 644 return 0; 645 } 646 647 const struct xlog_recover_item_ops xlog_cud_item_ops = { 648 .item_type = XFS_LI_CUD, 649 .commit_pass2 = xlog_recover_cud_commit_pass2, 650 }; 651