1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * Copyright (C) 2016 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_format.h" 9 #include "xfs_log_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_bit.h" 12 #include "xfs_shared.h" 13 #include "xfs_mount.h" 14 #include "xfs_defer.h" 15 #include "xfs_trans.h" 16 #include "xfs_trans_priv.h" 17 #include "xfs_refcount_item.h" 18 #include "xfs_log.h" 19 #include "xfs_refcount.h" 20 #include "xfs_error.h" 21 #include "xfs_log_priv.h" 22 #include "xfs_log_recover.h" 23 #include "xfs_ag.h" 24 25 struct kmem_cache *xfs_cui_cache; 26 struct kmem_cache *xfs_cud_cache; 27 28 static const struct xfs_item_ops xfs_cui_item_ops; 29 30 static inline struct xfs_cui_log_item *CUI_ITEM(struct xfs_log_item *lip) 31 { 32 return container_of(lip, struct xfs_cui_log_item, cui_item); 33 } 34 35 STATIC void 36 xfs_cui_item_free( 37 struct xfs_cui_log_item *cuip) 38 { 39 kvfree(cuip->cui_item.li_lv_shadow); 40 if (cuip->cui_format.cui_nextents > XFS_CUI_MAX_FAST_EXTENTS) 41 kfree(cuip); 42 else 43 kmem_cache_free(xfs_cui_cache, cuip); 44 } 45 46 /* 47 * Freeing the CUI requires that we remove it from the AIL if it has already 48 * been placed there. However, the CUI may not yet have been placed in the AIL 49 * when called by xfs_cui_release() from CUD processing due to the ordering of 50 * committed vs unpin operations in bulk insert operations. Hence the reference 51 * count to ensure only the last caller frees the CUI. 52 */ 53 STATIC void 54 xfs_cui_release( 55 struct xfs_cui_log_item *cuip) 56 { 57 ASSERT(atomic_read(&cuip->cui_refcount) > 0); 58 if (!atomic_dec_and_test(&cuip->cui_refcount)) 59 return; 60 61 xfs_trans_ail_delete(&cuip->cui_item, 0); 62 xfs_cui_item_free(cuip); 63 } 64 65 66 STATIC void 67 xfs_cui_item_size( 68 struct xfs_log_item *lip, 69 int *nvecs, 70 int *nbytes) 71 { 72 struct xfs_cui_log_item *cuip = CUI_ITEM(lip); 73 74 *nvecs += 1; 75 *nbytes += xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents); 76 } 77 78 /* 79 * This is called to fill in the vector of log iovecs for the 80 * given cui log item. We use only 1 iovec, and we point that 81 * at the cui_log_format structure embedded in the cui item. 82 * It is at this point that we assert that all of the extent 83 * slots in the cui item have been filled. 84 */ 85 STATIC void 86 xfs_cui_item_format( 87 struct xfs_log_item *lip, 88 struct xfs_log_vec *lv) 89 { 90 struct xfs_cui_log_item *cuip = CUI_ITEM(lip); 91 struct xfs_log_iovec *vecp = NULL; 92 93 ASSERT(atomic_read(&cuip->cui_next_extent) == 94 cuip->cui_format.cui_nextents); 95 96 cuip->cui_format.cui_type = XFS_LI_CUI; 97 cuip->cui_format.cui_size = 1; 98 99 xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUI_FORMAT, &cuip->cui_format, 100 xfs_cui_log_format_sizeof(cuip->cui_format.cui_nextents)); 101 } 102 103 /* 104 * The unpin operation is the last place an CUI is manipulated in the log. It is 105 * either inserted in the AIL or aborted in the event of a log I/O error. In 106 * either case, the CUI transaction has been successfully committed to make it 107 * this far. Therefore, we expect whoever committed the CUI to either construct 108 * and commit the CUD or drop the CUD's reference in the event of error. Simply 109 * drop the log's CUI reference now that the log is done with it. 110 */ 111 STATIC void 112 xfs_cui_item_unpin( 113 struct xfs_log_item *lip, 114 int remove) 115 { 116 struct xfs_cui_log_item *cuip = CUI_ITEM(lip); 117 118 xfs_cui_release(cuip); 119 } 120 121 /* 122 * The CUI has been either committed or aborted if the transaction has been 123 * cancelled. If the transaction was cancelled, an CUD isn't going to be 124 * constructed and thus we free the CUI here directly. 125 */ 126 STATIC void 127 xfs_cui_item_release( 128 struct xfs_log_item *lip) 129 { 130 xfs_cui_release(CUI_ITEM(lip)); 131 } 132 133 /* 134 * Allocate and initialize an cui item with the given number of extents. 135 */ 136 STATIC struct xfs_cui_log_item * 137 xfs_cui_init( 138 struct xfs_mount *mp, 139 uint nextents) 140 141 { 142 struct xfs_cui_log_item *cuip; 143 144 ASSERT(nextents > 0); 145 if (nextents > XFS_CUI_MAX_FAST_EXTENTS) 146 cuip = kzalloc(xfs_cui_log_item_sizeof(nextents), 147 GFP_KERNEL | __GFP_NOFAIL); 148 else 149 cuip = kmem_cache_zalloc(xfs_cui_cache, 150 GFP_KERNEL | __GFP_NOFAIL); 151 152 xfs_log_item_init(mp, &cuip->cui_item, XFS_LI_CUI, &xfs_cui_item_ops); 153 cuip->cui_format.cui_nextents = nextents; 154 cuip->cui_format.cui_id = (uintptr_t)(void *)cuip; 155 atomic_set(&cuip->cui_next_extent, 0); 156 atomic_set(&cuip->cui_refcount, 2); 157 158 return cuip; 159 } 160 161 static inline struct xfs_cud_log_item *CUD_ITEM(struct xfs_log_item *lip) 162 { 163 return container_of(lip, struct xfs_cud_log_item, cud_item); 164 } 165 166 STATIC void 167 xfs_cud_item_size( 168 struct xfs_log_item *lip, 169 int *nvecs, 170 int *nbytes) 171 { 172 *nvecs += 1; 173 *nbytes += sizeof(struct xfs_cud_log_format); 174 } 175 176 /* 177 * This is called to fill in the vector of log iovecs for the 178 * given cud log item. We use only 1 iovec, and we point that 179 * at the cud_log_format structure embedded in the cud item. 180 * It is at this point that we assert that all of the extent 181 * slots in the cud item have been filled. 182 */ 183 STATIC void 184 xfs_cud_item_format( 185 struct xfs_log_item *lip, 186 struct xfs_log_vec *lv) 187 { 188 struct xfs_cud_log_item *cudp = CUD_ITEM(lip); 189 struct xfs_log_iovec *vecp = NULL; 190 191 cudp->cud_format.cud_type = XFS_LI_CUD; 192 cudp->cud_format.cud_size = 1; 193 194 xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_CUD_FORMAT, &cudp->cud_format, 195 sizeof(struct xfs_cud_log_format)); 196 } 197 198 /* 199 * The CUD is either committed or aborted if the transaction is cancelled. If 200 * the transaction is cancelled, drop our reference to the CUI and free the 201 * CUD. 202 */ 203 STATIC void 204 xfs_cud_item_release( 205 struct xfs_log_item *lip) 206 { 207 struct xfs_cud_log_item *cudp = CUD_ITEM(lip); 208 209 xfs_cui_release(cudp->cud_cuip); 210 kvfree(cudp->cud_item.li_lv_shadow); 211 kmem_cache_free(xfs_cud_cache, cudp); 212 } 213 214 static struct xfs_log_item * 215 xfs_cud_item_intent( 216 struct xfs_log_item *lip) 217 { 218 return &CUD_ITEM(lip)->cud_cuip->cui_item; 219 } 220 221 static const struct xfs_item_ops xfs_cud_item_ops = { 222 .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED | 223 XFS_ITEM_INTENT_DONE, 224 .iop_size = xfs_cud_item_size, 225 .iop_format = xfs_cud_item_format, 226 .iop_release = xfs_cud_item_release, 227 .iop_intent = xfs_cud_item_intent, 228 }; 229 230 /* Sort refcount intents by AG. */ 231 static int 232 xfs_refcount_update_diff_items( 233 void *priv, 234 const struct list_head *a, 235 const struct list_head *b) 236 { 237 struct xfs_refcount_intent *ra; 238 struct xfs_refcount_intent *rb; 239 240 ra = container_of(a, struct xfs_refcount_intent, ri_list); 241 rb = container_of(b, struct xfs_refcount_intent, ri_list); 242 243 return ra->ri_pag->pag_agno - rb->ri_pag->pag_agno; 244 } 245 246 /* Set the phys extent flags for this reverse mapping. */ 247 static void 248 xfs_trans_set_refcount_flags( 249 struct xfs_phys_extent *pmap, 250 enum xfs_refcount_intent_type type) 251 { 252 pmap->pe_flags = 0; 253 switch (type) { 254 case XFS_REFCOUNT_INCREASE: 255 case XFS_REFCOUNT_DECREASE: 256 case XFS_REFCOUNT_ALLOC_COW: 257 case XFS_REFCOUNT_FREE_COW: 258 pmap->pe_flags |= type; 259 break; 260 default: 261 ASSERT(0); 262 } 263 } 264 265 /* Log refcount updates in the intent item. */ 266 STATIC void 267 xfs_refcount_update_log_item( 268 struct xfs_trans *tp, 269 struct xfs_cui_log_item *cuip, 270 struct xfs_refcount_intent *ri) 271 { 272 uint next_extent; 273 struct xfs_phys_extent *pmap; 274 275 /* 276 * atomic_inc_return gives us the value after the increment; 277 * we want to use it as an array index so we need to subtract 1 from 278 * it. 279 */ 280 next_extent = atomic_inc_return(&cuip->cui_next_extent) - 1; 281 ASSERT(next_extent < cuip->cui_format.cui_nextents); 282 pmap = &cuip->cui_format.cui_extents[next_extent]; 283 pmap->pe_startblock = ri->ri_startblock; 284 pmap->pe_len = ri->ri_blockcount; 285 xfs_trans_set_refcount_flags(pmap, ri->ri_type); 286 } 287 288 static struct xfs_log_item * 289 xfs_refcount_update_create_intent( 290 struct xfs_trans *tp, 291 struct list_head *items, 292 unsigned int count, 293 bool sort) 294 { 295 struct xfs_mount *mp = tp->t_mountp; 296 struct xfs_cui_log_item *cuip = xfs_cui_init(mp, count); 297 struct xfs_refcount_intent *ri; 298 299 ASSERT(count > 0); 300 301 if (sort) 302 list_sort(mp, items, xfs_refcount_update_diff_items); 303 list_for_each_entry(ri, items, ri_list) 304 xfs_refcount_update_log_item(tp, cuip, ri); 305 return &cuip->cui_item; 306 } 307 308 /* Get an CUD so we can process all the deferred refcount updates. */ 309 static struct xfs_log_item * 310 xfs_refcount_update_create_done( 311 struct xfs_trans *tp, 312 struct xfs_log_item *intent, 313 unsigned int count) 314 { 315 struct xfs_cui_log_item *cuip = CUI_ITEM(intent); 316 struct xfs_cud_log_item *cudp; 317 318 cudp = kmem_cache_zalloc(xfs_cud_cache, GFP_KERNEL | __GFP_NOFAIL); 319 xfs_log_item_init(tp->t_mountp, &cudp->cud_item, XFS_LI_CUD, 320 &xfs_cud_item_ops); 321 cudp->cud_cuip = cuip; 322 cudp->cud_format.cud_cui_id = cuip->cui_format.cui_id; 323 324 return &cudp->cud_item; 325 } 326 327 /* Take a passive ref to the AG containing the space we're refcounting. */ 328 void 329 xfs_refcount_update_get_group( 330 struct xfs_mount *mp, 331 struct xfs_refcount_intent *ri) 332 { 333 xfs_agnumber_t agno; 334 335 agno = XFS_FSB_TO_AGNO(mp, ri->ri_startblock); 336 ri->ri_pag = xfs_perag_intent_get(mp, agno); 337 } 338 339 /* Release a passive AG ref after finishing refcounting work. */ 340 static inline void 341 xfs_refcount_update_put_group( 342 struct xfs_refcount_intent *ri) 343 { 344 xfs_perag_intent_put(ri->ri_pag); 345 } 346 347 /* Process a deferred refcount update. */ 348 STATIC int 349 xfs_refcount_update_finish_item( 350 struct xfs_trans *tp, 351 struct xfs_log_item *done, 352 struct list_head *item, 353 struct xfs_btree_cur **state) 354 { 355 struct xfs_refcount_intent *ri; 356 int error; 357 358 ri = container_of(item, struct xfs_refcount_intent, ri_list); 359 360 /* Did we run out of reservation? Requeue what we didn't finish. */ 361 error = xfs_refcount_finish_one(tp, ri, state); 362 if (!error && ri->ri_blockcount > 0) { 363 ASSERT(ri->ri_type == XFS_REFCOUNT_INCREASE || 364 ri->ri_type == XFS_REFCOUNT_DECREASE); 365 return -EAGAIN; 366 } 367 368 xfs_refcount_update_put_group(ri); 369 kmem_cache_free(xfs_refcount_intent_cache, ri); 370 return error; 371 } 372 373 /* Abort all pending CUIs. */ 374 STATIC void 375 xfs_refcount_update_abort_intent( 376 struct xfs_log_item *intent) 377 { 378 xfs_cui_release(CUI_ITEM(intent)); 379 } 380 381 /* Cancel a deferred refcount update. */ 382 STATIC void 383 xfs_refcount_update_cancel_item( 384 struct list_head *item) 385 { 386 struct xfs_refcount_intent *ri; 387 388 ri = container_of(item, struct xfs_refcount_intent, ri_list); 389 390 xfs_refcount_update_put_group(ri); 391 kmem_cache_free(xfs_refcount_intent_cache, ri); 392 } 393 394 /* Is this recovered CUI ok? */ 395 static inline bool 396 xfs_cui_validate_phys( 397 struct xfs_mount *mp, 398 struct xfs_phys_extent *pmap) 399 { 400 if (!xfs_has_reflink(mp)) 401 return false; 402 403 if (pmap->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS) 404 return false; 405 406 switch (pmap->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK) { 407 case XFS_REFCOUNT_INCREASE: 408 case XFS_REFCOUNT_DECREASE: 409 case XFS_REFCOUNT_ALLOC_COW: 410 case XFS_REFCOUNT_FREE_COW: 411 break; 412 default: 413 return false; 414 } 415 416 return xfs_verify_fsbext(mp, pmap->pe_startblock, pmap->pe_len); 417 } 418 419 static inline void 420 xfs_cui_recover_work( 421 struct xfs_mount *mp, 422 struct xfs_defer_pending *dfp, 423 struct xfs_phys_extent *pmap) 424 { 425 struct xfs_refcount_intent *ri; 426 427 ri = kmem_cache_alloc(xfs_refcount_intent_cache, 428 GFP_KERNEL | __GFP_NOFAIL); 429 ri->ri_type = pmap->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK; 430 ri->ri_startblock = pmap->pe_startblock; 431 ri->ri_blockcount = pmap->pe_len; 432 xfs_refcount_update_get_group(mp, ri); 433 434 xfs_defer_add_item(dfp, &ri->ri_list); 435 } 436 437 /* 438 * Process a refcount update intent item that was recovered from the log. 439 * We need to update the refcountbt. 440 */ 441 STATIC int 442 xfs_refcount_recover_work( 443 struct xfs_defer_pending *dfp, 444 struct list_head *capture_list) 445 { 446 struct xfs_trans_res resv; 447 struct xfs_log_item *lip = dfp->dfp_intent; 448 struct xfs_cui_log_item *cuip = CUI_ITEM(lip); 449 struct xfs_trans *tp; 450 struct xfs_mount *mp = lip->li_log->l_mp; 451 int i; 452 int error = 0; 453 454 /* 455 * First check the validity of the extents described by the 456 * CUI. If any are bad, then assume that all are bad and 457 * just toss the CUI. 458 */ 459 for (i = 0; i < cuip->cui_format.cui_nextents; i++) { 460 if (!xfs_cui_validate_phys(mp, 461 &cuip->cui_format.cui_extents[i])) { 462 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, 463 &cuip->cui_format, 464 sizeof(cuip->cui_format)); 465 return -EFSCORRUPTED; 466 } 467 468 xfs_cui_recover_work(mp, dfp, &cuip->cui_format.cui_extents[i]); 469 } 470 471 /* 472 * Under normal operation, refcount updates are deferred, so we 473 * wouldn't be adding them directly to a transaction. All 474 * refcount updates manage reservation usage internally and 475 * dynamically by deferring work that won't fit in the 476 * transaction. Normally, any work that needs to be deferred 477 * gets attached to the same defer_ops that scheduled the 478 * refcount update. However, we're in log recovery here, so we 479 * use the passed in defer_ops and to finish up any work that 480 * doesn't fit. We need to reserve enough blocks to handle a 481 * full btree split on either end of the refcount range. 482 */ 483 resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate); 484 error = xfs_trans_alloc(mp, &resv, mp->m_refc_maxlevels * 2, 0, 485 XFS_TRANS_RESERVE, &tp); 486 if (error) 487 return error; 488 489 error = xlog_recover_finish_intent(tp, dfp); 490 if (error == -EFSCORRUPTED) 491 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, 492 &cuip->cui_format, 493 sizeof(cuip->cui_format)); 494 if (error) 495 goto abort_error; 496 497 return xfs_defer_ops_capture_and_commit(tp, capture_list); 498 499 abort_error: 500 xfs_trans_cancel(tp); 501 return error; 502 } 503 504 /* Relog an intent item to push the log tail forward. */ 505 static struct xfs_log_item * 506 xfs_refcount_relog_intent( 507 struct xfs_trans *tp, 508 struct xfs_log_item *intent, 509 struct xfs_log_item *done_item) 510 { 511 struct xfs_cui_log_item *cuip; 512 struct xfs_phys_extent *pmap; 513 unsigned int count; 514 515 count = CUI_ITEM(intent)->cui_format.cui_nextents; 516 pmap = CUI_ITEM(intent)->cui_format.cui_extents; 517 518 cuip = xfs_cui_init(tp->t_mountp, count); 519 memcpy(cuip->cui_format.cui_extents, pmap, count * sizeof(*pmap)); 520 atomic_set(&cuip->cui_next_extent, count); 521 522 return &cuip->cui_item; 523 } 524 525 const struct xfs_defer_op_type xfs_refcount_update_defer_type = { 526 .name = "refcount", 527 .max_items = XFS_CUI_MAX_FAST_EXTENTS, 528 .create_intent = xfs_refcount_update_create_intent, 529 .abort_intent = xfs_refcount_update_abort_intent, 530 .create_done = xfs_refcount_update_create_done, 531 .finish_item = xfs_refcount_update_finish_item, 532 .finish_cleanup = xfs_refcount_finish_one_cleanup, 533 .cancel_item = xfs_refcount_update_cancel_item, 534 .recover_work = xfs_refcount_recover_work, 535 .relog_intent = xfs_refcount_relog_intent, 536 }; 537 538 STATIC bool 539 xfs_cui_item_match( 540 struct xfs_log_item *lip, 541 uint64_t intent_id) 542 { 543 return CUI_ITEM(lip)->cui_format.cui_id == intent_id; 544 } 545 546 static const struct xfs_item_ops xfs_cui_item_ops = { 547 .flags = XFS_ITEM_INTENT, 548 .iop_size = xfs_cui_item_size, 549 .iop_format = xfs_cui_item_format, 550 .iop_unpin = xfs_cui_item_unpin, 551 .iop_release = xfs_cui_item_release, 552 .iop_match = xfs_cui_item_match, 553 }; 554 555 static inline void 556 xfs_cui_copy_format( 557 struct xfs_cui_log_format *dst, 558 const struct xfs_cui_log_format *src) 559 { 560 unsigned int i; 561 562 memcpy(dst, src, offsetof(struct xfs_cui_log_format, cui_extents)); 563 564 for (i = 0; i < src->cui_nextents; i++) 565 memcpy(&dst->cui_extents[i], &src->cui_extents[i], 566 sizeof(struct xfs_phys_extent)); 567 } 568 569 /* 570 * This routine is called to create an in-core extent refcount update 571 * item from the cui format structure which was logged on disk. 572 * It allocates an in-core cui, copies the extents from the format 573 * structure into it, and adds the cui to the AIL with the given 574 * LSN. 575 */ 576 STATIC int 577 xlog_recover_cui_commit_pass2( 578 struct xlog *log, 579 struct list_head *buffer_list, 580 struct xlog_recover_item *item, 581 xfs_lsn_t lsn) 582 { 583 struct xfs_mount *mp = log->l_mp; 584 struct xfs_cui_log_item *cuip; 585 struct xfs_cui_log_format *cui_formatp; 586 size_t len; 587 588 cui_formatp = item->ri_buf[0].i_addr; 589 590 if (item->ri_buf[0].i_len < xfs_cui_log_format_sizeof(0)) { 591 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, 592 item->ri_buf[0].i_addr, item->ri_buf[0].i_len); 593 return -EFSCORRUPTED; 594 } 595 596 len = xfs_cui_log_format_sizeof(cui_formatp->cui_nextents); 597 if (item->ri_buf[0].i_len != len) { 598 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, 599 item->ri_buf[0].i_addr, item->ri_buf[0].i_len); 600 return -EFSCORRUPTED; 601 } 602 603 cuip = xfs_cui_init(mp, cui_formatp->cui_nextents); 604 xfs_cui_copy_format(&cuip->cui_format, cui_formatp); 605 atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents); 606 607 xlog_recover_intent_item(log, &cuip->cui_item, lsn, 608 &xfs_refcount_update_defer_type); 609 return 0; 610 } 611 612 const struct xlog_recover_item_ops xlog_cui_item_ops = { 613 .item_type = XFS_LI_CUI, 614 .commit_pass2 = xlog_recover_cui_commit_pass2, 615 }; 616 617 /* 618 * This routine is called when an CUD format structure is found in a committed 619 * transaction in the log. Its purpose is to cancel the corresponding CUI if it 620 * was still in the log. To do this it searches the AIL for the CUI with an id 621 * equal to that in the CUD format structure. If we find it we drop the CUD 622 * reference, which removes the CUI from the AIL and frees it. 623 */ 624 STATIC int 625 xlog_recover_cud_commit_pass2( 626 struct xlog *log, 627 struct list_head *buffer_list, 628 struct xlog_recover_item *item, 629 xfs_lsn_t lsn) 630 { 631 struct xfs_cud_log_format *cud_formatp; 632 633 cud_formatp = item->ri_buf[0].i_addr; 634 if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) { 635 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp, 636 item->ri_buf[0].i_addr, item->ri_buf[0].i_len); 637 return -EFSCORRUPTED; 638 } 639 640 xlog_recover_release_intent(log, XFS_LI_CUI, cud_formatp->cud_cui_id); 641 return 0; 642 } 643 644 const struct xlog_recover_item_ops xlog_cud_item_ops = { 645 .item_type = XFS_LI_CUD, 646 .commit_pass2 = xlog_recover_cud_commit_pass2, 647 }; 648