1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 4 * All Rights Reserved. 5 */ 6 #include "xfs_platform.h" 7 #include <linux/backing-dev.h> 8 #include <linux/dax.h> 9 10 #include "xfs_shared.h" 11 #include "xfs_format.h" 12 #include "xfs_log_format.h" 13 #include "xfs_trans_resv.h" 14 #include "xfs_mount.h" 15 #include "xfs_trace.h" 16 #include "xfs_log.h" 17 #include "xfs_log_recover.h" 18 #include "xfs_log_priv.h" 19 #include "xfs_trans.h" 20 #include "xfs_buf_item.h" 21 #include "xfs_errortag.h" 22 #include "xfs_error.h" 23 #include "xfs_ag.h" 24 #include "xfs_buf_mem.h" 25 #include "xfs_notify_failure.h" 26 27 struct kmem_cache *xfs_buf_cache; 28 29 /* 30 * Locking orders 31 * 32 * xfs_buf_stale: 33 * b_sema (caller holds) 34 * b_lockref.lock 35 * lru_lock 36 * 37 * xfs_buf_rele: 38 * b_lockref.lock 39 * lru_lock 40 * 41 * xfs_buftarg_drain_rele 42 * lru_lock 43 * b_lockref.lock (trylock due to inversion) 44 * 45 * xfs_buftarg_isolate 46 * lru_lock 47 * b_lockref.lock (trylock due to inversion) 48 */ 49 50 static void xfs_buf_submit(struct xfs_buf *bp); 51 static int xfs_buf_iowait(struct xfs_buf *bp); 52 53 static inline bool xfs_buf_is_uncached(struct xfs_buf *bp) 54 { 55 return bp->b_rhash_key == XFS_BUF_DADDR_NULL; 56 } 57 58 /* 59 * When we mark a buffer stale, we remove the buffer from the LRU and clear the 60 * b_lru_ref count so that the buffer is freed immediately when the buffer 61 * reference count falls to zero. If the buffer is already on the LRU, we need 62 * to remove the reference that LRU holds on the buffer. 63 * 64 * This prevents build-up of stale buffers on the LRU. 65 */ 66 void 67 xfs_buf_stale( 68 struct xfs_buf *bp) 69 { 70 ASSERT(xfs_buf_islocked(bp)); 71 72 bp->b_flags |= XBF_STALE; 73 74 /* 75 * Clear the delwri status so that a delwri queue walker will not 76 * flush this buffer to disk now that it is stale. The delwri queue has 77 * a reference to the buffer, so this is safe to do. 78 */ 79 bp->b_flags &= ~_XBF_DELWRI_Q; 80 81 spin_lock(&bp->b_lockref.lock); 82 atomic_set(&bp->b_lru_ref, 0); 83 if (!__lockref_is_dead(&bp->b_lockref)) 84 list_lru_del_obj(&bp->b_target->bt_lru, &bp->b_lru); 85 spin_unlock(&bp->b_lockref.lock); 86 } 87 88 static void 89 xfs_buf_free_callback( 90 struct callback_head *cb) 91 { 92 struct xfs_buf *bp = container_of(cb, struct xfs_buf, b_rcu); 93 94 if (bp->b_maps != &bp->__b_map) 95 kfree(bp->b_maps); 96 kmem_cache_free(xfs_buf_cache, bp); 97 } 98 99 static void 100 xfs_buf_free( 101 struct xfs_buf *bp) 102 { 103 unsigned int size = BBTOB(bp->b_length); 104 105 might_sleep(); 106 trace_xfs_buf_free(bp, _RET_IP_); 107 108 ASSERT(list_empty(&bp->b_lru)); 109 110 if (!xfs_buftarg_is_mem(bp->b_target) && size >= PAGE_SIZE) 111 mm_account_reclaimed_pages(howmany(size, PAGE_SHIFT)); 112 113 if (is_vmalloc_addr(bp->b_addr)) 114 vfree(bp->b_addr); 115 else if (bp->b_flags & _XBF_KMEM) 116 kfree(bp->b_addr); 117 else 118 folio_put(virt_to_folio(bp->b_addr)); 119 120 call_rcu(&bp->b_rcu, xfs_buf_free_callback); 121 } 122 123 static int 124 xfs_buf_alloc_kmem( 125 struct xfs_buf *bp, 126 size_t size, 127 gfp_t gfp_mask) 128 { 129 ASSERT(is_power_of_2(size)); 130 ASSERT(size < PAGE_SIZE); 131 132 bp->b_addr = kmalloc(size, gfp_mask | __GFP_NOFAIL); 133 if (!bp->b_addr) 134 return -ENOMEM; 135 136 /* 137 * Slab guarantees that we get back naturally aligned allocations for 138 * power of two sizes. Keep this check as the canary in the coal mine 139 * if anything changes in slab. 140 */ 141 if (WARN_ON_ONCE(!IS_ALIGNED((unsigned long)bp->b_addr, size))) { 142 kfree(bp->b_addr); 143 bp->b_addr = NULL; 144 return -ENOMEM; 145 } 146 bp->b_flags |= _XBF_KMEM; 147 trace_xfs_buf_backing_kmem(bp, _RET_IP_); 148 return 0; 149 } 150 151 /* 152 * Allocate backing memory for a buffer. 153 * 154 * For tmpfs-backed buffers used by in-memory btrees this directly maps the 155 * tmpfs page cache folios. 156 * 157 * For real file system buffers there are three different kinds backing memory: 158 * 159 * The first type backs the buffer by a kmalloc allocation. This is done for 160 * less than PAGE_SIZE allocations to avoid wasting memory. 161 * 162 * The second type is a single folio buffer - this may be a high order folio or 163 * just a single page sized folio, but either way they get treated the same way 164 * by the rest of the code - the buffer memory spans a single contiguous memory 165 * region that we don't have to map and unmap to access the data directly. 166 * 167 * The third type of buffer is the vmalloc()d buffer. This provides the buffer 168 * with the required contiguous memory region but backed by discontiguous 169 * physical pages. 170 */ 171 static int 172 xfs_buf_alloc_backing_mem( 173 struct xfs_buf *bp, 174 xfs_buf_flags_t flags) 175 { 176 size_t size = BBTOB(bp->b_length); 177 gfp_t gfp_mask = GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOWARN; 178 struct folio *folio; 179 180 if (xfs_buftarg_is_mem(bp->b_target)) 181 return xmbuf_map_backing_mem(bp); 182 183 /* Assure zeroed buffer for non-read cases. */ 184 if (!(flags & XBF_READ)) 185 gfp_mask |= __GFP_ZERO; 186 187 if (flags & XBF_READ_AHEAD) 188 gfp_mask |= __GFP_NORETRY; 189 190 /* 191 * For buffers smaller than PAGE_SIZE use a kmalloc allocation if that 192 * is properly aligned. The slab allocator now guarantees an aligned 193 * allocation for all power of two sizes, which matches most of the 194 * smaller than PAGE_SIZE buffers used by XFS. 195 */ 196 if (size < PAGE_SIZE && is_power_of_2(size)) 197 return xfs_buf_alloc_kmem(bp, size, gfp_mask); 198 199 /* 200 * Don't bother with the retry loop for single PAGE allocations: vmalloc 201 * won't do any better. 202 */ 203 if (size <= PAGE_SIZE) 204 gfp_mask |= __GFP_NOFAIL; 205 206 /* 207 * Optimistically attempt a single high order folio allocation for 208 * larger than PAGE_SIZE buffers. 209 * 210 * Allocating a high order folio makes the assumption that buffers are a 211 * power-of-2 size, matching the power-of-2 folios sizes available. 212 * 213 * The exception here are user xattr data buffers, which can be arbitrarily 214 * sized up to 64kB plus structure metadata, skip straight to the vmalloc 215 * path for them instead of wasting memory here. 216 */ 217 if (size > PAGE_SIZE) { 218 if (!is_power_of_2(size)) 219 goto fallback; 220 gfp_mask &= ~__GFP_DIRECT_RECLAIM; 221 gfp_mask |= __GFP_NORETRY; 222 } 223 folio = folio_alloc(gfp_mask, get_order(size)); 224 if (!folio) { 225 if (size <= PAGE_SIZE) 226 return -ENOMEM; 227 trace_xfs_buf_backing_fallback(bp, _RET_IP_); 228 goto fallback; 229 } 230 bp->b_addr = folio_address(folio); 231 trace_xfs_buf_backing_folio(bp, _RET_IP_); 232 return 0; 233 234 fallback: 235 for (;;) { 236 bp->b_addr = __vmalloc(size, gfp_mask); 237 if (bp->b_addr) 238 break; 239 if (flags & XBF_READ_AHEAD) 240 return -ENOMEM; 241 XFS_STATS_INC(bp->b_mount, xb_page_retries); 242 memalloc_retry_wait(gfp_mask); 243 } 244 245 trace_xfs_buf_backing_vmalloc(bp, _RET_IP_); 246 return 0; 247 } 248 249 static int 250 xfs_buf_alloc( 251 struct xfs_buftarg *target, 252 struct xfs_buf_map *map, 253 int nmaps, 254 xfs_buf_flags_t flags, 255 struct xfs_buf **bpp) 256 { 257 struct xfs_buf *bp; 258 int error; 259 int i; 260 261 *bpp = NULL; 262 bp = kmem_cache_zalloc(xfs_buf_cache, 263 GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL); 264 265 /* 266 * We don't want certain flags to appear in b_flags unless they are 267 * specifically set by later operations on the buffer. 268 */ 269 flags &= ~(XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD); 270 271 /* 272 * A new buffer is held and locked by the owner. This ensures that the 273 * buffer is owned by the caller and racing RCU lookups right after 274 * inserting into the hash table are safe (and will have to wait for 275 * the unlock to do anything non-trivial). 276 */ 277 lockref_init(&bp->b_lockref); 278 sema_init(&bp->b_sema, 0); /* held, no waiters */ 279 atomic_set(&bp->b_lru_ref, 1); 280 init_completion(&bp->b_iowait); 281 INIT_LIST_HEAD(&bp->b_lru); 282 INIT_LIST_HEAD(&bp->b_list); 283 INIT_LIST_HEAD(&bp->b_li_list); 284 bp->b_target = target; 285 bp->b_mount = target->bt_mount; 286 bp->b_flags = flags; 287 bp->b_rhash_key = map[0].bm_bn; 288 bp->b_length = 0; 289 bp->b_map_count = nmaps; 290 if (nmaps == 1) 291 bp->b_maps = &bp->__b_map; 292 else 293 bp->b_maps = kzalloc_objs(struct xfs_buf_map, nmaps, 294 GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL); 295 for (i = 0; i < nmaps; i++) { 296 bp->b_maps[i].bm_bn = map[i].bm_bn; 297 bp->b_maps[i].bm_len = map[i].bm_len; 298 bp->b_length += map[i].bm_len; 299 } 300 301 atomic_set(&bp->b_pin_count, 0); 302 init_waitqueue_head(&bp->b_waiters); 303 304 XFS_STATS_INC(bp->b_mount, xb_create); 305 trace_xfs_buf_init(bp, _RET_IP_); 306 307 error = xfs_buf_alloc_backing_mem(bp, flags); 308 if (error) { 309 xfs_buf_free(bp); 310 return error; 311 } 312 313 *bpp = bp; 314 return 0; 315 } 316 317 /* 318 * Finding and Reading Buffers 319 */ 320 static int 321 _xfs_buf_obj_cmp( 322 struct rhashtable_compare_arg *arg, 323 const void *obj) 324 { 325 const struct xfs_buf_map *map = arg->key; 326 const struct xfs_buf *bp = obj; 327 328 /* 329 * The key hashing in the lookup path depends on the key being the 330 * first element of the compare_arg, make sure to assert this. 331 */ 332 BUILD_BUG_ON(offsetof(struct xfs_buf_map, bm_bn) != 0); 333 334 if (bp->b_rhash_key != map->bm_bn) 335 return 1; 336 337 if (unlikely(bp->b_length != map->bm_len)) { 338 /* 339 * found a block number match. If the range doesn't 340 * match, the only way this is allowed is if the buffer 341 * in the cache is stale and the transaction that made 342 * it stale has not yet committed. i.e. we are 343 * reallocating a busy extent. Skip this buffer and 344 * continue searching for an exact match. 345 * 346 * Note: If we're scanning for incore buffers to stale, don't 347 * complain if we find non-stale buffers. 348 */ 349 if (!(map->bm_flags & XBM_LIVESCAN)) 350 ASSERT(bp->b_flags & XBF_STALE); 351 return 1; 352 } 353 return 0; 354 } 355 356 static const struct rhashtable_params xfs_buf_hash_params = { 357 .min_size = 32, /* empty AGs have minimal footprint */ 358 .nelem_hint = 16, 359 .key_len = sizeof(xfs_daddr_t), 360 .key_offset = offsetof(struct xfs_buf, b_rhash_key), 361 .head_offset = offsetof(struct xfs_buf, b_rhash_head), 362 .automatic_shrinking = true, 363 .obj_cmpfn = _xfs_buf_obj_cmp, 364 }; 365 366 static int 367 xfs_buf_map_verify( 368 struct xfs_buftarg *btp, 369 struct xfs_buf_map *map) 370 { 371 /* Check for IOs smaller than the sector size / not sector aligned */ 372 ASSERT(!(BBTOB(map->bm_len) < btp->bt_meta_sectorsize)); 373 ASSERT(!(BBTOB(map->bm_bn) & (xfs_off_t)btp->bt_meta_sectormask)); 374 375 /* 376 * Corrupted block numbers can get through to here, unfortunately, so we 377 * have to check that the buffer falls within the filesystem bounds. 378 */ 379 if (map->bm_bn < 0 || map->bm_bn >= btp->bt_nr_sectors) { 380 xfs_alert(btp->bt_mount, 381 "%s: daddr 0x%llx out of range, EOFS 0x%llx", 382 __func__, map->bm_bn, btp->bt_nr_sectors); 383 WARN_ON(1); 384 return -EFSCORRUPTED; 385 } 386 return 0; 387 } 388 389 static int 390 xfs_buf_find_lock( 391 struct xfs_buf *bp, 392 xfs_buf_flags_t flags) 393 { 394 if (flags & XBF_TRYLOCK) { 395 if (!xfs_buf_trylock(bp)) { 396 XFS_STATS_INC(bp->b_mount, xb_busy_locked); 397 return -EAGAIN; 398 } 399 } else { 400 xfs_buf_lock(bp); 401 XFS_STATS_INC(bp->b_mount, xb_get_locked_waited); 402 } 403 404 /* 405 * if the buffer is stale, clear all the external state associated with 406 * it. We need to keep flags such as how we allocated the buffer memory 407 * intact here. 408 */ 409 if (bp->b_flags & XBF_STALE) { 410 if (flags & XBF_LIVESCAN) { 411 xfs_buf_unlock(bp); 412 return -ENOENT; 413 } 414 ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); 415 bp->b_flags &= _XBF_KMEM; 416 bp->b_ops = NULL; 417 } 418 return 0; 419 } 420 421 static inline int 422 xfs_buf_lookup( 423 struct xfs_buftarg *btp, 424 struct xfs_buf_map *map, 425 xfs_buf_flags_t flags, 426 struct xfs_buf **bpp) 427 { 428 struct xfs_buf *bp; 429 int error; 430 431 rcu_read_lock(); 432 bp = rhashtable_lookup(&btp->bt_hash, map, xfs_buf_hash_params); 433 if (!bp || !lockref_get_not_dead(&bp->b_lockref)) { 434 rcu_read_unlock(); 435 return -ENOENT; 436 } 437 rcu_read_unlock(); 438 439 error = xfs_buf_find_lock(bp, flags); 440 if (error) { 441 xfs_buf_rele(bp); 442 return error; 443 } 444 445 trace_xfs_buf_find(bp, flags, _RET_IP_); 446 *bpp = bp; 447 return 0; 448 } 449 450 /* 451 * Insert the new_bp into the hash table. This consumes the perag reference 452 * taken for the lookup regardless of the result of the insert. 453 */ 454 static int 455 xfs_buf_find_insert( 456 struct xfs_buftarg *btp, 457 struct xfs_perag *pag, 458 struct xfs_buf_map *cmap, 459 struct xfs_buf_map *map, 460 int nmaps, 461 xfs_buf_flags_t flags, 462 struct xfs_buf **bpp) 463 { 464 struct xfs_buf *new_bp; 465 struct xfs_buf *bp; 466 int error; 467 468 error = xfs_buf_alloc(btp, map, nmaps, flags, &new_bp); 469 if (error) 470 goto out_drop_pag; 471 472 /* The new buffer keeps the perag reference until it is freed. */ 473 new_bp->b_pag = pag; 474 475 retry: 476 rcu_read_lock(); 477 bp = rhashtable_lookup_get_insert_fast(&btp->bt_hash, 478 &new_bp->b_rhash_head, xfs_buf_hash_params); 479 if (IS_ERR(bp)) { 480 rcu_read_unlock(); 481 error = PTR_ERR(bp); 482 goto out_free_buf; 483 } 484 if (bp) { 485 /* 486 * If there is an existing buffer with a dead lockref, retry 487 * until the new buffer is added, or a usable buffer is found. 488 */ 489 if (!lockref_get_not_dead(&bp->b_lockref)) { 490 rcu_read_unlock(); 491 cpu_relax(); 492 goto retry; 493 } 494 rcu_read_unlock(); 495 error = xfs_buf_find_lock(bp, flags); 496 if (error) 497 xfs_buf_rele(bp); 498 else 499 *bpp = bp; 500 goto out_free_buf; 501 } 502 rcu_read_unlock(); 503 504 *bpp = new_bp; 505 return 0; 506 507 out_free_buf: 508 xfs_buf_free(new_bp); 509 out_drop_pag: 510 if (pag) 511 xfs_perag_put(pag); 512 return error; 513 } 514 515 static inline struct xfs_perag * 516 xfs_buftarg_get_pag( 517 struct xfs_buftarg *btp, 518 const struct xfs_buf_map *map) 519 { 520 struct xfs_mount *mp = btp->bt_mount; 521 522 if (xfs_buftarg_is_mem(btp)) 523 return NULL; 524 return xfs_perag_get(mp, xfs_daddr_to_agno(mp, map->bm_bn)); 525 } 526 527 /* 528 * Assembles a buffer covering the specified range. The code is optimised for 529 * cache hits, as metadata intensive workloads will see 3 orders of magnitude 530 * more hits than misses. 531 */ 532 int 533 xfs_buf_get_map( 534 struct xfs_buftarg *btp, 535 struct xfs_buf_map *map, 536 int nmaps, 537 xfs_buf_flags_t flags, 538 struct xfs_buf **bpp) 539 { 540 struct xfs_perag *pag; 541 struct xfs_buf *bp = NULL; 542 struct xfs_buf_map cmap = { .bm_bn = map[0].bm_bn }; 543 int error; 544 int i; 545 546 if (flags & XBF_LIVESCAN) 547 cmap.bm_flags |= XBM_LIVESCAN; 548 for (i = 0; i < nmaps; i++) 549 cmap.bm_len += map[i].bm_len; 550 551 error = xfs_buf_map_verify(btp, &cmap); 552 if (error) 553 return error; 554 555 pag = xfs_buftarg_get_pag(btp, &cmap); 556 557 error = xfs_buf_lookup(btp, &cmap, flags, &bp); 558 if (error && error != -ENOENT) 559 goto out_put_perag; 560 561 /* cache hits always outnumber misses by at least 10:1 */ 562 if (unlikely(!bp)) { 563 XFS_STATS_INC(btp->bt_mount, xb_miss_locked); 564 565 if (flags & XBF_INCORE) 566 goto out_put_perag; 567 568 /* xfs_buf_find_insert() consumes the perag reference. */ 569 error = xfs_buf_find_insert(btp, pag, &cmap, map, nmaps, 570 flags, &bp); 571 if (error) 572 return error; 573 } else { 574 XFS_STATS_INC(btp->bt_mount, xb_get_locked); 575 if (pag) 576 xfs_perag_put(pag); 577 } 578 579 /* 580 * Clear b_error if this is a lookup from a caller that doesn't expect 581 * valid data to be found in the buffer. 582 */ 583 if (!(flags & XBF_READ)) 584 xfs_buf_ioerror(bp, 0); 585 586 XFS_STATS_INC(btp->bt_mount, xb_get); 587 trace_xfs_buf_get(bp, flags, _RET_IP_); 588 *bpp = bp; 589 return 0; 590 591 out_put_perag: 592 if (pag) 593 xfs_perag_put(pag); 594 return error; 595 } 596 597 int 598 _xfs_buf_read( 599 struct xfs_buf *bp) 600 { 601 ASSERT(bp->b_maps[0].bm_bn != XFS_BUF_DADDR_NULL); 602 603 bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD | XBF_DONE); 604 bp->b_flags |= XBF_READ; 605 xfs_buf_submit(bp); 606 return xfs_buf_iowait(bp); 607 } 608 609 /* 610 * Reverify a buffer found in cache without an attached ->b_ops. 611 * 612 * If the caller passed an ops structure and the buffer doesn't have ops 613 * assigned, set the ops and use it to verify the contents. If verification 614 * fails, clear XBF_DONE. We assume the buffer has no recorded errors and is 615 * already in XBF_DONE state on entry. 616 * 617 * Under normal operations, every in-core buffer is verified on read I/O 618 * completion. There are two scenarios that can lead to in-core buffers without 619 * an assigned ->b_ops. The first is during log recovery of buffers on a V4 620 * filesystem, though these buffers are purged at the end of recovery. The 621 * other is online repair, which intentionally reads with a NULL buffer ops to 622 * run several verifiers across an in-core buffer in order to establish buffer 623 * type. If repair can't establish that, the buffer will be left in memory 624 * with NULL buffer ops. 625 */ 626 static int 627 xfs_buf_reverify( 628 struct xfs_buf *bp, 629 const struct xfs_buf_ops *ops) 630 { 631 ASSERT(bp->b_flags & XBF_DONE); 632 ASSERT(bp->b_error == 0); 633 634 if (!ops || bp->b_ops) 635 return 0; 636 637 bp->b_ops = ops; 638 bp->b_ops->verify_read(bp); 639 if (bp->b_error) 640 bp->b_flags &= ~XBF_DONE; 641 return bp->b_error; 642 } 643 644 int 645 xfs_buf_read_map( 646 struct xfs_buftarg *target, 647 struct xfs_buf_map *map, 648 int nmaps, 649 xfs_buf_flags_t flags, 650 struct xfs_buf **bpp, 651 const struct xfs_buf_ops *ops, 652 xfs_failaddr_t fa) 653 { 654 struct xfs_buf *bp; 655 int error; 656 657 ASSERT(!(flags & (XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD))); 658 659 flags |= XBF_READ; 660 *bpp = NULL; 661 662 error = xfs_buf_get_map(target, map, nmaps, flags, &bp); 663 if (error) 664 return error; 665 666 trace_xfs_buf_read(bp, flags, _RET_IP_); 667 668 if (!(bp->b_flags & XBF_DONE)) { 669 /* Initiate the buffer read and wait. */ 670 XFS_STATS_INC(target->bt_mount, xb_get_read); 671 bp->b_ops = ops; 672 error = _xfs_buf_read(bp); 673 } else { 674 /* Buffer already read; all we need to do is check it. */ 675 error = xfs_buf_reverify(bp, ops); 676 677 /* We do not want read in the flags */ 678 bp->b_flags &= ~XBF_READ; 679 ASSERT(bp->b_ops != NULL || ops == NULL); 680 } 681 682 /* 683 * If we've had a read error, then the contents of the buffer are 684 * invalid and should not be used. To ensure that a followup read tries 685 * to pull the buffer from disk again, we clear the XBF_DONE flag and 686 * mark the buffer stale. This ensures that anyone who has a current 687 * reference to the buffer will interpret it's contents correctly and 688 * future cache lookups will also treat it as an empty, uninitialised 689 * buffer. 690 */ 691 if (error) { 692 /* 693 * Check against log shutdown for error reporting because 694 * metadata writeback may require a read first and we need to 695 * report errors in metadata writeback until the log is shut 696 * down. High level transaction read functions already check 697 * against mount shutdown, anyway, so we only need to be 698 * concerned about low level IO interactions here. 699 */ 700 if (!xlog_is_shutdown(target->bt_mount->m_log)) 701 xfs_buf_ioerror_alert(bp, fa); 702 703 bp->b_flags &= ~XBF_DONE; 704 xfs_buf_stale(bp); 705 xfs_buf_relse(bp); 706 707 /* bad CRC means corrupted metadata */ 708 if (error == -EFSBADCRC) 709 error = -EFSCORRUPTED; 710 return error; 711 } 712 713 *bpp = bp; 714 return 0; 715 } 716 717 /* 718 * If we are not low on memory then do the readahead in a deadlock 719 * safe manner. 720 */ 721 void 722 xfs_buf_readahead_map( 723 struct xfs_buftarg *target, 724 struct xfs_buf_map *map, 725 int nmaps, 726 const struct xfs_buf_ops *ops) 727 { 728 const xfs_buf_flags_t flags = XBF_READ | XBF_ASYNC | XBF_READ_AHEAD; 729 struct xfs_buf *bp; 730 731 /* 732 * Currently we don't have a good means or justification for performing 733 * xmbuf_map_page asynchronously, so we don't do readahead. 734 */ 735 if (xfs_buftarg_is_mem(target)) 736 return; 737 738 if (xfs_buf_get_map(target, map, nmaps, flags | XBF_TRYLOCK, &bp)) 739 return; 740 trace_xfs_buf_readahead(bp, 0, _RET_IP_); 741 742 if (bp->b_flags & XBF_DONE) { 743 xfs_buf_reverify(bp, ops); 744 xfs_buf_relse(bp); 745 return; 746 } 747 XFS_STATS_INC(target->bt_mount, xb_get_read); 748 bp->b_ops = ops; 749 bp->b_flags &= ~(XBF_WRITE | XBF_DONE); 750 bp->b_flags |= flags; 751 percpu_counter_inc(&target->bt_readahead_count); 752 xfs_buf_submit(bp); 753 } 754 755 /* 756 * Read an uncached buffer from disk. Allocates and returns a locked 757 * buffer containing the disk contents or nothing. Uncached buffers always have 758 * a cache index of XFS_BUF_DADDR_NULL so we can easily determine if the buffer 759 * is cached or uncached during fault diagnosis. 760 */ 761 int 762 xfs_buf_read_uncached( 763 struct xfs_buftarg *target, 764 xfs_daddr_t daddr, 765 size_t numblks, 766 struct xfs_buf **bpp, 767 const struct xfs_buf_ops *ops) 768 { 769 struct xfs_buf *bp; 770 int error; 771 772 *bpp = NULL; 773 774 error = xfs_buf_get_uncached(target, numblks, &bp); 775 if (error) 776 return error; 777 778 /* set up the buffer for a read IO */ 779 ASSERT(bp->b_map_count == 1); 780 bp->b_rhash_key = XFS_BUF_DADDR_NULL; 781 bp->b_maps[0].bm_bn = daddr; 782 bp->b_flags |= XBF_READ; 783 bp->b_ops = ops; 784 785 xfs_buf_submit(bp); 786 error = xfs_buf_iowait(bp); 787 if (error) { 788 xfs_buf_relse(bp); 789 return error; 790 } 791 792 *bpp = bp; 793 return 0; 794 } 795 796 int 797 xfs_buf_get_uncached( 798 struct xfs_buftarg *target, 799 size_t numblks, 800 struct xfs_buf **bpp) 801 { 802 int error; 803 DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks); 804 805 error = xfs_buf_alloc(target, &map, 1, 0, bpp); 806 if (!error) 807 trace_xfs_buf_get_uncached(*bpp, _RET_IP_); 808 return error; 809 } 810 811 /* 812 * Increment reference count on buffer, to hold the buffer concurrently 813 * with another thread which may release (free) the buffer asynchronously. 814 * Must hold the buffer already to call this function. 815 */ 816 void 817 xfs_buf_hold( 818 struct xfs_buf *bp) 819 { 820 trace_xfs_buf_hold(bp, _RET_IP_); 821 822 lockref_get(&bp->b_lockref); 823 } 824 825 static void 826 xfs_buf_destroy( 827 struct xfs_buf *bp) 828 { 829 ASSERT(__lockref_is_dead(&bp->b_lockref)); 830 ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); 831 832 if (bp->b_pag) 833 xfs_perag_put(bp->b_pag); 834 xfs_buf_free(bp); 835 } 836 837 static inline void 838 xfs_buf_kill( 839 struct xfs_buf *bp) 840 { 841 lockref_mark_dead(&bp->b_lockref); 842 if (!xfs_buf_is_uncached(bp)) { 843 rhashtable_remove_fast(&bp->b_target->bt_hash, 844 &bp->b_rhash_head, xfs_buf_hash_params); 845 } 846 } 847 848 /* 849 * Release a hold on the specified buffer. 850 */ 851 void 852 xfs_buf_rele( 853 struct xfs_buf *bp) 854 { 855 trace_xfs_buf_rele(bp, _RET_IP_); 856 857 if (lockref_put_or_lock(&bp->b_lockref)) 858 return; 859 if (!--bp->b_lockref.count) { 860 if (xfs_buf_is_uncached(bp) || !atomic_read(&bp->b_lru_ref)) 861 goto kill; 862 list_lru_add_obj(&bp->b_target->bt_lru, &bp->b_lru); 863 } 864 spin_unlock(&bp->b_lockref.lock); 865 return; 866 867 kill: 868 xfs_buf_kill(bp); 869 list_lru_del_obj(&bp->b_target->bt_lru, &bp->b_lru); 870 spin_unlock(&bp->b_lockref.lock); 871 872 xfs_buf_destroy(bp); 873 } 874 875 /* 876 * Lock a buffer object, if it is not already locked. 877 * 878 * If we come across a stale, pinned, locked buffer, we know that we are 879 * being asked to lock a buffer that has been reallocated. Because it is 880 * pinned, we know that the log has not been pushed to disk and hence it 881 * will still be locked. Rather than continuing to have trylock attempts 882 * fail until someone else pushes the log, push it ourselves before 883 * returning. This means that the xfsaild will not get stuck trying 884 * to push on stale inode buffers. 885 */ 886 int 887 xfs_buf_trylock( 888 struct xfs_buf *bp) 889 { 890 int locked; 891 892 locked = down_trylock(&bp->b_sema) == 0; 893 if (locked) 894 trace_xfs_buf_trylock(bp, _RET_IP_); 895 else 896 trace_xfs_buf_trylock_fail(bp, _RET_IP_); 897 return locked; 898 } 899 900 /* 901 * Lock a buffer object. 902 * 903 * If we come across a stale, pinned, locked buffer, we know that we 904 * are being asked to lock a buffer that has been reallocated. Because 905 * it is pinned, we know that the log has not been pushed to disk and 906 * hence it will still be locked. Rather than sleeping until someone 907 * else pushes the log, push it ourselves before trying to get the lock. 908 */ 909 void 910 xfs_buf_lock( 911 struct xfs_buf *bp) 912 { 913 trace_xfs_buf_lock(bp, _RET_IP_); 914 915 if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) 916 xfs_log_force(bp->b_mount, 0); 917 down(&bp->b_sema); 918 919 trace_xfs_buf_lock_done(bp, _RET_IP_); 920 } 921 922 void 923 xfs_buf_unlock( 924 struct xfs_buf *bp) 925 { 926 ASSERT(xfs_buf_islocked(bp)); 927 928 up(&bp->b_sema); 929 trace_xfs_buf_unlock(bp, _RET_IP_); 930 } 931 932 STATIC void 933 xfs_buf_wait_unpin( 934 struct xfs_buf *bp) 935 { 936 DECLARE_WAITQUEUE (wait, current); 937 938 if (atomic_read(&bp->b_pin_count) == 0) 939 return; 940 941 add_wait_queue(&bp->b_waiters, &wait); 942 for (;;) { 943 set_current_state(TASK_UNINTERRUPTIBLE); 944 if (atomic_read(&bp->b_pin_count) == 0) 945 break; 946 io_schedule(); 947 } 948 remove_wait_queue(&bp->b_waiters, &wait); 949 set_current_state(TASK_RUNNING); 950 } 951 952 static void 953 xfs_buf_ioerror_alert_ratelimited( 954 struct xfs_buf *bp) 955 { 956 static unsigned long lasttime; 957 static struct xfs_buftarg *lasttarg; 958 959 if (bp->b_target != lasttarg || 960 time_after(jiffies, (lasttime + 5*HZ))) { 961 lasttime = jiffies; 962 xfs_buf_ioerror_alert(bp, __this_address); 963 } 964 lasttarg = bp->b_target; 965 } 966 967 /* 968 * Account for this latest trip around the retry handler, and decide if 969 * we've failed enough times to constitute a permanent failure. 970 */ 971 static bool 972 xfs_buf_ioerror_permanent( 973 struct xfs_buf *bp, 974 struct xfs_error_cfg *cfg) 975 { 976 struct xfs_mount *mp = bp->b_mount; 977 978 if (cfg->max_retries != XFS_ERR_RETRY_FOREVER && 979 ++bp->b_retries > cfg->max_retries) 980 return true; 981 if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER && 982 time_after(jiffies, cfg->retry_timeout + bp->b_first_retry_time)) 983 return true; 984 985 /* At unmount we may treat errors differently */ 986 if (xfs_is_unmounting(mp) && mp->m_fail_unmount) 987 return true; 988 989 return false; 990 } 991 992 /* 993 * On a sync write or shutdown we just want to stale the buffer and let the 994 * caller handle the error in bp->b_error appropriately. 995 * 996 * If the write was asynchronous then no one will be looking for the error. If 997 * this is the first failure of this type, clear the error state and write the 998 * buffer out again. This means we always retry an async write failure at least 999 * once, but we also need to set the buffer up to behave correctly now for 1000 * repeated failures. 1001 * 1002 * If we get repeated async write failures, then we take action according to the 1003 * error configuration we have been set up to use. 1004 * 1005 * Returns true if this function took care of error handling and the caller must 1006 * not touch the buffer again. Return false if the caller should proceed with 1007 * normal I/O completion handling. 1008 */ 1009 static bool 1010 xfs_buf_ioend_handle_error( 1011 struct xfs_buf *bp) 1012 { 1013 struct xfs_mount *mp = bp->b_mount; 1014 struct xfs_error_cfg *cfg; 1015 struct xfs_log_item *lip; 1016 1017 /* 1018 * If we've already shutdown the journal because of I/O errors, there's 1019 * no point in giving this a retry. 1020 */ 1021 if (xlog_is_shutdown(mp->m_log)) 1022 goto out_stale; 1023 1024 xfs_buf_ioerror_alert_ratelimited(bp); 1025 1026 /* 1027 * We're not going to bother about retrying this during recovery. 1028 * One strike! 1029 */ 1030 if (bp->b_flags & _XBF_LOGRECOVERY) { 1031 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); 1032 return false; 1033 } 1034 1035 /* 1036 * Synchronous writes will have callers process the error. 1037 */ 1038 if (!(bp->b_flags & XBF_ASYNC)) 1039 goto out_stale; 1040 1041 trace_xfs_buf_iodone_async(bp, _RET_IP_); 1042 1043 cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error); 1044 if (bp->b_last_error != bp->b_error || 1045 !(bp->b_flags & (XBF_STALE | XBF_WRITE_FAIL))) { 1046 bp->b_last_error = bp->b_error; 1047 if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER && 1048 !bp->b_first_retry_time) 1049 bp->b_first_retry_time = jiffies; 1050 goto resubmit; 1051 } 1052 1053 /* 1054 * Permanent error - we need to trigger a shutdown if we haven't already 1055 * to indicate that inconsistency will result from this action. 1056 */ 1057 if (xfs_buf_ioerror_permanent(bp, cfg)) { 1058 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); 1059 goto out_stale; 1060 } 1061 1062 /* Still considered a transient error. Caller will schedule retries. */ 1063 list_for_each_entry(lip, &bp->b_li_list, li_bio_list) { 1064 set_bit(XFS_LI_FAILED, &lip->li_flags); 1065 clear_bit(XFS_LI_FLUSHING, &lip->li_flags); 1066 } 1067 1068 xfs_buf_ioerror(bp, 0); 1069 xfs_buf_relse(bp); 1070 return true; 1071 1072 resubmit: 1073 xfs_buf_ioerror(bp, 0); 1074 bp->b_flags |= (XBF_DONE | XBF_WRITE_FAIL); 1075 reinit_completion(&bp->b_iowait); 1076 xfs_buf_submit(bp); 1077 return true; 1078 out_stale: 1079 xfs_buf_stale(bp); 1080 bp->b_flags |= XBF_DONE; 1081 bp->b_flags &= ~XBF_WRITE; 1082 trace_xfs_buf_error_relse(bp, _RET_IP_); 1083 return false; 1084 } 1085 1086 /* returns false if the caller needs to resubmit the I/O, else true */ 1087 static bool 1088 __xfs_buf_ioend( 1089 struct xfs_buf *bp) 1090 { 1091 trace_xfs_buf_iodone(bp, _RET_IP_); 1092 1093 if (bp->b_flags & XBF_READ) { 1094 if (!bp->b_error && is_vmalloc_addr(bp->b_addr)) 1095 invalidate_kernel_vmap_range(bp->b_addr, 1096 roundup(BBTOB(bp->b_length), PAGE_SIZE)); 1097 if (!bp->b_error && bp->b_ops) 1098 bp->b_ops->verify_read(bp); 1099 if (!bp->b_error) 1100 bp->b_flags |= XBF_DONE; 1101 if (bp->b_flags & XBF_READ_AHEAD) 1102 percpu_counter_dec(&bp->b_target->bt_readahead_count); 1103 } else { 1104 if (!bp->b_error) { 1105 bp->b_flags &= ~XBF_WRITE_FAIL; 1106 bp->b_flags |= XBF_DONE; 1107 } 1108 1109 if (unlikely(bp->b_error) && xfs_buf_ioend_handle_error(bp)) 1110 return false; 1111 1112 /* clear the retry state */ 1113 bp->b_last_error = 0; 1114 bp->b_retries = 0; 1115 bp->b_first_retry_time = 0; 1116 1117 /* 1118 * Note that for things like remote attribute buffers, there may 1119 * not be a buffer log item here, so processing the buffer log 1120 * item must remain optional. 1121 */ 1122 if (bp->b_log_item) 1123 xfs_buf_item_done(bp); 1124 1125 if (bp->b_iodone) 1126 bp->b_iodone(bp); 1127 } 1128 1129 bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD | 1130 _XBF_LOGRECOVERY); 1131 return true; 1132 } 1133 1134 static void 1135 xfs_buf_ioend( 1136 struct xfs_buf *bp) 1137 { 1138 if (!__xfs_buf_ioend(bp)) 1139 return; 1140 if (bp->b_flags & XBF_ASYNC) 1141 xfs_buf_relse(bp); 1142 else 1143 complete(&bp->b_iowait); 1144 } 1145 1146 static void 1147 xfs_buf_ioend_work( 1148 struct work_struct *work) 1149 { 1150 struct xfs_buf *bp = 1151 container_of(work, struct xfs_buf, b_ioend_work); 1152 1153 if (__xfs_buf_ioend(bp)) 1154 xfs_buf_relse(bp); 1155 } 1156 1157 void 1158 __xfs_buf_ioerror( 1159 struct xfs_buf *bp, 1160 int error, 1161 xfs_failaddr_t failaddr) 1162 { 1163 ASSERT(error <= 0 && error >= -1000); 1164 bp->b_error = error; 1165 trace_xfs_buf_ioerror(bp, error, failaddr); 1166 } 1167 1168 void 1169 xfs_buf_ioerror_alert( 1170 struct xfs_buf *bp, 1171 xfs_failaddr_t func) 1172 { 1173 xfs_buf_alert_ratelimited(bp, "XFS: metadata IO error", 1174 "metadata I/O error in \"%pS\" at daddr 0x%llx len %d error %d", 1175 func, (uint64_t)xfs_buf_daddr(bp), 1176 bp->b_length, -bp->b_error); 1177 } 1178 1179 /* 1180 * To simulate an I/O failure, the buffer must be locked and held with at least 1181 * two references. 1182 * 1183 * The buf item reference is dropped via ioend processing. The second reference 1184 * is owned by the caller and is dropped on I/O completion if the buffer is 1185 * XBF_ASYNC. 1186 */ 1187 void 1188 xfs_buf_ioend_fail( 1189 struct xfs_buf *bp) 1190 { 1191 bp->b_flags &= ~XBF_DONE; 1192 xfs_buf_stale(bp); 1193 xfs_buf_ioerror(bp, -EIO); 1194 xfs_buf_ioend(bp); 1195 } 1196 1197 int 1198 xfs_bwrite( 1199 struct xfs_buf *bp) 1200 { 1201 int error; 1202 1203 ASSERT(xfs_buf_islocked(bp)); 1204 1205 bp->b_flags |= XBF_WRITE; 1206 bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q | 1207 XBF_DONE); 1208 1209 xfs_buf_submit(bp); 1210 error = xfs_buf_iowait(bp); 1211 if (error) 1212 xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR); 1213 return error; 1214 } 1215 1216 static void 1217 xfs_buf_bio_end_io( 1218 struct bio *bio) 1219 { 1220 struct xfs_buf *bp = bio->bi_private; 1221 1222 if (bio->bi_status) 1223 xfs_buf_ioerror(bp, blk_status_to_errno(bio->bi_status)); 1224 else if ((bp->b_flags & XBF_WRITE) && (bp->b_flags & XBF_ASYNC) && 1225 XFS_TEST_ERROR(bp->b_mount, XFS_ERRTAG_BUF_IOERROR)) 1226 xfs_buf_ioerror(bp, -EIO); 1227 1228 if (bp->b_flags & XBF_ASYNC) { 1229 INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work); 1230 queue_work(bp->b_mount->m_buf_workqueue, &bp->b_ioend_work); 1231 } else { 1232 complete(&bp->b_iowait); 1233 } 1234 1235 bio_put(bio); 1236 } 1237 1238 static inline blk_opf_t 1239 xfs_buf_bio_op( 1240 struct xfs_buf *bp) 1241 { 1242 blk_opf_t op; 1243 1244 if (bp->b_flags & XBF_WRITE) { 1245 op = REQ_OP_WRITE; 1246 } else { 1247 op = REQ_OP_READ; 1248 if (bp->b_flags & XBF_READ_AHEAD) 1249 op |= REQ_RAHEAD; 1250 } 1251 1252 return op | REQ_META; 1253 } 1254 1255 static void 1256 xfs_buf_submit_bio( 1257 struct xfs_buf *bp) 1258 { 1259 unsigned int len = BBTOB(bp->b_length); 1260 unsigned int nr_vecs = bio_add_max_vecs(bp->b_addr, len); 1261 unsigned int map = 0; 1262 struct blk_plug plug; 1263 struct bio *bio; 1264 1265 bio = bio_alloc(bp->b_target->bt_bdev, nr_vecs, xfs_buf_bio_op(bp), 1266 GFP_NOIO); 1267 if (is_vmalloc_addr(bp->b_addr)) 1268 bio_add_vmalloc(bio, bp->b_addr, len); 1269 else 1270 bio_add_virt_nofail(bio, bp->b_addr, len); 1271 bio->bi_private = bp; 1272 bio->bi_end_io = xfs_buf_bio_end_io; 1273 1274 /* 1275 * If there is more than one map segment, split out a new bio for each 1276 * map except of the last one. The last map is handled by the 1277 * remainder of the original bio outside the loop. 1278 */ 1279 blk_start_plug(&plug); 1280 for (map = 0; map < bp->b_map_count - 1; map++) { 1281 struct bio *split; 1282 1283 split = bio_split(bio, bp->b_maps[map].bm_len, GFP_NOFS, 1284 &fs_bio_set); 1285 split->bi_iter.bi_sector = bp->b_maps[map].bm_bn; 1286 bio_chain(split, bio); 1287 submit_bio(split); 1288 } 1289 bio->bi_iter.bi_sector = bp->b_maps[map].bm_bn; 1290 submit_bio(bio); 1291 blk_finish_plug(&plug); 1292 } 1293 1294 /* 1295 * Wait for I/O completion of a sync buffer and return the I/O error code. 1296 */ 1297 static int 1298 xfs_buf_iowait( 1299 struct xfs_buf *bp) 1300 { 1301 ASSERT(!(bp->b_flags & XBF_ASYNC)); 1302 1303 do { 1304 trace_xfs_buf_iowait(bp, _RET_IP_); 1305 wait_for_completion(&bp->b_iowait); 1306 trace_xfs_buf_iowait_done(bp, _RET_IP_); 1307 } while (!__xfs_buf_ioend(bp)); 1308 1309 return bp->b_error; 1310 } 1311 1312 /* 1313 * Run the write verifier callback function if it exists. If this fails, mark 1314 * the buffer with an error and do not dispatch the I/O. 1315 */ 1316 static bool 1317 xfs_buf_verify_write( 1318 struct xfs_buf *bp) 1319 { 1320 if (bp->b_ops) { 1321 bp->b_ops->verify_write(bp); 1322 if (bp->b_error) 1323 return false; 1324 } else if (bp->b_rhash_key != XFS_BUF_DADDR_NULL) { 1325 /* 1326 * Non-crc filesystems don't attach verifiers during log 1327 * recovery, so don't warn for such filesystems. 1328 */ 1329 if (xfs_has_crc(bp->b_mount)) { 1330 xfs_warn(bp->b_mount, 1331 "%s: no buf ops on daddr 0x%llx len %d", 1332 __func__, xfs_buf_daddr(bp), 1333 bp->b_length); 1334 xfs_hex_dump(bp->b_addr, XFS_CORRUPTION_DUMP_LEN); 1335 dump_stack(); 1336 } 1337 } 1338 1339 return true; 1340 } 1341 1342 /* 1343 * Buffer I/O submission path, read or write. Asynchronous submission transfers 1344 * the buffer lock ownership and the current reference to the IO. It is not 1345 * safe to reference the buffer after a call to this function unless the caller 1346 * holds an additional reference itself. 1347 */ 1348 static void 1349 xfs_buf_submit( 1350 struct xfs_buf *bp) 1351 { 1352 trace_xfs_buf_submit(bp, _RET_IP_); 1353 1354 ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); 1355 1356 /* 1357 * On log shutdown we stale and complete the buffer immediately. We can 1358 * be called to read the superblock before the log has been set up, so 1359 * be careful checking the log state. 1360 * 1361 * Checking the mount shutdown state here can result in the log tail 1362 * moving inappropriately on disk as the log may not yet be shut down. 1363 * i.e. failing this buffer on mount shutdown can remove it from the AIL 1364 * and move the tail of the log forwards without having written this 1365 * buffer to disk. This corrupts the log tail state in memory, and 1366 * because the log may not be shut down yet, it can then be propagated 1367 * to disk before the log is shutdown. Hence we check log shutdown 1368 * state here rather than mount state to avoid corrupting the log tail 1369 * on shutdown. 1370 */ 1371 if (bp->b_mount->m_log && xlog_is_shutdown(bp->b_mount->m_log)) { 1372 xfs_buf_ioend_fail(bp); 1373 return; 1374 } 1375 1376 if (bp->b_flags & XBF_WRITE) 1377 xfs_buf_wait_unpin(bp); 1378 1379 /* 1380 * Make sure we capture only current IO errors rather than stale errors 1381 * left over from previous use of the buffer (e.g. failed readahead). 1382 */ 1383 bp->b_error = 0; 1384 1385 if ((bp->b_flags & XBF_WRITE) && !xfs_buf_verify_write(bp)) { 1386 xfs_force_shutdown(bp->b_mount, SHUTDOWN_CORRUPT_INCORE); 1387 xfs_buf_ioend(bp); 1388 return; 1389 } 1390 1391 /* In-memory targets are directly mapped, no I/O required. */ 1392 if (xfs_buftarg_is_mem(bp->b_target)) { 1393 xfs_buf_ioend(bp); 1394 return; 1395 } 1396 1397 xfs_buf_submit_bio(bp); 1398 } 1399 1400 /* 1401 * Log a message about and stale a buffer that a caller has decided is corrupt. 1402 * 1403 * This function should be called for the kinds of metadata corruption that 1404 * cannot be detect from a verifier, such as incorrect inter-block relationship 1405 * data. Do /not/ call this function from a verifier function. 1406 * 1407 * The buffer must be XBF_DONE prior to the call. Afterwards, the buffer will 1408 * be marked stale, but b_error will not be set. The caller is responsible for 1409 * releasing the buffer or fixing it. 1410 */ 1411 void 1412 __xfs_buf_mark_corrupt( 1413 struct xfs_buf *bp, 1414 xfs_failaddr_t fa) 1415 { 1416 ASSERT(bp->b_flags & XBF_DONE); 1417 1418 xfs_buf_corruption_error(bp, fa); 1419 xfs_buf_stale(bp); 1420 } 1421 1422 /* 1423 * Handling of buffer targets (buftargs). 1424 */ 1425 1426 /* 1427 * Wait for any bufs with callbacks that have been submitted but have not yet 1428 * returned. These buffers will have an elevated hold count, so wait on those 1429 * while freeing all the buffers only held by the LRU. 1430 */ 1431 static enum lru_status 1432 xfs_buftarg_drain_rele( 1433 struct list_head *item, 1434 struct list_lru_one *lru, 1435 void *arg) 1436 1437 { 1438 struct xfs_buf *bp = container_of(item, struct xfs_buf, b_lru); 1439 struct list_head *dispose = arg; 1440 1441 if (!spin_trylock(&bp->b_lockref.lock)) 1442 return LRU_SKIP; 1443 if (bp->b_lockref.count > 0) { 1444 /* need to wait, so skip it this pass */ 1445 spin_unlock(&bp->b_lockref.lock); 1446 trace_xfs_buf_drain_buftarg(bp, _RET_IP_); 1447 return LRU_SKIP; 1448 } 1449 1450 xfs_buf_kill(bp); 1451 list_lru_isolate_move(lru, item, dispose); 1452 spin_unlock(&bp->b_lockref.lock); 1453 return LRU_REMOVED; 1454 } 1455 1456 /* 1457 * Wait for outstanding I/O on the buftarg to complete. 1458 */ 1459 void 1460 xfs_buftarg_wait( 1461 struct xfs_buftarg *btp) 1462 { 1463 /* 1464 * First wait for all in-flight readahead buffers to be released. This is 1465 * critical as new buffers do not make the LRU until they are released. 1466 * 1467 * Next, flush the buffer workqueue to ensure all completion processing 1468 * has finished. Just waiting on buffer locks is not sufficient for 1469 * async IO as the reference count held over IO is not released until 1470 * after the buffer lock is dropped. Hence we need to ensure here that 1471 * all reference counts have been dropped before we start walking the 1472 * LRU list. 1473 */ 1474 while (percpu_counter_sum(&btp->bt_readahead_count)) 1475 delay(100); 1476 flush_workqueue(btp->bt_mount->m_buf_workqueue); 1477 } 1478 1479 void 1480 xfs_buftarg_drain( 1481 struct xfs_buftarg *btp) 1482 { 1483 LIST_HEAD(dispose); 1484 int loop = 0; 1485 bool write_fail = false; 1486 1487 xfs_buftarg_wait(btp); 1488 1489 /* loop until there is nothing left on the lru list. */ 1490 while (list_lru_count(&btp->bt_lru)) { 1491 list_lru_walk(&btp->bt_lru, xfs_buftarg_drain_rele, 1492 &dispose, LONG_MAX); 1493 1494 while (!list_empty(&dispose)) { 1495 struct xfs_buf *bp; 1496 bp = list_first_entry(&dispose, struct xfs_buf, b_lru); 1497 list_del_init(&bp->b_lru); 1498 if (bp->b_flags & XBF_WRITE_FAIL) { 1499 write_fail = true; 1500 xfs_buf_alert_ratelimited(bp, 1501 "XFS: Corruption Alert", 1502 "Corruption Alert: Buffer at daddr 0x%llx had permanent write failures!", 1503 (long long)xfs_buf_daddr(bp)); 1504 } 1505 xfs_buf_destroy(bp); 1506 } 1507 if (loop++ != 0) 1508 delay(100); 1509 } 1510 1511 /* 1512 * If one or more failed buffers were freed, that means dirty metadata 1513 * was thrown away. This should only ever happen after I/O completion 1514 * handling has elevated I/O error(s) to permanent failures and shuts 1515 * down the journal. 1516 */ 1517 if (write_fail) { 1518 ASSERT(xlog_is_shutdown(btp->bt_mount->m_log)); 1519 xfs_alert(btp->bt_mount, 1520 "Please run xfs_repair to determine the extent of the problem."); 1521 } 1522 } 1523 1524 static enum lru_status 1525 xfs_buftarg_isolate( 1526 struct list_head *item, 1527 struct list_lru_one *lru, 1528 void *arg) 1529 { 1530 struct xfs_buf *bp = container_of(item, struct xfs_buf, b_lru); 1531 struct list_head *dispose = arg; 1532 1533 /* 1534 * We are inverting the lru lock vs bp->b_lockref.lock order here, so 1535 * use a trylock. If we fail to get the lock, just skip the buffer. 1536 */ 1537 if (!spin_trylock(&bp->b_lockref.lock)) 1538 return LRU_SKIP; 1539 1540 /* 1541 * If the buffer is in use, remove it from the LRU for now. We can't 1542 * free it while someone is using it, and we should also not count 1543 * eviction passed for it, just as if it hadn't been added to the LRU 1544 * yet. 1545 */ 1546 if (bp->b_lockref.count > 0) { 1547 list_lru_isolate(lru, &bp->b_lru); 1548 spin_unlock(&bp->b_lockref.lock); 1549 return LRU_REMOVED; 1550 } 1551 1552 /* 1553 * Decrement the b_lru_ref count unless the value is already 1554 * zero. If the value is already zero, we need to reclaim the 1555 * buffer, otherwise it gets another trip through the LRU. 1556 */ 1557 if (atomic_add_unless(&bp->b_lru_ref, -1, 0)) { 1558 spin_unlock(&bp->b_lockref.lock); 1559 return LRU_ROTATE; 1560 } 1561 1562 xfs_buf_kill(bp); 1563 list_lru_isolate_move(lru, item, dispose); 1564 spin_unlock(&bp->b_lockref.lock); 1565 return LRU_REMOVED; 1566 } 1567 1568 static unsigned long 1569 xfs_buftarg_shrink_scan( 1570 struct shrinker *shrink, 1571 struct shrink_control *sc) 1572 { 1573 struct xfs_buftarg *btp = shrink->private_data; 1574 LIST_HEAD(dispose); 1575 unsigned long freed; 1576 1577 freed = list_lru_shrink_walk(&btp->bt_lru, sc, 1578 xfs_buftarg_isolate, &dispose); 1579 1580 while (!list_empty(&dispose)) { 1581 struct xfs_buf *bp; 1582 bp = list_first_entry(&dispose, struct xfs_buf, b_lru); 1583 list_del_init(&bp->b_lru); 1584 xfs_buf_destroy(bp); 1585 } 1586 1587 return freed; 1588 } 1589 1590 static unsigned long 1591 xfs_buftarg_shrink_count( 1592 struct shrinker *shrink, 1593 struct shrink_control *sc) 1594 { 1595 struct xfs_buftarg *btp = shrink->private_data; 1596 return list_lru_shrink_count(&btp->bt_lru, sc); 1597 } 1598 1599 void 1600 xfs_destroy_buftarg( 1601 struct xfs_buftarg *btp) 1602 { 1603 shrinker_free(btp->bt_shrinker); 1604 ASSERT(percpu_counter_sum(&btp->bt_readahead_count) == 0); 1605 percpu_counter_destroy(&btp->bt_readahead_count); 1606 list_lru_destroy(&btp->bt_lru); 1607 rhashtable_destroy(&btp->bt_hash); 1608 } 1609 1610 void 1611 xfs_free_buftarg( 1612 struct xfs_buftarg *btp) 1613 { 1614 xfs_destroy_buftarg(btp); 1615 fs_put_dax(btp->bt_daxdev, btp->bt_mount); 1616 /* the main block device is closed by kill_block_super */ 1617 if (btp->bt_bdev != btp->bt_mount->m_super->s_bdev) 1618 bdev_fput(btp->bt_file); 1619 kfree(btp); 1620 } 1621 1622 /* 1623 * Configure this buffer target for hardware-assisted atomic writes if the 1624 * underlying block device supports is congruent with the filesystem geometry. 1625 */ 1626 static inline void 1627 xfs_configure_buftarg_atomic_writes( 1628 struct xfs_buftarg *btp) 1629 { 1630 struct xfs_mount *mp = btp->bt_mount; 1631 unsigned int min_bytes, max_bytes; 1632 1633 min_bytes = bdev_atomic_write_unit_min_bytes(btp->bt_bdev); 1634 max_bytes = bdev_atomic_write_unit_max_bytes(btp->bt_bdev); 1635 1636 /* 1637 * Ignore atomic write geometry that is nonsense or doesn't even cover 1638 * a single fsblock. 1639 */ 1640 if (min_bytes > max_bytes || 1641 min_bytes > mp->m_sb.sb_blocksize || 1642 max_bytes < mp->m_sb.sb_blocksize) { 1643 min_bytes = 0; 1644 max_bytes = 0; 1645 } 1646 1647 btp->bt_awu_min = min_bytes; 1648 btp->bt_awu_max = max_bytes; 1649 } 1650 1651 /* Configure a buffer target that abstracts a block device. */ 1652 int 1653 xfs_configure_buftarg( 1654 struct xfs_buftarg *btp, 1655 unsigned int sectorsize, 1656 xfs_rfsblock_t nr_blocks) 1657 { 1658 struct xfs_mount *mp = btp->bt_mount; 1659 1660 if (btp->bt_bdev) { 1661 int error; 1662 1663 error = bdev_validate_blocksize(btp->bt_bdev, sectorsize); 1664 if (error) { 1665 xfs_warn(mp, 1666 "Cannot use blocksize %u on device %pg, err %d", 1667 sectorsize, btp->bt_bdev, error); 1668 return -EINVAL; 1669 } 1670 1671 if (bdev_can_atomic_write(btp->bt_bdev)) 1672 xfs_configure_buftarg_atomic_writes(btp); 1673 } 1674 1675 btp->bt_meta_sectorsize = sectorsize; 1676 btp->bt_meta_sectormask = sectorsize - 1; 1677 /* m_blkbb_log is not set up yet */ 1678 btp->bt_nr_sectors = nr_blocks << (mp->m_sb.sb_blocklog - BBSHIFT); 1679 return 0; 1680 } 1681 1682 int 1683 xfs_init_buftarg( 1684 struct xfs_buftarg *btp, 1685 size_t logical_sectorsize, 1686 const char *descr) 1687 { 1688 /* The maximum size of the buftarg is only known once the sb is read. */ 1689 btp->bt_nr_sectors = XFS_BUF_DADDR_MAX; 1690 1691 /* Set up device logical sector size mask */ 1692 btp->bt_logical_sectorsize = logical_sectorsize; 1693 btp->bt_logical_sectormask = logical_sectorsize - 1; 1694 1695 /* 1696 * Buffer IO error rate limiting. Limit it to no more than 10 messages 1697 * per 30 seconds so as to not spam logs too much on repeated errors. 1698 */ 1699 ratelimit_state_init(&btp->bt_ioerror_rl, 30 * HZ, 1700 DEFAULT_RATELIMIT_BURST); 1701 1702 if (rhashtable_init(&btp->bt_hash, &xfs_buf_hash_params)) 1703 return -ENOMEM; 1704 if (list_lru_init(&btp->bt_lru)) 1705 goto out_destroy_hash; 1706 if (percpu_counter_init(&btp->bt_readahead_count, 0, GFP_KERNEL)) 1707 goto out_destroy_lru; 1708 1709 btp->bt_shrinker = 1710 shrinker_alloc(SHRINKER_NUMA_AWARE, "xfs-buf:%s", descr); 1711 if (!btp->bt_shrinker) 1712 goto out_destroy_io_count; 1713 btp->bt_shrinker->count_objects = xfs_buftarg_shrink_count; 1714 btp->bt_shrinker->scan_objects = xfs_buftarg_shrink_scan; 1715 btp->bt_shrinker->private_data = btp; 1716 shrinker_register(btp->bt_shrinker); 1717 return 0; 1718 1719 out_destroy_io_count: 1720 percpu_counter_destroy(&btp->bt_readahead_count); 1721 out_destroy_lru: 1722 list_lru_destroy(&btp->bt_lru); 1723 out_destroy_hash: 1724 rhashtable_destroy(&btp->bt_hash); 1725 return -ENOMEM; 1726 } 1727 1728 struct xfs_buftarg * 1729 xfs_alloc_buftarg( 1730 struct xfs_mount *mp, 1731 struct file *bdev_file) 1732 { 1733 struct xfs_buftarg *btp; 1734 const struct dax_holder_operations *ops = NULL; 1735 int error; 1736 1737 1738 #if defined(CONFIG_FS_DAX) && defined(CONFIG_MEMORY_FAILURE) 1739 ops = &xfs_dax_holder_operations; 1740 #endif 1741 btp = kzalloc_obj(*btp, GFP_KERNEL | __GFP_NOFAIL); 1742 1743 btp->bt_mount = mp; 1744 btp->bt_file = bdev_file; 1745 btp->bt_bdev = file_bdev(bdev_file); 1746 btp->bt_dev = btp->bt_bdev->bd_dev; 1747 btp->bt_daxdev = fs_dax_get_by_bdev(btp->bt_bdev, &btp->bt_dax_part_off, 1748 mp, ops); 1749 1750 /* 1751 * Flush and invalidate all devices' pagecaches before reading any 1752 * metadata because XFS doesn't use the bdev pagecache. 1753 */ 1754 error = sync_blockdev(btp->bt_bdev); 1755 if (error) 1756 goto error_free; 1757 1758 /* 1759 * When allocating the buftargs we have not yet read the super block and 1760 * thus don't know the file system sector size yet. 1761 */ 1762 btp->bt_meta_sectorsize = bdev_logical_block_size(btp->bt_bdev); 1763 btp->bt_meta_sectormask = btp->bt_meta_sectorsize - 1; 1764 1765 error = xfs_init_buftarg(btp, btp->bt_meta_sectorsize, 1766 mp->m_super->s_id); 1767 if (error) 1768 goto error_free; 1769 1770 return btp; 1771 1772 error_free: 1773 fs_put_dax(btp->bt_daxdev, mp); 1774 kfree(btp); 1775 return ERR_PTR(error); 1776 } 1777 1778 static inline void 1779 xfs_buf_list_del( 1780 struct xfs_buf *bp) 1781 { 1782 list_del_init(&bp->b_list); 1783 wake_up_var(&bp->b_list); 1784 } 1785 1786 /* 1787 * Cancel a delayed write list. 1788 * 1789 * Remove each buffer from the list, clear the delwri queue flag and drop the 1790 * associated buffer reference. 1791 */ 1792 void 1793 xfs_buf_delwri_cancel( 1794 struct list_head *list) 1795 { 1796 struct xfs_buf *bp; 1797 1798 while (!list_empty(list)) { 1799 bp = list_first_entry(list, struct xfs_buf, b_list); 1800 1801 xfs_buf_lock(bp); 1802 bp->b_flags &= ~_XBF_DELWRI_Q; 1803 xfs_buf_list_del(bp); 1804 xfs_buf_relse(bp); 1805 } 1806 } 1807 1808 /* 1809 * Add a buffer to the delayed write list. 1810 * 1811 * This queues a buffer for writeout if it hasn't already been. Note that 1812 * neither this routine nor the buffer list submission functions perform 1813 * any internal synchronization. It is expected that the lists are thread-local 1814 * to the callers. 1815 * 1816 * Returns true if we queued up the buffer, or false if it already had 1817 * been on the buffer list. 1818 */ 1819 bool 1820 xfs_buf_delwri_queue( 1821 struct xfs_buf *bp, 1822 struct list_head *list) 1823 { 1824 ASSERT(xfs_buf_islocked(bp)); 1825 ASSERT(!(bp->b_flags & XBF_READ)); 1826 1827 /* 1828 * If the buffer is already marked delwri it already is queued up 1829 * by someone else for imediate writeout. Just ignore it in that 1830 * case. 1831 */ 1832 if (bp->b_flags & _XBF_DELWRI_Q) { 1833 trace_xfs_buf_delwri_queued(bp, _RET_IP_); 1834 return false; 1835 } 1836 1837 trace_xfs_buf_delwri_queue(bp, _RET_IP_); 1838 1839 /* 1840 * If a buffer gets written out synchronously or marked stale while it 1841 * is on a delwri list we lazily remove it. To do this, the other party 1842 * clears the _XBF_DELWRI_Q flag but otherwise leaves the buffer alone. 1843 * It remains referenced and on the list. In a rare corner case it 1844 * might get readded to a delwri list after the synchronous writeout, in 1845 * which case we need just need to re-add the flag here. 1846 */ 1847 bp->b_flags |= _XBF_DELWRI_Q; 1848 if (list_empty(&bp->b_list)) { 1849 xfs_buf_hold(bp); 1850 list_add_tail(&bp->b_list, list); 1851 } 1852 1853 return true; 1854 } 1855 1856 /* 1857 * Queue a buffer to this delwri list as part of a data integrity operation. 1858 * If the buffer is on any other delwri list, we'll wait for that to clear 1859 * so that the caller can submit the buffer for IO and wait for the result. 1860 * Callers must ensure the buffer is not already on the list. 1861 */ 1862 void 1863 xfs_buf_delwri_queue_here( 1864 struct xfs_buf *bp, 1865 struct list_head *buffer_list) 1866 { 1867 /* 1868 * We need this buffer to end up on the /caller's/ delwri list, not any 1869 * old list. This can happen if the buffer is marked stale (which 1870 * clears DELWRI_Q) after the AIL queues the buffer to its list but 1871 * before the AIL has a chance to submit the list. 1872 */ 1873 while (!list_empty(&bp->b_list)) { 1874 xfs_buf_unlock(bp); 1875 wait_var_event(&bp->b_list, list_empty(&bp->b_list)); 1876 xfs_buf_lock(bp); 1877 } 1878 1879 ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); 1880 1881 xfs_buf_delwri_queue(bp, buffer_list); 1882 } 1883 1884 /* 1885 * Compare function is more complex than it needs to be because 1886 * the return value is only 32 bits and we are doing comparisons 1887 * on 64 bit values 1888 */ 1889 static int 1890 xfs_buf_cmp( 1891 void *priv, 1892 const struct list_head *a, 1893 const struct list_head *b) 1894 { 1895 struct xfs_buf *ap = container_of(a, struct xfs_buf, b_list); 1896 struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list); 1897 xfs_daddr_t diff; 1898 1899 diff = ap->b_maps[0].bm_bn - bp->b_maps[0].bm_bn; 1900 if (diff < 0) 1901 return -1; 1902 if (diff > 0) 1903 return 1; 1904 return 0; 1905 } 1906 1907 static bool 1908 xfs_buf_delwri_submit_prep( 1909 struct xfs_buf *bp) 1910 { 1911 /* 1912 * Someone else might have written the buffer synchronously or marked it 1913 * stale in the meantime. In that case only the _XBF_DELWRI_Q flag got 1914 * cleared, and we have to drop the reference and remove it from the 1915 * list here. 1916 */ 1917 if (!(bp->b_flags & _XBF_DELWRI_Q)) { 1918 xfs_buf_list_del(bp); 1919 xfs_buf_relse(bp); 1920 return false; 1921 } 1922 1923 trace_xfs_buf_delwri_split(bp, _RET_IP_); 1924 bp->b_flags &= ~_XBF_DELWRI_Q; 1925 bp->b_flags |= XBF_WRITE; 1926 return true; 1927 } 1928 1929 /* 1930 * Write out a buffer list asynchronously. 1931 * 1932 * This will take the @buffer_list, write all non-locked and non-pinned buffers 1933 * out and not wait for I/O completion on any of the buffers. This interface 1934 * is only safely useable for callers that can track I/O completion by higher 1935 * level means, e.g. AIL pushing as the @buffer_list is consumed in this 1936 * function. 1937 * 1938 * Note: this function will skip buffers it would block on, and in doing so 1939 * leaves them on @buffer_list so they can be retried on a later pass. As such, 1940 * it is up to the caller to ensure that the buffer list is fully submitted or 1941 * cancelled appropriately when they are finished with the list. Failure to 1942 * cancel or resubmit the list until it is empty will result in leaked buffers 1943 * at unmount time. 1944 */ 1945 int 1946 xfs_buf_delwri_submit_nowait( 1947 struct list_head *buffer_list) 1948 { 1949 struct xfs_buf *bp, *n; 1950 int pinned = 0; 1951 struct blk_plug plug; 1952 1953 list_sort(NULL, buffer_list, xfs_buf_cmp); 1954 1955 blk_start_plug(&plug); 1956 list_for_each_entry_safe(bp, n, buffer_list, b_list) { 1957 if (!xfs_buf_trylock(bp)) 1958 continue; 1959 if (xfs_buf_ispinned(bp)) { 1960 xfs_buf_unlock(bp); 1961 pinned++; 1962 continue; 1963 } 1964 if (!xfs_buf_delwri_submit_prep(bp)) 1965 continue; 1966 bp->b_flags |= XBF_ASYNC; 1967 xfs_buf_list_del(bp); 1968 xfs_buf_submit(bp); 1969 } 1970 blk_finish_plug(&plug); 1971 1972 return pinned; 1973 } 1974 1975 /* 1976 * Write out a buffer list synchronously. 1977 * 1978 * This will take the @buffer_list, write all buffers out and wait for I/O 1979 * completion on all of the buffers. @buffer_list is consumed by the function, 1980 * so callers must have some other way of tracking buffers if they require such 1981 * functionality. 1982 */ 1983 int 1984 xfs_buf_delwri_submit( 1985 struct list_head *buffer_list) 1986 { 1987 LIST_HEAD (wait_list); 1988 int error = 0, error2; 1989 struct xfs_buf *bp, *n; 1990 struct blk_plug plug; 1991 1992 list_sort(NULL, buffer_list, xfs_buf_cmp); 1993 1994 blk_start_plug(&plug); 1995 list_for_each_entry_safe(bp, n, buffer_list, b_list) { 1996 xfs_buf_lock(bp); 1997 if (!xfs_buf_delwri_submit_prep(bp)) 1998 continue; 1999 bp->b_flags &= ~XBF_ASYNC; 2000 list_move_tail(&bp->b_list, &wait_list); 2001 xfs_buf_submit(bp); 2002 } 2003 blk_finish_plug(&plug); 2004 2005 /* Wait for IO to complete. */ 2006 while (!list_empty(&wait_list)) { 2007 bp = list_first_entry(&wait_list, struct xfs_buf, b_list); 2008 2009 xfs_buf_list_del(bp); 2010 2011 /* 2012 * Wait on the locked buffer, check for errors and unlock and 2013 * release the delwri queue reference. 2014 */ 2015 error2 = xfs_buf_iowait(bp); 2016 xfs_buf_relse(bp); 2017 if (!error) 2018 error = error2; 2019 } 2020 2021 return error; 2022 } 2023 2024 void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref) 2025 { 2026 /* 2027 * Set the lru reference count to 0 based on the error injection tag. 2028 * This allows userspace to disrupt buffer caching for debug/testing 2029 * purposes. 2030 */ 2031 if (XFS_TEST_ERROR(bp->b_mount, XFS_ERRTAG_BUF_LRU_REF)) 2032 lru_ref = 0; 2033 2034 atomic_set(&bp->b_lru_ref, lru_ref); 2035 } 2036 2037 /* 2038 * Verify an on-disk magic value against the magic value specified in the 2039 * verifier structure. The verifier magic is in disk byte order so the caller is 2040 * expected to pass the value directly from disk. 2041 */ 2042 bool 2043 xfs_verify_magic( 2044 struct xfs_buf *bp, 2045 __be32 dmagic) 2046 { 2047 struct xfs_mount *mp = bp->b_mount; 2048 int idx; 2049 2050 idx = xfs_has_crc(mp); 2051 if (WARN_ON(!bp->b_ops || !bp->b_ops->magic[idx])) 2052 return false; 2053 return dmagic == bp->b_ops->magic[idx]; 2054 } 2055 /* 2056 * Verify an on-disk magic value against the magic value specified in the 2057 * verifier structure. The verifier magic is in disk byte order so the caller is 2058 * expected to pass the value directly from disk. 2059 */ 2060 bool 2061 xfs_verify_magic16( 2062 struct xfs_buf *bp, 2063 __be16 dmagic) 2064 { 2065 struct xfs_mount *mp = bp->b_mount; 2066 int idx; 2067 2068 idx = xfs_has_crc(mp); 2069 if (WARN_ON(!bp->b_ops || !bp->b_ops->magic16[idx])) 2070 return false; 2071 return dmagic == bp->b_ops->magic16[idx]; 2072 } 2073