// SPDX-License-Identifier: GPL-2.0

#include <linux/slab.h>
#include "messages.h"
#include "subpage.h"
#include "btrfs_inode.h"

/*
 * Subpage (block size < folio size) support overview:
 *
 * Limitations:
 *
 * - Only support 64K page size for now
 *   This is to make metadata handling easier, as a 64K page would ensure
 *   any nodesize fits inside one page, thus we don't need to handle
 *   cases where a tree block crosses several pages.
 *
 * - Only metadata read-write for now
 *   The data read-write part is in development.
 *
 * - Metadata can't cross 64K page boundary
 *   btrfs-progs and kernel have done that for a while, thus only ancient
 *   filesystems could have such problem.  For such case, do a graceful
 *   rejection.
 *
 * Special behavior:
 *
 * - Metadata
 *   Metadata read is fully supported.
 *   Meaning reading one tree block will only trigger the read for the
 *   needed range, other unrelated ranges in the same page will not be touched.
 *
 *   Metadata write support is partial.
 *   The writeback is still for the full page, but we will only submit
 *   the dirty extent buffers in the page.
 *
 *   This means, if we have a metadata page like this:
 *
 *   Page offset
 *   0         16K         32K         48K         64K
 *   |/////////|           |///////////|
 *        \- Tree block A        \- Tree block B
 *
 *   Even if we just want to writeback tree block A, we will also writeback
 *   tree block B if it's also dirty.
 *
 *   This may cause extra metadata writeback, which results in more COW.
 *
 * Implementation:
 *
 * - Common
 *   Both metadata and data will use a new structure, btrfs_folio_state, to
 *   record the status of each sector inside a page.  This provides the extra
 *   granularity needed.
 *
 * - Metadata
 *   Since we have multiple tree blocks inside one page, we can't rely on page
 *   locking anymore, or we will have greatly reduced concurrency or even
 *   deadlocks (hold one tree lock while trying to lock another tree lock in
 *   the same page).
 *
 *   Thus for metadata locking, subpage support relies on io_tree locking only.
 *   This means a slightly higher tree locking latency.
 */

int btrfs_attach_folio_state(const struct btrfs_fs_info *fs_info,
			     struct folio *folio, enum btrfs_folio_type type)
{
	struct btrfs_folio_state *bfs;

	/* For metadata we don't support large folios yet. */
	if (type == BTRFS_SUBPAGE_METADATA)
		ASSERT(!folio_test_large(folio));

	/*
	 * We have cases like a dummy extent buffer page, which is not mapped
	 * and doesn't need to be locked.
	 */
	if (folio->mapping)
		ASSERT(folio_test_locked(folio));

	/* Either not subpage, or the folio already has private attached. */
	if (folio_test_private(folio))
		return 0;
	if (type == BTRFS_SUBPAGE_METADATA && !btrfs_meta_is_subpage(fs_info))
		return 0;
	if (type == BTRFS_SUBPAGE_DATA && !btrfs_is_subpage(fs_info, folio))
		return 0;

	bfs = btrfs_alloc_folio_state(fs_info, folio_size(folio), type);
	if (IS_ERR(bfs))
		return PTR_ERR(bfs);

	folio_attach_private(folio, bfs);
	return 0;
}
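
/*
 * Illustrative sketch (not a function in this file): how a data path caller
 * might pair the attach/detach helpers around a folio's page cache lifetime.
 * The caller name and error handling below are assumptions for illustration
 * only.
 *
 *	static int example_prepare_folio(struct btrfs_fs_info *fs_info,
 *					 struct folio *folio)
 *	{
 *		int ret;
 *
 *		folio_lock(folio);
 *		// No-op for non-subpage filesystems, allocates the per-block
 *		// bitmaps otherwise.
 *		ret = btrfs_attach_folio_state(fs_info, folio, BTRFS_SUBPAGE_DATA);
 *		if (ret < 0) {
 *			folio_unlock(folio);
 *			return ret;
 *		}
 *		// ... fill the folio, set per-block bits ...
 *		return 0;
 *	}
 *
 * On release (e.g. from a release_folio callback) the state is dropped again
 * with btrfs_detach_folio_state(fs_info, folio, BTRFS_SUBPAGE_DATA).
 */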

void btrfs_detach_folio_state(const struct btrfs_fs_info *fs_info, struct folio *folio,
			      enum btrfs_folio_type type)
{
	struct btrfs_folio_state *bfs;

	/* Either not subpage, or the folio has no private attached. */
	if (!folio_test_private(folio))
		return;
	if (type == BTRFS_SUBPAGE_METADATA && !btrfs_meta_is_subpage(fs_info))
		return;
	if (type == BTRFS_SUBPAGE_DATA && !btrfs_is_subpage(fs_info, folio))
		return;

	bfs = folio_detach_private(folio);
	ASSERT(bfs);
	btrfs_free_folio_state(bfs);
}

struct btrfs_folio_state *btrfs_alloc_folio_state(const struct btrfs_fs_info *fs_info,
						  size_t fsize, enum btrfs_folio_type type)
{
	struct btrfs_folio_state *ret;
	unsigned int real_size;

	ASSERT(fs_info->sectorsize < fsize);

	real_size = struct_size(ret, bitmaps,
				BITS_TO_LONGS(btrfs_bitmap_nr_max *
					      (fsize >> fs_info->sectorsize_bits)));
	ret = kzalloc(real_size, GFP_NOFS);
	if (!ret)
		return ERR_PTR(-ENOMEM);

	spin_lock_init(&ret->lock);
	if (type == BTRFS_SUBPAGE_METADATA)
		atomic_set(&ret->eb_refs, 0);
	else
		atomic_set(&ret->nr_locked, 0);
	return ret;
}

/*
 * Increase the eb_refs of the current subpage.
 *
 * This is important for eb allocation, to prevent a race with the last eb
 * freeing of the same page.
 * With eb_refs increased before the eb is inserted into the radix tree,
 * detach_extent_buffer_page() won't detach the folio private while we're still
 * allocating the extent buffer.
 */
void btrfs_folio_inc_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio)
{
	struct btrfs_folio_state *bfs;

	if (!btrfs_meta_is_subpage(fs_info))
		return;

	ASSERT(folio_test_private(folio) && folio->mapping);
	lockdep_assert_held(&folio->mapping->i_private_lock);

	bfs = folio_get_private(folio);
	atomic_inc(&bfs->eb_refs);
}

void btrfs_folio_dec_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio)
{
	struct btrfs_folio_state *bfs;

	if (!btrfs_meta_is_subpage(fs_info))
		return;

	ASSERT(folio_test_private(folio) && folio->mapping);
	lockdep_assert_held(&folio->mapping->i_private_lock);

	bfs = folio_get_private(folio);
	ASSERT(atomic_read(&bfs->eb_refs));
	atomic_dec(&bfs->eb_refs);
}
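
/*
 * Illustrative sketch (assumed caller, not defined here): the eb_refs pair is
 * meant to bracket the window between attaching an extent buffer to a folio
 * and tearing it down, with mapping->i_private_lock held as asserted above:
 *
 *	spin_lock(&folio->mapping->i_private_lock);
 *	btrfs_folio_inc_eb_refs(fs_info, folio);	// eb being attached
 *	spin_unlock(&folio->mapping->i_private_lock);
 *	...
 *	spin_lock(&folio->mapping->i_private_lock);
 *	btrfs_folio_dec_eb_refs(fs_info, folio);	// eb being released
 *	spin_unlock(&folio->mapping->i_private_lock);
 *
 * While eb_refs is non-zero the folio private (the btrfs_folio_state) must not
 * be detached.
 */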

static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info,
				 struct folio *folio, u64 start, u32 len)
{
	/* Basic checks */
	ASSERT(folio_test_private(folio) && folio_get_private(folio));
	ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
	       IS_ALIGNED(len, fs_info->sectorsize));
	/*
	 * The range check only works for mapped pages; we can still have
	 * unmapped pages like dummy extent buffer pages.
	 */
	if (folio->mapping)
		ASSERT(folio_pos(folio) <= start && start + len <= folio_end(folio),
		       "start=%llu len=%u folio_pos=%llu folio_size=%zu",
		       start, len, folio_pos(folio), folio_size(folio));
}

#define subpage_calc_start_bit(fs_info, folio, name, start, len)	\
({									\
	unsigned int __start_bit;					\
	const unsigned int blocks_per_folio =				\
				btrfs_blocks_per_folio(fs_info, folio);	\
									\
	btrfs_subpage_assert(fs_info, folio, start, len);		\
	__start_bit = offset_in_folio(folio, start) >> fs_info->sectorsize_bits; \
	__start_bit += blocks_per_folio * btrfs_bitmap_nr_##name;	\
	__start_bit;							\
})

static void btrfs_subpage_clamp_range(struct folio *folio, u64 *start, u32 *len)
{
	u64 orig_start = *start;
	u32 orig_len = *len;

	*start = max_t(u64, folio_pos(folio), orig_start);
	/*
	 * For certain call sites like btrfs_drop_pages(), we may have pages
	 * beyond the target range.  In that case, just set @len to 0, subpage
	 * helpers can handle @len == 0 without any problem.
	 */
	if (folio_pos(folio) >= orig_start + orig_len)
		*len = 0;
	else
		*len = min_t(u64, folio_end(folio), orig_start + orig_len) - *start;
}

static bool btrfs_subpage_end_and_test_lock(const struct btrfs_fs_info *fs_info,
					    struct folio *folio, u64 start, u32 len)
{
	struct btrfs_folio_state *bfs = folio_get_private(folio);
	const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len);
	const int nbits = (len >> fs_info->sectorsize_bits);
	unsigned long flags;
	unsigned int cleared = 0;
	int bit = start_bit;
	bool last;

	btrfs_subpage_assert(fs_info, folio, start, len);

	spin_lock_irqsave(&bfs->lock, flags);
	/*
	 * We have call sites passing @locked_folio into
	 * extent_clear_unlock_delalloc() for the compression path.
	 *
	 * This @locked_folio is locked by plain lock_page(), thus its
	 * subpage::locked is 0.  Handle them in a special way.
	 */
	if (atomic_read(&bfs->nr_locked) == 0) {
		spin_unlock_irqrestore(&bfs->lock, flags);
		return true;
	}

	for_each_set_bit_from(bit, bfs->bitmaps, start_bit + nbits) {
		clear_bit(bit, bfs->bitmaps);
		cleared++;
	}
	ASSERT(atomic_read(&bfs->nr_locked) >= cleared);
	last = atomic_sub_and_test(cleared, &bfs->nr_locked);
	spin_unlock_irqrestore(&bfs->lock, flags);
	return last;
}
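
/*
 * Worked example of the bitmap layout behind subpage_calc_start_bit(), using
 * assumed numbers (4K sectorsize, 64K folio, so blocks_per_folio == 16):
 *
 *	bfs->bitmaps packs one group of blocks_per_folio bits per bitmap type,
 *	in the order given by the btrfs_bitmap_nr_* enum, e.g.:
 *
 *	bits  0..15   uptodate
 *	bits 16..31   dirty
 *	bits 32..47   writeback
 *	...
 *
 *	For start == folio_pos(folio) + 8K and the "dirty" bitmap:
 *	__start_bit = (8K >> 12) + 16 * btrfs_bitmap_nr_dirty = 2 + 16 * 1 = 18
 *
 * The exact order of the groups comes from the btrfs_bitmap_nr_* enum in
 * subpage.h; the numbers above are only for illustration.
 */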

/*
 * Handle different locked folios:
 *
 * - Non-subpage folio
 *   Just unlock it.
 *
 * - folio locked but without any subpage range locked
 *   This happens either before writepage_delalloc(), or because the delalloc
 *   range was already handled by a previous folio.
 *   We can simply unlock it.
 *
 * - folio locked with subpage range locked
 *   We go through the locked sectors inside the range and clear their locked
 *   bitmap, reduce the writer lock number, and unlock the folio if that's
 *   the last locked range.
 */
void btrfs_folio_end_lock(const struct btrfs_fs_info *fs_info,
			  struct folio *folio, u64 start, u32 len)
{
	struct btrfs_folio_state *bfs = folio_get_private(folio);

	ASSERT(folio_test_locked(folio));

	if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio)) {
		folio_unlock(folio);
		return;
	}

	/*
	 * For the subpage case, there are two types of locked folios: with or
	 * without a locked number.
	 *
	 * Since we own the folio lock, no one else could touch subpage::locked
	 * and we are safe to do several atomic operations without spinlock.
	 */
	if (atomic_read(&bfs->nr_locked) == 0) {
		/* No subpage lock, locked by plain lock_page(). */
		folio_unlock(folio);
		return;
	}

	btrfs_subpage_clamp_range(folio, &start, &len);
	if (btrfs_subpage_end_and_test_lock(fs_info, folio, start, len))
		folio_unlock(folio);
}

void btrfs_folio_end_lock_bitmap(const struct btrfs_fs_info *fs_info,
				 struct folio *folio, unsigned long bitmap)
{
	struct btrfs_folio_state *bfs = folio_get_private(folio);
	const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio);
	const int start_bit = blocks_per_folio * btrfs_bitmap_nr_locked;
	unsigned long flags;
	bool last = false;
	int cleared = 0;
	int bit;

	if (!btrfs_is_subpage(fs_info, folio)) {
		folio_unlock(folio);
		return;
	}

	if (atomic_read(&bfs->nr_locked) == 0) {
		/* No subpage lock, locked by plain lock_page(). */
		folio_unlock(folio);
		return;
	}

	spin_lock_irqsave(&bfs->lock, flags);
	for_each_set_bit(bit, &bitmap, blocks_per_folio) {
		if (test_and_clear_bit(bit + start_bit, bfs->bitmaps))
			cleared++;
	}
	ASSERT(atomic_read(&bfs->nr_locked) >= cleared);
	last = atomic_sub_and_test(cleared, &bfs->nr_locked);
	spin_unlock_irqrestore(&bfs->lock, flags);
	if (last)
		folio_unlock(folio);
}

#define subpage_test_bitmap_all_set(fs_info, folio, name)		\
({									\
	struct btrfs_folio_state *bfs = folio_get_private(folio);	\
	const unsigned int blocks_per_folio =				\
				btrfs_blocks_per_folio(fs_info, folio);	\
									\
	bitmap_test_range_all_set(bfs->bitmaps,				\
			blocks_per_folio * btrfs_bitmap_nr_##name,	\
			blocks_per_folio);				\
})

#define subpage_test_bitmap_all_zero(fs_info, folio, name)		\
({									\
	struct btrfs_folio_state *bfs = folio_get_private(folio);	\
	const unsigned int blocks_per_folio =				\
				btrfs_blocks_per_folio(fs_info, folio);	\
									\
	bitmap_test_range_all_zero(bfs->bitmaps,			\
			blocks_per_folio * btrfs_bitmap_nr_##name,	\
			blocks_per_folio);				\
})

void btrfs_subpage_set_uptodate(const struct btrfs_fs_info *fs_info,
				struct folio *folio, u64 start, u32 len)
{
	struct btrfs_folio_state *bfs = folio_get_private(folio);
	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
							uptodate, start, len);
	unsigned long flags;

	spin_lock_irqsave(&bfs->lock, flags);
	bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	if (subpage_test_bitmap_all_set(fs_info, folio, uptodate))
		folio_mark_uptodate(folio);
	spin_unlock_irqrestore(&bfs->lock, flags);
}

void btrfs_subpage_clear_uptodate(const struct btrfs_fs_info *fs_info,
				  struct folio *folio, u64 start, u32 len)
{
	struct btrfs_folio_state *bfs = folio_get_private(folio);
	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
							uptodate, start, len);
	unsigned long flags;

	spin_lock_irqsave(&bfs->lock, flags);
	bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	folio_clear_uptodate(folio);
	spin_unlock_irqrestore(&bfs->lock, flags);
}
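
/*
 * A small illustration (assumed 4K blocks in a 64K folio) of how the per-block
 * uptodate bits aggregate into the folio flag: the folio is only marked
 * uptodate once every block has been marked, e.g. in a read completion path:
 *
 *	// Each completed 4K block sets one bit; folio_mark_uptodate() only
 *	// happens on the call that sets the final missing bit.
 *	btrfs_subpage_set_uptodate(fs_info, folio, folio_pos(folio), 4096);
 *	...
 *	btrfs_subpage_set_uptodate(fs_info, folio, folio_pos(folio) + 60 * 1024, 4096);
 *
 * Clearing any block's uptodate bit immediately clears the folio flag, since a
 * partially valid folio must not be treated as fully uptodate.
 */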

void btrfs_subpage_set_dirty(const struct btrfs_fs_info *fs_info,
			     struct folio *folio, u64 start, u32 len)
{
	struct btrfs_folio_state *bfs = folio_get_private(folio);
	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
							dirty, start, len);
	unsigned long flags;

	spin_lock_irqsave(&bfs->lock, flags);
	bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	spin_unlock_irqrestore(&bfs->lock, flags);
	folio_mark_dirty(folio);
}

/*
 * Extra clear_and_test function for the subpage dirty bitmap.
 *
 * Return true if we cleared the last dirty bits in the dirty bitmap.
 * Return false otherwise.
 *
 * NOTE: Callers should manually clear the folio dirty flag for the true case,
 * as we have extra handling for tree blocks.
 */
bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info,
					struct folio *folio, u64 start, u32 len)
{
	struct btrfs_folio_state *bfs = folio_get_private(folio);
	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
							dirty, start, len);
	unsigned long flags;
	bool last = false;

	spin_lock_irqsave(&bfs->lock, flags);
	bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	if (subpage_test_bitmap_all_zero(fs_info, folio, dirty))
		last = true;
	spin_unlock_irqrestore(&bfs->lock, flags);
	return last;
}

void btrfs_subpage_clear_dirty(const struct btrfs_fs_info *fs_info,
			       struct folio *folio, u64 start, u32 len)
{
	bool last;

	last = btrfs_subpage_clear_and_test_dirty(fs_info, folio, start, len);
	if (last)
		folio_clear_dirty_for_io(folio);
}
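
/*
 * Illustrative usage (assumed caller, not part of this file): a writeback path
 * that submits one block at a time would typically clear the per-block dirty
 * bit first and only clear the folio-level dirty flag when the last block in
 * the folio goes clean:
 *
 *	if (btrfs_subpage_clear_and_test_dirty(fs_info, folio, cur, blocksize))
 *		folio_clear_dirty_for_io(folio);
 *
 * which is exactly what btrfs_subpage_clear_dirty() wraps; callers that need
 * extra per-tree-block handling use the clear_and_test variant directly.
 */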

void btrfs_subpage_set_writeback(const struct btrfs_fs_info *fs_info,
				 struct folio *folio, u64 start, u32 len)
{
	struct btrfs_folio_state *bfs = folio_get_private(folio);
	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
							writeback, start, len);
	unsigned long flags;

	spin_lock_irqsave(&bfs->lock, flags);
	bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);

	/*
	 * Don't clear the TOWRITE tag when starting writeback on a still-dirty
	 * folio.  Doing so can cause WB_SYNC_ALL writepages() to overlook it,
	 * assume writeback is complete, and exit too early, violating sync
	 * ordering guarantees.
	 */
	if (!folio_test_writeback(folio))
		__folio_start_writeback(folio, true);
	if (!folio_test_dirty(folio)) {
		struct address_space *mapping = folio_mapping(folio);
		XA_STATE(xas, &mapping->i_pages, folio->index);
		unsigned long flags;

		xas_lock_irqsave(&xas, flags);
		xas_load(&xas);
		xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE);
		xas_unlock_irqrestore(&xas, flags);
	}
	spin_unlock_irqrestore(&bfs->lock, flags);
}

void btrfs_subpage_clear_writeback(const struct btrfs_fs_info *fs_info,
				   struct folio *folio, u64 start, u32 len)
{
	struct btrfs_folio_state *bfs = folio_get_private(folio);
	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
							writeback, start, len);
	unsigned long flags;

	spin_lock_irqsave(&bfs->lock, flags);
	bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	if (subpage_test_bitmap_all_zero(fs_info, folio, writeback)) {
		ASSERT(folio_test_writeback(folio));
		folio_end_writeback(folio);
	}
	spin_unlock_irqrestore(&bfs->lock, flags);
}

void btrfs_subpage_set_ordered(const struct btrfs_fs_info *fs_info,
			       struct folio *folio, u64 start, u32 len)
{
	struct btrfs_folio_state *bfs = folio_get_private(folio);
	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
							ordered, start, len);
	unsigned long flags;

	spin_lock_irqsave(&bfs->lock, flags);
	bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	folio_set_ordered(folio);
	spin_unlock_irqrestore(&bfs->lock, flags);
}

void btrfs_subpage_clear_ordered(const struct btrfs_fs_info *fs_info,
				 struct folio *folio, u64 start, u32 len)
{
	struct btrfs_folio_state *bfs = folio_get_private(folio);
	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
							ordered, start, len);
	unsigned long flags;

	spin_lock_irqsave(&bfs->lock, flags);
	bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	if (subpage_test_bitmap_all_zero(fs_info, folio, ordered))
		folio_clear_ordered(folio);
	spin_unlock_irqrestore(&bfs->lock, flags);
}

void btrfs_subpage_set_checked(const struct btrfs_fs_info *fs_info,
			       struct folio *folio, u64 start, u32 len)
{
	struct btrfs_folio_state *bfs = folio_get_private(folio);
	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
							checked, start, len);
	unsigned long flags;

	spin_lock_irqsave(&bfs->lock, flags);
	bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	if (subpage_test_bitmap_all_set(fs_info, folio, checked))
		folio_set_checked(folio);
	spin_unlock_irqrestore(&bfs->lock, flags);
}

void btrfs_subpage_clear_checked(const struct btrfs_fs_info *fs_info,
				 struct folio *folio, u64 start, u32 len)
{
	struct btrfs_folio_state *bfs = folio_get_private(folio);
	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
							checked, start, len);
	unsigned long flags;

	spin_lock_irqsave(&bfs->lock, flags);
	bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	folio_clear_checked(folio);
	spin_unlock_irqrestore(&bfs->lock, flags);
}
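
/*
 * A brief illustration of the writeback aggregation (assumed 4K blocks): the
 * folio-level writeback state is started once and only ended when the last
 * block finishes, so per-block completions pair up like:
 *
 *	btrfs_subpage_set_writeback(fs_info, folio, block_start, blocksize);
 *	// ... the bio for that block completes ...
 *	btrfs_subpage_clear_writeback(fs_info, folio, block_start, blocksize);
 *
 * Only the clear call that zeroes the last writeback bit in the folio invokes
 * folio_end_writeback(); earlier completions leave the folio under writeback.
 */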

/*
 * Unlike set/clear, which depends on each page status, for test all bits
 * are tested in the same way.
 */
#define IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(name)				\
bool btrfs_subpage_test_##name(const struct btrfs_fs_info *fs_info,	\
			       struct folio *folio, u64 start, u32 len)	\
{									\
	struct btrfs_folio_state *bfs = folio_get_private(folio);	\
	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,	\
						name, start, len);	\
	unsigned long flags;						\
	bool ret;							\
									\
	spin_lock_irqsave(&bfs->lock, flags);				\
	ret = bitmap_test_range_all_set(bfs->bitmaps, start_bit,	\
				len >> fs_info->sectorsize_bits);	\
	spin_unlock_irqrestore(&bfs->lock, flags);			\
	return ret;							\
}
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(uptodate);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(dirty);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(writeback);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(ordered);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(checked);
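
/*
 * The instantiations above generate e.g. btrfs_subpage_test_dirty(); a caller
 * checking whether a single block is dirty in a subpage folio would do
 * (sketch, @cur is an assumed variable):
 *
 *	if (btrfs_subpage_test_dirty(fs_info, folio, cur, fs_info->sectorsize))
 *		// the block at byte offset @cur still needs writeback
 *		...
 *
 * The result is true only if every block covered by [start, start + len) has
 * its bit set in the corresponding bitmap group.
 */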

/*
 * Note that in selftests (extent-io-tests), we can have a NULL fs_info passed
 * in.  We only test sectorsize == PAGE_SIZE cases so far, thus we can fall
 * back to the regular sectorsize branch.
 */
#define IMPLEMENT_BTRFS_PAGE_OPS(name, folio_set_func,			\
				 folio_clear_func, folio_test_func)	\
void btrfs_folio_set_##name(const struct btrfs_fs_info *fs_info,	\
			    struct folio *folio, u64 start, u32 len)	\
{									\
	if (unlikely(!fs_info) ||					\
	    !btrfs_is_subpage(fs_info, folio)) {			\
		folio_set_func(folio);					\
		return;							\
	}								\
	btrfs_subpage_set_##name(fs_info, folio, start, len);		\
}									\
void btrfs_folio_clear_##name(const struct btrfs_fs_info *fs_info,	\
			      struct folio *folio, u64 start, u32 len)	\
{									\
	if (unlikely(!fs_info) ||					\
	    !btrfs_is_subpage(fs_info, folio)) {			\
		folio_clear_func(folio);				\
		return;							\
	}								\
	btrfs_subpage_clear_##name(fs_info, folio, start, len);	\
}									\
bool btrfs_folio_test_##name(const struct btrfs_fs_info *fs_info,	\
			     struct folio *folio, u64 start, u32 len)	\
{									\
	if (unlikely(!fs_info) ||					\
	    !btrfs_is_subpage(fs_info, folio))				\
		return folio_test_func(folio);				\
	return btrfs_subpage_test_##name(fs_info, folio, start, len);	\
}									\
void btrfs_folio_clamp_set_##name(const struct btrfs_fs_info *fs_info,	\
				  struct folio *folio, u64 start, u32 len) \
{									\
	if (unlikely(!fs_info) ||					\
	    !btrfs_is_subpage(fs_info, folio)) {			\
		folio_set_func(folio);					\
		return;							\
	}								\
	btrfs_subpage_clamp_range(folio, &start, &len);			\
	btrfs_subpage_set_##name(fs_info, folio, start, len);		\
}									\
void btrfs_folio_clamp_clear_##name(const struct btrfs_fs_info *fs_info, \
				    struct folio *folio, u64 start, u32 len) \
{									\
	if (unlikely(!fs_info) ||					\
	    !btrfs_is_subpage(fs_info, folio)) {			\
		folio_clear_func(folio);				\
		return;							\
	}								\
	btrfs_subpage_clamp_range(folio, &start, &len);			\
	btrfs_subpage_clear_##name(fs_info, folio, start, len);		\
}									\
bool btrfs_folio_clamp_test_##name(const struct btrfs_fs_info *fs_info, \
				   struct folio *folio, u64 start, u32 len) \
{									\
	if (unlikely(!fs_info) ||					\
	    !btrfs_is_subpage(fs_info, folio))				\
		return folio_test_func(folio);				\
	btrfs_subpage_clamp_range(folio, &start, &len);			\
	return btrfs_subpage_test_##name(fs_info, folio, start, len);	\
}									\
void btrfs_meta_folio_set_##name(struct folio *folio, const struct extent_buffer *eb) \
{									\
	if (!btrfs_meta_is_subpage(eb->fs_info)) {			\
		folio_set_func(folio);					\
		return;							\
	}								\
	btrfs_subpage_set_##name(eb->fs_info, folio, eb->start, eb->len); \
}									\
void btrfs_meta_folio_clear_##name(struct folio *folio, const struct extent_buffer *eb) \
{									\
	if (!btrfs_meta_is_subpage(eb->fs_info)) {			\
		folio_clear_func(folio);				\
		return;							\
	}								\
	btrfs_subpage_clear_##name(eb->fs_info, folio, eb->start, eb->len); \
}									\
bool btrfs_meta_folio_test_##name(struct folio *folio, const struct extent_buffer *eb) \
{									\
	if (!btrfs_meta_is_subpage(eb->fs_info))			\
		return folio_test_func(folio);				\
	return btrfs_subpage_test_##name(eb->fs_info, folio, eb->start, eb->len); \
}
IMPLEMENT_BTRFS_PAGE_OPS(uptodate, folio_mark_uptodate, folio_clear_uptodate,
			 folio_test_uptodate);
IMPLEMENT_BTRFS_PAGE_OPS(dirty, folio_mark_dirty, folio_clear_dirty_for_io,
			 folio_test_dirty);
IMPLEMENT_BTRFS_PAGE_OPS(writeback, folio_start_writeback, folio_end_writeback,
			 folio_test_writeback);
IMPLEMENT_BTRFS_PAGE_OPS(ordered, folio_set_ordered, folio_clear_ordered,
			 folio_test_ordered);
IMPLEMENT_BTRFS_PAGE_OPS(checked, folio_set_checked, folio_clear_checked,
			 folio_test_checked);

#define GET_SUBPAGE_BITMAP(fs_info, folio, name, dst)			\
{									\
	const unsigned int blocks_per_folio =				\
				btrfs_blocks_per_folio(fs_info, folio);	\
	const struct btrfs_folio_state *bfs = folio_get_private(folio);	\
									\
	ASSERT(blocks_per_folio <= BITS_PER_LONG);			\
	*dst = bitmap_read(bfs->bitmaps,				\
			   blocks_per_folio * btrfs_bitmap_nr_##name,	\
			   blocks_per_folio);				\
}

#define SUBPAGE_DUMP_BITMAP(fs_info, folio, name, start, len)		\
{									\
	unsigned long bitmap;						\
	const unsigned int blocks_per_folio =				\
				btrfs_blocks_per_folio(fs_info, folio);	\
									\
	GET_SUBPAGE_BITMAP(fs_info, folio, name, &bitmap);		\
	btrfs_warn(fs_info,						\
	"dumping bitmap start=%llu len=%u folio=%llu " #name "_bitmap=%*pbl", \
		   start, len, folio_pos(folio),			\
		   blocks_per_folio, &bitmap);				\
}

/*
 * Make sure not only the page dirty bit is cleared, but also the subpage
 * dirty bit is cleared.
 */
void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info,
				  struct folio *folio, u64 start, u32 len)
{
	struct btrfs_folio_state *bfs;
	unsigned int start_bit;
	unsigned int nbits;
	unsigned long flags;

	if (!IS_ENABLED(CONFIG_BTRFS_ASSERT))
		return;

	if (!btrfs_is_subpage(fs_info, folio)) {
		ASSERT(!folio_test_dirty(folio));
		return;
	}

	start_bit = subpage_calc_start_bit(fs_info, folio, dirty, start, len);
	nbits = len >> fs_info->sectorsize_bits;
	bfs = folio_get_private(folio);
	ASSERT(bfs);
	spin_lock_irqsave(&bfs->lock, flags);
	if (unlikely(!bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits))) {
		SUBPAGE_DUMP_BITMAP(fs_info, folio, dirty, start, len);
		ASSERT(bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits));
	}
	ASSERT(bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits));
	spin_unlock_irqrestore(&bfs->lock, flags);
}
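
/*
 * Usage sketch for the clamp variants generated above (assumed caller code):
 * a range spanning several folios can be applied blindly to each folio, and
 * btrfs_subpage_clamp_range() trims it to the part that actually overlaps:
 *
 *	// @start/@len may extend before/past this folio; only the overlap
 *	// inside the folio gets its dirty bits set.
 *	btrfs_folio_clamp_set_dirty(fs_info, folio, start, len);
 *
 * The non-clamp variants expect the range to be fully contained in a mapped
 * folio (enforced by btrfs_subpage_assert()), and all of them fall back to the
 * plain folio flag helpers when the filesystem is not using subpage blocks.
 */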

/*
 * This is for a folio already locked by plain lock_page()/folio_lock(), which
 * doesn't have any subpage awareness.
 *
 * This populates the involved subpage ranges so that subpage helpers can
 * properly unlock them.
 */
void btrfs_folio_set_lock(const struct btrfs_fs_info *fs_info,
			  struct folio *folio, u64 start, u32 len)
{
	struct btrfs_folio_state *bfs;
	unsigned long flags;
	unsigned int start_bit;
	unsigned int nbits;
	int ret;

	ASSERT(folio_test_locked(folio));
	if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio))
		return;

	bfs = folio_get_private(folio);
	start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len);
	nbits = len >> fs_info->sectorsize_bits;
	spin_lock_irqsave(&bfs->lock, flags);
	/* Target range should not yet be locked. */
	if (unlikely(!bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits))) {
		SUBPAGE_DUMP_BITMAP(fs_info, folio, locked, start, len);
		ASSERT(bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits));
	}
	bitmap_set(bfs->bitmaps, start_bit, nbits);
	ret = atomic_add_return(nbits, &bfs->nr_locked);
	ASSERT(ret <= btrfs_blocks_per_folio(fs_info, folio));
	spin_unlock_irqrestore(&bfs->lock, flags);
}

/*
 * Clear the dirty flag for the folio.
 *
 * If the affected folio is no longer dirty, return true.  Otherwise return false.
 */
bool btrfs_meta_folio_clear_and_test_dirty(struct folio *folio, const struct extent_buffer *eb)
{
	bool last;

	if (!btrfs_meta_is_subpage(eb->fs_info)) {
		folio_clear_dirty_for_io(folio);
		return true;
	}

	last = btrfs_subpage_clear_and_test_dirty(eb->fs_info, folio, eb->start, eb->len);
	if (last) {
		folio_clear_dirty_for_io(folio);
		return true;
	}
	return false;
}
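
/*
 * Pairing sketch (assumed delalloc-style caller): a folio locked with plain
 * folio_lock() gets its subpage locked bitmap populated so that later
 * range-based unlocks work, e.g.:
 *
 *	folio_lock(folio);
 *	btrfs_folio_set_lock(fs_info, folio, start, len);
 *	// ... submit IO / process the range ...
 *	btrfs_folio_end_lock(fs_info, folio, start, len);
 *
 * btrfs_folio_end_lock() drops the per-block locked bits and only calls
 * folio_unlock() once the last locked range in the folio is released.
 */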

void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info,
				      struct folio *folio, u64 start, u32 len)
{
	struct btrfs_folio_state *bfs;
	const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio);
	unsigned long uptodate_bitmap;
	unsigned long dirty_bitmap;
	unsigned long writeback_bitmap;
	unsigned long ordered_bitmap;
	unsigned long checked_bitmap;
	unsigned long locked_bitmap;
	unsigned long flags;

	ASSERT(folio_test_private(folio) && folio_get_private(folio));
	ASSERT(blocks_per_folio > 1);
	bfs = folio_get_private(folio);

	spin_lock_irqsave(&bfs->lock, flags);
	GET_SUBPAGE_BITMAP(fs_info, folio, uptodate, &uptodate_bitmap);
	GET_SUBPAGE_BITMAP(fs_info, folio, dirty, &dirty_bitmap);
	GET_SUBPAGE_BITMAP(fs_info, folio, writeback, &writeback_bitmap);
	GET_SUBPAGE_BITMAP(fs_info, folio, ordered, &ordered_bitmap);
	GET_SUBPAGE_BITMAP(fs_info, folio, checked, &checked_bitmap);
	GET_SUBPAGE_BITMAP(fs_info, folio, locked, &locked_bitmap);
	spin_unlock_irqrestore(&bfs->lock, flags);

	dump_page(folio_page(folio, 0), "btrfs folio state dump");
	btrfs_warn(fs_info,
"start=%llu len=%u page=%llu, bitmaps uptodate=%*pbl dirty=%*pbl locked=%*pbl writeback=%*pbl ordered=%*pbl checked=%*pbl",
		   start, len, folio_pos(folio),
		   blocks_per_folio, &uptodate_bitmap,
		   blocks_per_folio, &dirty_bitmap,
		   blocks_per_folio, &locked_bitmap,
		   blocks_per_folio, &writeback_bitmap,
		   blocks_per_folio, &ordered_bitmap,
		   blocks_per_folio, &checked_bitmap);
}

void btrfs_get_subpage_dirty_bitmap(struct btrfs_fs_info *fs_info,
				    struct folio *folio,
				    unsigned long *ret_bitmap)
{
	struct btrfs_folio_state *bfs;
	unsigned long flags;

	ASSERT(folio_test_private(folio) && folio_get_private(folio));
	ASSERT(btrfs_blocks_per_folio(fs_info, folio) > 1);
	bfs = folio_get_private(folio);

	spin_lock_irqsave(&bfs->lock, flags);
	GET_SUBPAGE_BITMAP(fs_info, folio, dirty, ret_bitmap);
	spin_unlock_irqrestore(&bfs->lock, flags);
}
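
/*
 * Usage sketch for btrfs_get_subpage_dirty_bitmap() (assumed caller): a
 * metadata writeback path can snapshot the dirty bitmap once and then walk
 * only the dirty blocks of the folio:
 *
 *	unsigned long dirty;
 *	int bit;
 *
 *	btrfs_get_subpage_dirty_bitmap(fs_info, folio, &dirty);
 *	for_each_set_bit(bit, &dirty, btrfs_blocks_per_folio(fs_info, folio)) {
 *		// block at folio_pos(folio) + (bit << fs_info->sectorsize_bits)
 *		// still needs to be written back
 *	}
 *
 * The snapshot is taken under bfs->lock; any further synchronization against
 * concurrent bitmap updates is up to the caller.
 */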