1 // SPDX-License-Identifier: GPL-2.0 2 3 #include <linux/slab.h> 4 #include "messages.h" 5 #include "subpage.h" 6 #include "btrfs_inode.h" 7 8 /* 9 * Subpage (block size < folio size) support overview: 10 * 11 * Limitations: 12 * 13 * - Only support 64K page size for now 14 * This is to make metadata handling easier, as 64K page would ensure 15 * all nodesize would fit inside one page, thus we don't need to handle 16 * cases where a tree block crosses several pages. 17 * 18 * - Only metadata read-write for now 19 * The data read-write part is in development. 20 * 21 * - Metadata can't cross 64K page boundary 22 * btrfs-progs and kernel have done that for a while, thus only ancient 23 * filesystems could have such problem. For such case, do a graceful 24 * rejection. 25 * 26 * Special behavior: 27 * 28 * - Metadata 29 * Metadata read is fully supported. 30 * Meaning when reading one tree block will only trigger the read for the 31 * needed range, other unrelated range in the same page will not be touched. 32 * 33 * Metadata write support is partial. 34 * The writeback is still for the full page, but we will only submit 35 * the dirty extent buffers in the page. 36 * 37 * This means, if we have a metadata page like this: 38 * 39 * Page offset 40 * 0 16K 32K 48K 64K 41 * |/////////| |///////////| 42 * \- Tree block A \- Tree block B 43 * 44 * Even if we just want to writeback tree block A, we will also writeback 45 * tree block B if it's also dirty. 46 * 47 * This may cause extra metadata writeback which results more COW. 48 * 49 * Implementation: 50 * 51 * - Common 52 * Both metadata and data will use a new structure, btrfs_folio_state, to 53 * record the status of each sector inside a page. This provides the extra 54 * granularity needed. 55 * 56 * - Metadata 57 * Since we have multiple tree blocks inside one page, we can't rely on page 58 * locking anymore, or we will have greatly reduced concurrency or even 59 * deadlocks (hold one tree lock while trying to lock another tree lock in 60 * the same page). 61 * 62 * Thus for metadata locking, subpage support relies on io_tree locking only. 63 * This means a slightly higher tree locking latency. 64 */ 65 66 int btrfs_attach_folio_state(const struct btrfs_fs_info *fs_info, 67 struct folio *folio, enum btrfs_folio_type type) 68 { 69 struct btrfs_folio_state *bfs; 70 71 /* For metadata we don't support large folio yet. */ 72 if (type == BTRFS_SUBPAGE_METADATA) 73 ASSERT(!folio_test_large(folio)); 74 75 /* 76 * We have cases like a dummy extent buffer page, which is not mapped 77 * and doesn't need to be locked. 78 */ 79 if (folio->mapping) 80 ASSERT(folio_test_locked(folio)); 81 82 /* Either not subpage, or the folio already has private attached. */ 83 if (folio_test_private(folio)) 84 return 0; 85 if (type == BTRFS_SUBPAGE_METADATA && !btrfs_meta_is_subpage(fs_info)) 86 return 0; 87 if (type == BTRFS_SUBPAGE_DATA && !btrfs_is_subpage(fs_info, folio)) 88 return 0; 89 90 bfs = btrfs_alloc_folio_state(fs_info, folio_size(folio), type); 91 if (IS_ERR(bfs)) 92 return PTR_ERR(bfs); 93 94 folio_attach_private(folio, bfs); 95 return 0; 96 } 97 98 void btrfs_detach_folio_state(const struct btrfs_fs_info *fs_info, struct folio *folio, 99 enum btrfs_folio_type type) 100 { 101 struct btrfs_folio_state *bfs; 102 103 /* Either not subpage, or the folio already has private attached. */ 104 if (!folio_test_private(folio)) 105 return; 106 if (type == BTRFS_SUBPAGE_METADATA && !btrfs_meta_is_subpage(fs_info)) 107 return; 108 if (type == BTRFS_SUBPAGE_DATA && !btrfs_is_subpage(fs_info, folio)) 109 return; 110 111 bfs = folio_detach_private(folio); 112 ASSERT(bfs); 113 btrfs_free_folio_state(bfs); 114 } 115 116 struct btrfs_folio_state *btrfs_alloc_folio_state(const struct btrfs_fs_info *fs_info, 117 size_t fsize, enum btrfs_folio_type type) 118 { 119 struct btrfs_folio_state *ret; 120 unsigned int real_size; 121 122 ASSERT(fs_info->sectorsize < fsize); 123 124 real_size = struct_size(ret, bitmaps, 125 BITS_TO_LONGS(btrfs_bitmap_nr_max * 126 (fsize >> fs_info->sectorsize_bits))); 127 ret = kzalloc(real_size, GFP_NOFS); 128 if (!ret) 129 return ERR_PTR(-ENOMEM); 130 131 spin_lock_init(&ret->lock); 132 if (type == BTRFS_SUBPAGE_METADATA) 133 atomic_set(&ret->eb_refs, 0); 134 else 135 atomic_set(&ret->nr_locked, 0); 136 return ret; 137 } 138 139 /* 140 * Increase the eb_refs of current subpage. 141 * 142 * This is important for eb allocation, to prevent race with last eb freeing 143 * of the same page. 144 * With the eb_refs increased before the eb inserted into radix tree, 145 * detach_extent_buffer_page() won't detach the folio private while we're still 146 * allocating the extent buffer. 147 */ 148 void btrfs_folio_inc_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio) 149 { 150 struct btrfs_folio_state *bfs; 151 152 if (!btrfs_meta_is_subpage(fs_info)) 153 return; 154 155 ASSERT(folio_test_private(folio) && folio->mapping); 156 lockdep_assert_held(&folio->mapping->i_private_lock); 157 158 bfs = folio_get_private(folio); 159 atomic_inc(&bfs->eb_refs); 160 } 161 162 void btrfs_folio_dec_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio) 163 { 164 struct btrfs_folio_state *bfs; 165 166 if (!btrfs_meta_is_subpage(fs_info)) 167 return; 168 169 ASSERT(folio_test_private(folio) && folio->mapping); 170 lockdep_assert_held(&folio->mapping->i_private_lock); 171 172 bfs = folio_get_private(folio); 173 ASSERT(atomic_read(&bfs->eb_refs)); 174 atomic_dec(&bfs->eb_refs); 175 } 176 177 static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info, 178 struct folio *folio, u64 start, u32 len) 179 { 180 /* Basic checks */ 181 ASSERT(folio_test_private(folio) && folio_get_private(folio)); 182 ASSERT(IS_ALIGNED(start, fs_info->sectorsize) && 183 IS_ALIGNED(len, fs_info->sectorsize)); 184 /* 185 * The range check only works for mapped page, we can still have 186 * unmapped page like dummy extent buffer pages. 187 */ 188 if (folio->mapping) 189 ASSERT(folio_pos(folio) <= start && 190 start + len <= folio_next_pos(folio), 191 "start=%llu len=%u folio_pos=%llu folio_size=%zu", 192 start, len, folio_pos(folio), folio_size(folio)); 193 } 194 195 #define subpage_calc_start_bit(fs_info, folio, name, start, len) \ 196 ({ \ 197 unsigned int __start_bit; \ 198 const unsigned int blocks_per_folio = \ 199 btrfs_blocks_per_folio(fs_info, folio); \ 200 \ 201 btrfs_subpage_assert(fs_info, folio, start, len); \ 202 __start_bit = offset_in_folio(folio, start) >> fs_info->sectorsize_bits; \ 203 __start_bit += blocks_per_folio * btrfs_bitmap_nr_##name; \ 204 __start_bit; \ 205 }) 206 207 static void btrfs_subpage_clamp_range(struct folio *folio, u64 *start, u32 *len) 208 { 209 u64 orig_start = *start; 210 u32 orig_len = *len; 211 212 *start = max_t(u64, folio_pos(folio), orig_start); 213 /* 214 * For certain call sites like btrfs_drop_pages(), we may have pages 215 * beyond the target range. In that case, just set @len to 0, subpage 216 * helpers can handle @len == 0 without any problem. 217 */ 218 if (folio_pos(folio) >= orig_start + orig_len) 219 *len = 0; 220 else 221 *len = min_t(u64, folio_next_pos(folio), orig_start + orig_len) - *start; 222 } 223 224 static bool btrfs_subpage_end_and_test_lock(const struct btrfs_fs_info *fs_info, 225 struct folio *folio, u64 start, u32 len) 226 { 227 struct btrfs_folio_state *bfs = folio_get_private(folio); 228 const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len); 229 const int nbits = (len >> fs_info->sectorsize_bits); 230 unsigned long flags; 231 unsigned int cleared = 0; 232 int bit = start_bit; 233 bool last; 234 235 btrfs_subpage_assert(fs_info, folio, start, len); 236 237 spin_lock_irqsave(&bfs->lock, flags); 238 /* 239 * We have call sites passing @lock_page into 240 * extent_clear_unlock_delalloc() for compression path. 241 * 242 * This @locked_page is locked by plain lock_page(), thus its 243 * subpage::locked is 0. Handle them in a special way. 244 */ 245 if (atomic_read(&bfs->nr_locked) == 0) { 246 spin_unlock_irqrestore(&bfs->lock, flags); 247 return true; 248 } 249 250 for_each_set_bit_from(bit, bfs->bitmaps, start_bit + nbits) { 251 clear_bit(bit, bfs->bitmaps); 252 cleared++; 253 } 254 ASSERT(atomic_read(&bfs->nr_locked) >= cleared); 255 last = atomic_sub_and_test(cleared, &bfs->nr_locked); 256 spin_unlock_irqrestore(&bfs->lock, flags); 257 return last; 258 } 259 260 /* 261 * Handle different locked folios: 262 * 263 * - Non-subpage folio 264 * Just unlock it. 265 * 266 * - folio locked but without any subpage locked 267 * This happens either before writepage_delalloc() or the delalloc range is 268 * already handled by previous folio. 269 * We can simple unlock it. 270 * 271 * - folio locked with subpage range locked. 272 * We go through the locked sectors inside the range and clear their locked 273 * bitmap, reduce the writer lock number, and unlock the page if that's 274 * the last locked range. 275 */ 276 void btrfs_folio_end_lock(const struct btrfs_fs_info *fs_info, 277 struct folio *folio, u64 start, u32 len) 278 { 279 struct btrfs_folio_state *bfs = folio_get_private(folio); 280 281 ASSERT(folio_test_locked(folio)); 282 283 if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio)) { 284 folio_unlock(folio); 285 return; 286 } 287 288 /* 289 * For subpage case, there are two types of locked page. With or 290 * without locked number. 291 * 292 * Since we own the page lock, no one else could touch subpage::locked 293 * and we are safe to do several atomic operations without spinlock. 294 */ 295 if (atomic_read(&bfs->nr_locked) == 0) { 296 /* No subpage lock, locked by plain lock_page(). */ 297 folio_unlock(folio); 298 return; 299 } 300 301 btrfs_subpage_clamp_range(folio, &start, &len); 302 if (btrfs_subpage_end_and_test_lock(fs_info, folio, start, len)) 303 folio_unlock(folio); 304 } 305 306 void btrfs_folio_end_lock_bitmap(const struct btrfs_fs_info *fs_info, 307 struct folio *folio, unsigned long bitmap) 308 { 309 struct btrfs_folio_state *bfs = folio_get_private(folio); 310 const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio); 311 const int start_bit = blocks_per_folio * btrfs_bitmap_nr_locked; 312 unsigned long flags; 313 bool last = false; 314 int cleared = 0; 315 int bit; 316 317 if (!btrfs_is_subpage(fs_info, folio)) { 318 folio_unlock(folio); 319 return; 320 } 321 322 if (atomic_read(&bfs->nr_locked) == 0) { 323 /* No subpage lock, locked by plain lock_page(). */ 324 folio_unlock(folio); 325 return; 326 } 327 328 spin_lock_irqsave(&bfs->lock, flags); 329 for_each_set_bit(bit, &bitmap, blocks_per_folio) { 330 if (test_and_clear_bit(bit + start_bit, bfs->bitmaps)) 331 cleared++; 332 } 333 ASSERT(atomic_read(&bfs->nr_locked) >= cleared); 334 last = atomic_sub_and_test(cleared, &bfs->nr_locked); 335 spin_unlock_irqrestore(&bfs->lock, flags); 336 if (last) 337 folio_unlock(folio); 338 } 339 340 #define subpage_test_bitmap_all_set(fs_info, folio, name) \ 341 ({ \ 342 struct btrfs_folio_state *bfs = folio_get_private(folio); \ 343 const unsigned int blocks_per_folio = \ 344 btrfs_blocks_per_folio(fs_info, folio); \ 345 \ 346 bitmap_test_range_all_set(bfs->bitmaps, \ 347 blocks_per_folio * btrfs_bitmap_nr_##name, \ 348 blocks_per_folio); \ 349 }) 350 351 #define subpage_test_bitmap_all_zero(fs_info, folio, name) \ 352 ({ \ 353 struct btrfs_folio_state *bfs = folio_get_private(folio); \ 354 const unsigned int blocks_per_folio = \ 355 btrfs_blocks_per_folio(fs_info, folio); \ 356 \ 357 bitmap_test_range_all_zero(bfs->bitmaps, \ 358 blocks_per_folio * btrfs_bitmap_nr_##name, \ 359 blocks_per_folio); \ 360 }) 361 362 void btrfs_subpage_set_uptodate(const struct btrfs_fs_info *fs_info, 363 struct folio *folio, u64 start, u32 len) 364 { 365 struct btrfs_folio_state *bfs = folio_get_private(folio); 366 unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, 367 uptodate, start, len); 368 unsigned long flags; 369 370 spin_lock_irqsave(&bfs->lock, flags); 371 bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); 372 if (subpage_test_bitmap_all_set(fs_info, folio, uptodate)) 373 folio_mark_uptodate(folio); 374 spin_unlock_irqrestore(&bfs->lock, flags); 375 } 376 377 void btrfs_subpage_clear_uptodate(const struct btrfs_fs_info *fs_info, 378 struct folio *folio, u64 start, u32 len) 379 { 380 struct btrfs_folio_state *bfs = folio_get_private(folio); 381 unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, 382 uptodate, start, len); 383 unsigned long flags; 384 385 spin_lock_irqsave(&bfs->lock, flags); 386 bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); 387 folio_clear_uptodate(folio); 388 spin_unlock_irqrestore(&bfs->lock, flags); 389 } 390 391 void btrfs_subpage_set_dirty(const struct btrfs_fs_info *fs_info, 392 struct folio *folio, u64 start, u32 len) 393 { 394 struct btrfs_folio_state *bfs = folio_get_private(folio); 395 unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, 396 dirty, start, len); 397 unsigned long flags; 398 399 spin_lock_irqsave(&bfs->lock, flags); 400 bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); 401 spin_unlock_irqrestore(&bfs->lock, flags); 402 folio_mark_dirty(folio); 403 } 404 405 /* 406 * Extra clear_and_test function for subpage dirty bitmap. 407 * 408 * Return true if we're the last bits in the dirty_bitmap and clear the 409 * dirty_bitmap. 410 * Return false otherwise. 411 * 412 * NOTE: Callers should manually clear page dirty for true case, as we have 413 * extra handling for tree blocks. 414 */ 415 bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info, 416 struct folio *folio, u64 start, u32 len) 417 { 418 struct btrfs_folio_state *bfs = folio_get_private(folio); 419 unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, 420 dirty, start, len); 421 unsigned long flags; 422 bool last = false; 423 424 spin_lock_irqsave(&bfs->lock, flags); 425 bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); 426 if (subpage_test_bitmap_all_zero(fs_info, folio, dirty)) 427 last = true; 428 spin_unlock_irqrestore(&bfs->lock, flags); 429 return last; 430 } 431 432 void btrfs_subpage_clear_dirty(const struct btrfs_fs_info *fs_info, 433 struct folio *folio, u64 start, u32 len) 434 { 435 bool last; 436 437 last = btrfs_subpage_clear_and_test_dirty(fs_info, folio, start, len); 438 if (last) 439 folio_clear_dirty_for_io(folio); 440 } 441 442 void btrfs_subpage_set_writeback(const struct btrfs_fs_info *fs_info, 443 struct folio *folio, u64 start, u32 len) 444 { 445 struct btrfs_folio_state *bfs = folio_get_private(folio); 446 unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, 447 writeback, start, len); 448 unsigned long flags; 449 450 spin_lock_irqsave(&bfs->lock, flags); 451 bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); 452 453 /* 454 * Don't clear the TOWRITE tag when starting writeback on a still-dirty 455 * folio. Doing so can cause WB_SYNC_ALL writepages() to overlook it, 456 * assume writeback is complete, and exit too early — violating sync 457 * ordering guarantees. 458 */ 459 if (!folio_test_writeback(folio)) 460 __folio_start_writeback(folio, true); 461 if (!folio_test_dirty(folio)) { 462 struct address_space *mapping = folio_mapping(folio); 463 XA_STATE(xas, &mapping->i_pages, folio->index); 464 unsigned long flags; 465 466 xas_lock_irqsave(&xas, flags); 467 xas_load(&xas); 468 xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE); 469 xas_unlock_irqrestore(&xas, flags); 470 } 471 spin_unlock_irqrestore(&bfs->lock, flags); 472 } 473 474 void btrfs_subpage_clear_writeback(const struct btrfs_fs_info *fs_info, 475 struct folio *folio, u64 start, u32 len) 476 { 477 struct btrfs_folio_state *bfs = folio_get_private(folio); 478 unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, 479 writeback, start, len); 480 unsigned long flags; 481 482 spin_lock_irqsave(&bfs->lock, flags); 483 bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); 484 if (subpage_test_bitmap_all_zero(fs_info, folio, writeback)) { 485 ASSERT(folio_test_writeback(folio)); 486 folio_end_writeback(folio); 487 } 488 spin_unlock_irqrestore(&bfs->lock, flags); 489 } 490 491 void btrfs_subpage_set_ordered(const struct btrfs_fs_info *fs_info, 492 struct folio *folio, u64 start, u32 len) 493 { 494 struct btrfs_folio_state *bfs = folio_get_private(folio); 495 unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, 496 ordered, start, len); 497 unsigned long flags; 498 499 spin_lock_irqsave(&bfs->lock, flags); 500 bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); 501 folio_set_ordered(folio); 502 spin_unlock_irqrestore(&bfs->lock, flags); 503 } 504 505 void btrfs_subpage_clear_ordered(const struct btrfs_fs_info *fs_info, 506 struct folio *folio, u64 start, u32 len) 507 { 508 struct btrfs_folio_state *bfs = folio_get_private(folio); 509 unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, 510 ordered, start, len); 511 unsigned long flags; 512 513 spin_lock_irqsave(&bfs->lock, flags); 514 bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); 515 if (subpage_test_bitmap_all_zero(fs_info, folio, ordered)) 516 folio_clear_ordered(folio); 517 spin_unlock_irqrestore(&bfs->lock, flags); 518 } 519 520 void btrfs_subpage_set_checked(const struct btrfs_fs_info *fs_info, 521 struct folio *folio, u64 start, u32 len) 522 { 523 struct btrfs_folio_state *bfs = folio_get_private(folio); 524 unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, 525 checked, start, len); 526 unsigned long flags; 527 528 spin_lock_irqsave(&bfs->lock, flags); 529 bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); 530 if (subpage_test_bitmap_all_set(fs_info, folio, checked)) 531 folio_set_checked(folio); 532 spin_unlock_irqrestore(&bfs->lock, flags); 533 } 534 535 void btrfs_subpage_clear_checked(const struct btrfs_fs_info *fs_info, 536 struct folio *folio, u64 start, u32 len) 537 { 538 struct btrfs_folio_state *bfs = folio_get_private(folio); 539 unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, 540 checked, start, len); 541 unsigned long flags; 542 543 spin_lock_irqsave(&bfs->lock, flags); 544 bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); 545 folio_clear_checked(folio); 546 spin_unlock_irqrestore(&bfs->lock, flags); 547 } 548 549 /* 550 * Unlike set/clear which is dependent on each page status, for test all bits 551 * are tested in the same way. 552 */ 553 #define IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(name) \ 554 bool btrfs_subpage_test_##name(const struct btrfs_fs_info *fs_info, \ 555 struct folio *folio, u64 start, u32 len) \ 556 { \ 557 struct btrfs_folio_state *bfs = folio_get_private(folio); \ 558 unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, \ 559 name, start, len); \ 560 unsigned long flags; \ 561 bool ret; \ 562 \ 563 spin_lock_irqsave(&bfs->lock, flags); \ 564 ret = bitmap_test_range_all_set(bfs->bitmaps, start_bit, \ 565 len >> fs_info->sectorsize_bits); \ 566 spin_unlock_irqrestore(&bfs->lock, flags); \ 567 return ret; \ 568 } 569 IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(uptodate); 570 IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(dirty); 571 IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(writeback); 572 IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(ordered); 573 IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(checked); 574 575 /* 576 * Note that, in selftests (extent-io-tests), we can have empty fs_info passed 577 * in. We only test sectorsize == PAGE_SIZE cases so far, thus we can fall 578 * back to regular sectorsize branch. 579 */ 580 #define IMPLEMENT_BTRFS_PAGE_OPS(name, folio_set_func, \ 581 folio_clear_func, folio_test_func) \ 582 void btrfs_folio_set_##name(const struct btrfs_fs_info *fs_info, \ 583 struct folio *folio, u64 start, u32 len) \ 584 { \ 585 if (unlikely(!fs_info) || \ 586 !btrfs_is_subpage(fs_info, folio)) { \ 587 folio_set_func(folio); \ 588 return; \ 589 } \ 590 btrfs_subpage_set_##name(fs_info, folio, start, len); \ 591 } \ 592 void btrfs_folio_clear_##name(const struct btrfs_fs_info *fs_info, \ 593 struct folio *folio, u64 start, u32 len) \ 594 { \ 595 if (unlikely(!fs_info) || \ 596 !btrfs_is_subpage(fs_info, folio)) { \ 597 folio_clear_func(folio); \ 598 return; \ 599 } \ 600 btrfs_subpage_clear_##name(fs_info, folio, start, len); \ 601 } \ 602 bool btrfs_folio_test_##name(const struct btrfs_fs_info *fs_info, \ 603 struct folio *folio, u64 start, u32 len) \ 604 { \ 605 if (unlikely(!fs_info) || \ 606 !btrfs_is_subpage(fs_info, folio)) \ 607 return folio_test_func(folio); \ 608 return btrfs_subpage_test_##name(fs_info, folio, start, len); \ 609 } \ 610 void btrfs_folio_clamp_set_##name(const struct btrfs_fs_info *fs_info, \ 611 struct folio *folio, u64 start, u32 len) \ 612 { \ 613 if (unlikely(!fs_info) || \ 614 !btrfs_is_subpage(fs_info, folio)) { \ 615 folio_set_func(folio); \ 616 return; \ 617 } \ 618 btrfs_subpage_clamp_range(folio, &start, &len); \ 619 btrfs_subpage_set_##name(fs_info, folio, start, len); \ 620 } \ 621 void btrfs_folio_clamp_clear_##name(const struct btrfs_fs_info *fs_info, \ 622 struct folio *folio, u64 start, u32 len) \ 623 { \ 624 if (unlikely(!fs_info) || \ 625 !btrfs_is_subpage(fs_info, folio)) { \ 626 folio_clear_func(folio); \ 627 return; \ 628 } \ 629 btrfs_subpage_clamp_range(folio, &start, &len); \ 630 btrfs_subpage_clear_##name(fs_info, folio, start, len); \ 631 } \ 632 bool btrfs_folio_clamp_test_##name(const struct btrfs_fs_info *fs_info, \ 633 struct folio *folio, u64 start, u32 len) \ 634 { \ 635 if (unlikely(!fs_info) || \ 636 !btrfs_is_subpage(fs_info, folio)) \ 637 return folio_test_func(folio); \ 638 btrfs_subpage_clamp_range(folio, &start, &len); \ 639 return btrfs_subpage_test_##name(fs_info, folio, start, len); \ 640 } \ 641 void btrfs_meta_folio_set_##name(struct folio *folio, const struct extent_buffer *eb) \ 642 { \ 643 if (!btrfs_meta_is_subpage(eb->fs_info)) { \ 644 folio_set_func(folio); \ 645 return; \ 646 } \ 647 btrfs_subpage_set_##name(eb->fs_info, folio, eb->start, eb->len); \ 648 } \ 649 void btrfs_meta_folio_clear_##name(struct folio *folio, const struct extent_buffer *eb) \ 650 { \ 651 if (!btrfs_meta_is_subpage(eb->fs_info)) { \ 652 folio_clear_func(folio); \ 653 return; \ 654 } \ 655 btrfs_subpage_clear_##name(eb->fs_info, folio, eb->start, eb->len); \ 656 } \ 657 bool btrfs_meta_folio_test_##name(struct folio *folio, const struct extent_buffer *eb) \ 658 { \ 659 if (!btrfs_meta_is_subpage(eb->fs_info)) \ 660 return folio_test_func(folio); \ 661 return btrfs_subpage_test_##name(eb->fs_info, folio, eb->start, eb->len); \ 662 } 663 IMPLEMENT_BTRFS_PAGE_OPS(uptodate, folio_mark_uptodate, folio_clear_uptodate, 664 folio_test_uptodate); 665 IMPLEMENT_BTRFS_PAGE_OPS(dirty, folio_mark_dirty, folio_clear_dirty_for_io, 666 folio_test_dirty); 667 IMPLEMENT_BTRFS_PAGE_OPS(writeback, folio_start_writeback, folio_end_writeback, 668 folio_test_writeback); 669 IMPLEMENT_BTRFS_PAGE_OPS(ordered, folio_set_ordered, folio_clear_ordered, 670 folio_test_ordered); 671 IMPLEMENT_BTRFS_PAGE_OPS(checked, folio_set_checked, folio_clear_checked, 672 folio_test_checked); 673 674 #define GET_SUBPAGE_BITMAP(fs_info, folio, name, dst) \ 675 { \ 676 const unsigned int blocks_per_folio = \ 677 btrfs_blocks_per_folio(fs_info, folio); \ 678 const struct btrfs_folio_state *bfs = folio_get_private(folio); \ 679 \ 680 ASSERT(blocks_per_folio <= BITS_PER_LONG); \ 681 *dst = bitmap_read(bfs->bitmaps, \ 682 blocks_per_folio * btrfs_bitmap_nr_##name, \ 683 blocks_per_folio); \ 684 } 685 686 #define SUBPAGE_DUMP_BITMAP(fs_info, folio, name, start, len) \ 687 { \ 688 unsigned long bitmap; \ 689 const unsigned int blocks_per_folio = \ 690 btrfs_blocks_per_folio(fs_info, folio); \ 691 \ 692 GET_SUBPAGE_BITMAP(fs_info, folio, name, &bitmap); \ 693 btrfs_warn(fs_info, \ 694 "dumping bitmap start=%llu len=%u folio=%llu " #name "_bitmap=%*pbl", \ 695 start, len, folio_pos(folio), \ 696 blocks_per_folio, &bitmap); \ 697 } 698 699 /* 700 * Make sure not only the page dirty bit is cleared, but also subpage dirty bit 701 * is cleared. 702 */ 703 void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info, 704 struct folio *folio, u64 start, u32 len) 705 { 706 struct btrfs_folio_state *bfs; 707 unsigned int start_bit; 708 unsigned int nbits; 709 unsigned long flags; 710 711 if (!IS_ENABLED(CONFIG_BTRFS_ASSERT)) 712 return; 713 714 if (!btrfs_is_subpage(fs_info, folio)) { 715 ASSERT(!folio_test_dirty(folio)); 716 return; 717 } 718 719 start_bit = subpage_calc_start_bit(fs_info, folio, dirty, start, len); 720 nbits = len >> fs_info->sectorsize_bits; 721 bfs = folio_get_private(folio); 722 ASSERT(bfs); 723 spin_lock_irqsave(&bfs->lock, flags); 724 if (unlikely(!bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits))) { 725 SUBPAGE_DUMP_BITMAP(fs_info, folio, dirty, start, len); 726 ASSERT(bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits)); 727 } 728 ASSERT(bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits)); 729 spin_unlock_irqrestore(&bfs->lock, flags); 730 } 731 732 /* 733 * This is for folio already locked by plain lock_page()/folio_lock(), which 734 * doesn't have any subpage awareness. 735 * 736 * This populates the involved subpage ranges so that subpage helpers can 737 * properly unlock them. 738 */ 739 void btrfs_folio_set_lock(const struct btrfs_fs_info *fs_info, 740 struct folio *folio, u64 start, u32 len) 741 { 742 struct btrfs_folio_state *bfs; 743 unsigned long flags; 744 unsigned int start_bit; 745 unsigned int nbits; 746 int ret; 747 748 ASSERT(folio_test_locked(folio)); 749 if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio)) 750 return; 751 752 bfs = folio_get_private(folio); 753 start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len); 754 nbits = len >> fs_info->sectorsize_bits; 755 spin_lock_irqsave(&bfs->lock, flags); 756 /* Target range should not yet be locked. */ 757 if (unlikely(!bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits))) { 758 SUBPAGE_DUMP_BITMAP(fs_info, folio, locked, start, len); 759 ASSERT(bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits)); 760 } 761 bitmap_set(bfs->bitmaps, start_bit, nbits); 762 ret = atomic_add_return(nbits, &bfs->nr_locked); 763 ASSERT(ret <= btrfs_blocks_per_folio(fs_info, folio)); 764 spin_unlock_irqrestore(&bfs->lock, flags); 765 } 766 767 /* 768 * Clear the dirty flag for the folio. 769 * 770 * If the affected folio is no longer dirty, return true. Otherwise return false. 771 */ 772 bool btrfs_meta_folio_clear_and_test_dirty(struct folio *folio, const struct extent_buffer *eb) 773 { 774 bool last; 775 776 if (!btrfs_meta_is_subpage(eb->fs_info)) { 777 folio_clear_dirty_for_io(folio); 778 return true; 779 } 780 781 last = btrfs_subpage_clear_and_test_dirty(eb->fs_info, folio, eb->start, eb->len); 782 if (last) { 783 folio_clear_dirty_for_io(folio); 784 return true; 785 } 786 return false; 787 } 788 789 void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info, 790 struct folio *folio, u64 start, u32 len) 791 { 792 struct btrfs_folio_state *bfs; 793 const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio); 794 unsigned long uptodate_bitmap; 795 unsigned long dirty_bitmap; 796 unsigned long writeback_bitmap; 797 unsigned long ordered_bitmap; 798 unsigned long checked_bitmap; 799 unsigned long locked_bitmap; 800 unsigned long flags; 801 802 ASSERT(folio_test_private(folio) && folio_get_private(folio)); 803 ASSERT(blocks_per_folio > 1); 804 bfs = folio_get_private(folio); 805 806 spin_lock_irqsave(&bfs->lock, flags); 807 GET_SUBPAGE_BITMAP(fs_info, folio, uptodate, &uptodate_bitmap); 808 GET_SUBPAGE_BITMAP(fs_info, folio, dirty, &dirty_bitmap); 809 GET_SUBPAGE_BITMAP(fs_info, folio, writeback, &writeback_bitmap); 810 GET_SUBPAGE_BITMAP(fs_info, folio, ordered, &ordered_bitmap); 811 GET_SUBPAGE_BITMAP(fs_info, folio, checked, &checked_bitmap); 812 GET_SUBPAGE_BITMAP(fs_info, folio, locked, &locked_bitmap); 813 spin_unlock_irqrestore(&bfs->lock, flags); 814 815 dump_page(folio_page(folio, 0), "btrfs folio state dump"); 816 btrfs_warn(fs_info, 817 "start=%llu len=%u page=%llu, bitmaps uptodate=%*pbl dirty=%*pbl locked=%*pbl writeback=%*pbl ordered=%*pbl checked=%*pbl", 818 start, len, folio_pos(folio), 819 blocks_per_folio, &uptodate_bitmap, 820 blocks_per_folio, &dirty_bitmap, 821 blocks_per_folio, &locked_bitmap, 822 blocks_per_folio, &writeback_bitmap, 823 blocks_per_folio, &ordered_bitmap, 824 blocks_per_folio, &checked_bitmap); 825 } 826 827 void btrfs_get_subpage_dirty_bitmap(struct btrfs_fs_info *fs_info, 828 struct folio *folio, 829 unsigned long *ret_bitmap) 830 { 831 struct btrfs_folio_state *bfs; 832 unsigned long flags; 833 834 ASSERT(folio_test_private(folio) && folio_get_private(folio)); 835 ASSERT(btrfs_blocks_per_folio(fs_info, folio) > 1); 836 bfs = folio_get_private(folio); 837 838 spin_lock_irqsave(&bfs->lock, flags); 839 GET_SUBPAGE_BITMAP(fs_info, folio, dirty, ret_bitmap); 840 spin_unlock_irqrestore(&bfs->lock, flags); 841 } 842