1 // SPDX-License-Identifier: GPL-2.0 2 3 #include <linux/slab.h> 4 #include "messages.h" 5 #include "subpage.h" 6 #include "btrfs_inode.h" 7 8 /* 9 * Subpage (block size < folio size) support overview: 10 * 11 * Limitations: 12 * 13 * - Only support 64K page size for now 14 * This is to make metadata handling easier, as 64K page would ensure 15 * all nodesize would fit inside one page, thus we don't need to handle 16 * cases where a tree block crosses several pages. 17 * 18 * - Only metadata read-write for now 19 * The data read-write part is in development. 20 * 21 * - Metadata can't cross 64K page boundary 22 * btrfs-progs and kernel have done that for a while, thus only ancient 23 * filesystems could have such problem. For such case, do a graceful 24 * rejection. 25 * 26 * Special behavior: 27 * 28 * - Metadata 29 * Metadata read is fully supported. 30 * Meaning when reading one tree block will only trigger the read for the 31 * needed range, other unrelated range in the same page will not be touched. 32 * 33 * Metadata write support is partial. 34 * The writeback is still for the full page, but we will only submit 35 * the dirty extent buffers in the page. 36 * 37 * This means, if we have a metadata page like this: 38 * 39 * Page offset 40 * 0 16K 32K 48K 64K 41 * |/////////| |///////////| 42 * \- Tree block A \- Tree block B 43 * 44 * Even if we just want to writeback tree block A, we will also writeback 45 * tree block B if it's also dirty. 46 * 47 * This may cause extra metadata writeback which results more COW. 48 * 49 * Implementation: 50 * 51 * - Common 52 * Both metadata and data will use a new structure, btrfs_folio_state, to 53 * record the status of each sector inside a page. This provides the extra 54 * granularity needed. 55 * 56 * - Metadata 57 * Since we have multiple tree blocks inside one page, we can't rely on page 58 * locking anymore, or we will have greatly reduced concurrency or even 59 * deadlocks (hold one tree lock while trying to lock another tree lock in 60 * the same page). 61 * 62 * Thus for metadata locking, subpage support relies on io_tree locking only. 63 * This means a slightly higher tree locking latency. 64 */ 65 66 int btrfs_attach_folio_state(const struct btrfs_fs_info *fs_info, 67 struct folio *folio, enum btrfs_folio_type type) 68 { 69 struct btrfs_folio_state *bfs; 70 71 /* For metadata we don't support large folio yet. */ 72 if (type == BTRFS_SUBPAGE_METADATA) 73 ASSERT(!folio_test_large(folio)); 74 75 /* 76 * We have cases like a dummy extent buffer page, which is not mapped 77 * and doesn't need to be locked. 78 */ 79 if (folio->mapping) 80 ASSERT(folio_test_locked(folio)); 81 82 /* Either not subpage, or the folio already has private attached. */ 83 if (folio_test_private(folio)) 84 return 0; 85 if (type == BTRFS_SUBPAGE_METADATA && !btrfs_meta_is_subpage(fs_info)) 86 return 0; 87 if (type == BTRFS_SUBPAGE_DATA && !btrfs_is_subpage(fs_info, folio)) 88 return 0; 89 90 bfs = btrfs_alloc_folio_state(fs_info, folio_size(folio), type); 91 if (IS_ERR(bfs)) 92 return PTR_ERR(bfs); 93 94 folio_attach_private(folio, bfs); 95 return 0; 96 } 97 98 void btrfs_detach_folio_state(const struct btrfs_fs_info *fs_info, struct folio *folio, 99 enum btrfs_folio_type type) 100 { 101 struct btrfs_folio_state *bfs; 102 103 /* Either not subpage, or the folio already has private attached. */ 104 if (!folio_test_private(folio)) 105 return; 106 if (type == BTRFS_SUBPAGE_METADATA && !btrfs_meta_is_subpage(fs_info)) 107 return; 108 if (type == BTRFS_SUBPAGE_DATA && !btrfs_is_subpage(fs_info, folio)) 109 return; 110 111 bfs = folio_detach_private(folio); 112 ASSERT(bfs); 113 btrfs_free_folio_state(bfs); 114 } 115 116 struct btrfs_folio_state *btrfs_alloc_folio_state(const struct btrfs_fs_info *fs_info, 117 size_t fsize, enum btrfs_folio_type type) 118 { 119 struct btrfs_folio_state *ret; 120 unsigned int real_size; 121 122 ASSERT(fs_info->sectorsize < fsize); 123 124 real_size = struct_size(ret, bitmaps, 125 BITS_TO_LONGS(btrfs_bitmap_nr_max * 126 (fsize >> fs_info->sectorsize_bits))); 127 ret = kzalloc(real_size, GFP_NOFS); 128 if (!ret) 129 return ERR_PTR(-ENOMEM); 130 131 spin_lock_init(&ret->lock); 132 if (type == BTRFS_SUBPAGE_METADATA) 133 atomic_set(&ret->eb_refs, 0); 134 else 135 atomic_set(&ret->nr_locked, 0); 136 return ret; 137 } 138 139 /* 140 * Increase the eb_refs of current subpage. 141 * 142 * This is important for eb allocation, to prevent race with last eb freeing 143 * of the same page. 144 * With the eb_refs increased before the eb inserted into radix tree, 145 * detach_extent_buffer_page() won't detach the folio private while we're still 146 * allocating the extent buffer. 147 */ 148 void btrfs_folio_inc_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio) 149 { 150 struct btrfs_folio_state *bfs; 151 152 if (!btrfs_meta_is_subpage(fs_info)) 153 return; 154 155 ASSERT(folio_test_private(folio) && folio->mapping); 156 lockdep_assert_held(&folio->mapping->i_private_lock); 157 158 bfs = folio_get_private(folio); 159 atomic_inc(&bfs->eb_refs); 160 } 161 162 void btrfs_folio_dec_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio) 163 { 164 struct btrfs_folio_state *bfs; 165 166 if (!btrfs_meta_is_subpage(fs_info)) 167 return; 168 169 ASSERT(folio_test_private(folio) && folio->mapping); 170 lockdep_assert_held(&folio->mapping->i_private_lock); 171 172 bfs = folio_get_private(folio); 173 ASSERT(atomic_read(&bfs->eb_refs)); 174 atomic_dec(&bfs->eb_refs); 175 } 176 177 static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info, 178 struct folio *folio, u64 start, u32 len) 179 { 180 /* Basic checks */ 181 ASSERT(folio_test_private(folio) && folio_get_private(folio)); 182 ASSERT(IS_ALIGNED(start, fs_info->sectorsize) && 183 IS_ALIGNED(len, fs_info->sectorsize), "start=%llu len=%u", start, len); 184 /* 185 * The range check only works for mapped page, we can still have 186 * unmapped page like dummy extent buffer pages. 187 */ 188 if (folio->mapping) 189 ASSERT(folio_pos(folio) <= start && 190 start + len <= folio_next_pos(folio), 191 "start=%llu len=%u folio_pos=%llu folio_size=%zu", 192 start, len, folio_pos(folio), folio_size(folio)); 193 } 194 195 #define subpage_calc_start_bit(fs_info, folio, name, start, len) \ 196 ({ \ 197 unsigned int __start_bit; \ 198 const unsigned int __bpf = btrfs_blocks_per_folio(fs_info, folio); \ 199 \ 200 btrfs_subpage_assert(fs_info, folio, start, len); \ 201 __start_bit = offset_in_folio(folio, start) >> fs_info->sectorsize_bits; \ 202 __start_bit += __bpf * btrfs_bitmap_nr_##name; \ 203 __start_bit; \ 204 }) 205 206 static void btrfs_subpage_clamp_range(struct folio *folio, u64 *start, u32 *len) 207 { 208 u64 orig_start = *start; 209 u32 orig_len = *len; 210 211 *start = max_t(u64, folio_pos(folio), orig_start); 212 /* 213 * For certain call sites like btrfs_drop_pages(), we may have pages 214 * beyond the target range. In that case, just set @len to 0, subpage 215 * helpers can handle @len == 0 without any problem. 216 */ 217 if (folio_pos(folio) >= orig_start + orig_len) 218 *len = 0; 219 else 220 *len = min_t(u64, folio_next_pos(folio), orig_start + orig_len) - *start; 221 } 222 223 static bool btrfs_subpage_end_and_test_lock(const struct btrfs_fs_info *fs_info, 224 struct folio *folio, u64 start, u32 len) 225 { 226 struct btrfs_folio_state *bfs = folio_get_private(folio); 227 const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len); 228 const int nbits = (len >> fs_info->sectorsize_bits); 229 unsigned long flags; 230 unsigned int cleared = 0; 231 int bit = start_bit; 232 bool last; 233 234 btrfs_subpage_assert(fs_info, folio, start, len); 235 236 spin_lock_irqsave(&bfs->lock, flags); 237 /* 238 * We have call sites passing @lock_page into 239 * extent_clear_unlock_delalloc() for compression path. 240 * 241 * This @locked_page is locked by plain lock_page(), thus its 242 * subpage::locked is 0. Handle them in a special way. 243 */ 244 if (atomic_read(&bfs->nr_locked) == 0) { 245 spin_unlock_irqrestore(&bfs->lock, flags); 246 return true; 247 } 248 249 for_each_set_bit_from(bit, bfs->bitmaps, start_bit + nbits) { 250 clear_bit(bit, bfs->bitmaps); 251 cleared++; 252 } 253 ASSERT(atomic_read(&bfs->nr_locked) >= cleared, 254 "atomic_read(&bfs->nr_locked)=%d cleared=%d", 255 atomic_read(&bfs->nr_locked), cleared); 256 last = atomic_sub_and_test(cleared, &bfs->nr_locked); 257 spin_unlock_irqrestore(&bfs->lock, flags); 258 return last; 259 } 260 261 /* 262 * Handle different locked folios: 263 * 264 * - Non-subpage folio 265 * Just unlock it. 266 * 267 * - folio locked but without any subpage locked 268 * This happens either before writepage_delalloc() or the delalloc range is 269 * already handled by previous folio. 270 * We can simple unlock it. 271 * 272 * - folio locked with subpage range locked. 273 * We go through the locked sectors inside the range and clear their locked 274 * bitmap, reduce the writer lock number, and unlock the page if that's 275 * the last locked range. 276 */ 277 void btrfs_folio_end_lock(const struct btrfs_fs_info *fs_info, 278 struct folio *folio, u64 start, u32 len) 279 { 280 struct btrfs_folio_state *bfs = folio_get_private(folio); 281 282 ASSERT(folio_test_locked(folio)); 283 284 if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio)) { 285 folio_unlock(folio); 286 return; 287 } 288 289 /* 290 * For subpage case, there are two types of locked page. With or 291 * without locked number. 292 * 293 * Since we own the page lock, no one else could touch subpage::locked 294 * and we are safe to do several atomic operations without spinlock. 295 */ 296 if (atomic_read(&bfs->nr_locked) == 0) { 297 /* No subpage lock, locked by plain lock_page(). */ 298 folio_unlock(folio); 299 return; 300 } 301 302 btrfs_subpage_clamp_range(folio, &start, &len); 303 if (btrfs_subpage_end_and_test_lock(fs_info, folio, start, len)) 304 folio_unlock(folio); 305 } 306 307 void btrfs_folio_end_lock_bitmap(const struct btrfs_fs_info *fs_info, 308 struct folio *folio, unsigned long bitmap) 309 { 310 struct btrfs_folio_state *bfs = folio_get_private(folio); 311 const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio); 312 const int start_bit = blocks_per_folio * btrfs_bitmap_nr_locked; 313 unsigned long flags; 314 bool last = false; 315 int cleared = 0; 316 int bit; 317 318 if (!btrfs_is_subpage(fs_info, folio)) { 319 folio_unlock(folio); 320 return; 321 } 322 323 if (atomic_read(&bfs->nr_locked) == 0) { 324 /* No subpage lock, locked by plain lock_page(). */ 325 folio_unlock(folio); 326 return; 327 } 328 329 spin_lock_irqsave(&bfs->lock, flags); 330 for_each_set_bit(bit, &bitmap, blocks_per_folio) { 331 if (test_and_clear_bit(bit + start_bit, bfs->bitmaps)) 332 cleared++; 333 } 334 ASSERT(atomic_read(&bfs->nr_locked) >= cleared, 335 "atomic_read(&bfs->nr_locked)=%d cleared=%d", 336 atomic_read(&bfs->nr_locked), cleared); 337 last = atomic_sub_and_test(cleared, &bfs->nr_locked); 338 spin_unlock_irqrestore(&bfs->lock, flags); 339 if (last) 340 folio_unlock(folio); 341 } 342 343 #define subpage_test_bitmap_all_set(fs_info, folio, name) \ 344 ({ \ 345 struct btrfs_folio_state *__bfs = folio_get_private(folio); \ 346 const unsigned int __bpf = btrfs_blocks_per_folio(fs_info, folio); \ 347 \ 348 bitmap_test_range_all_set(__bfs->bitmaps, \ 349 __bpf * btrfs_bitmap_nr_##name, __bpf); \ 350 }) 351 352 #define subpage_test_bitmap_all_zero(fs_info, folio, name) \ 353 ({ \ 354 struct btrfs_folio_state *__bfs = folio_get_private(folio); \ 355 const unsigned int __bpf = btrfs_blocks_per_folio(fs_info, folio); \ 356 \ 357 bitmap_test_range_all_zero(__bfs->bitmaps, \ 358 __bpf * btrfs_bitmap_nr_##name, __bpf); \ 359 }) 360 361 void btrfs_subpage_set_uptodate(const struct btrfs_fs_info *fs_info, 362 struct folio *folio, u64 start, u32 len) 363 { 364 struct btrfs_folio_state *bfs = folio_get_private(folio); 365 unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, 366 uptodate, start, len); 367 unsigned long flags; 368 369 spin_lock_irqsave(&bfs->lock, flags); 370 bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); 371 if (subpage_test_bitmap_all_set(fs_info, folio, uptodate)) 372 folio_mark_uptodate(folio); 373 spin_unlock_irqrestore(&bfs->lock, flags); 374 } 375 376 void btrfs_subpage_clear_uptodate(const struct btrfs_fs_info *fs_info, 377 struct folio *folio, u64 start, u32 len) 378 { 379 struct btrfs_folio_state *bfs = folio_get_private(folio); 380 unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, 381 uptodate, start, len); 382 unsigned long flags; 383 384 spin_lock_irqsave(&bfs->lock, flags); 385 bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); 386 folio_clear_uptodate(folio); 387 spin_unlock_irqrestore(&bfs->lock, flags); 388 } 389 390 void btrfs_subpage_set_dirty(const struct btrfs_fs_info *fs_info, 391 struct folio *folio, u64 start, u32 len) 392 { 393 struct btrfs_folio_state *bfs = folio_get_private(folio); 394 unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, 395 dirty, start, len); 396 unsigned long flags; 397 398 spin_lock_irqsave(&bfs->lock, flags); 399 bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); 400 spin_unlock_irqrestore(&bfs->lock, flags); 401 folio_mark_dirty(folio); 402 } 403 404 /* 405 * Extra clear_and_test function for subpage dirty bitmap. 406 * 407 * Return true if we're the last bits in the dirty_bitmap and clear the 408 * dirty_bitmap. 409 * Return false otherwise. 410 * 411 * NOTE: Callers should manually clear page dirty for true case, as we have 412 * extra handling for tree blocks. 413 */ 414 bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info, 415 struct folio *folio, u64 start, u32 len) 416 { 417 struct btrfs_folio_state *bfs = folio_get_private(folio); 418 unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, 419 dirty, start, len); 420 unsigned long flags; 421 bool last = false; 422 423 spin_lock_irqsave(&bfs->lock, flags); 424 bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); 425 if (subpage_test_bitmap_all_zero(fs_info, folio, dirty)) 426 last = true; 427 spin_unlock_irqrestore(&bfs->lock, flags); 428 return last; 429 } 430 431 void btrfs_subpage_clear_dirty(const struct btrfs_fs_info *fs_info, 432 struct folio *folio, u64 start, u32 len) 433 { 434 bool last; 435 436 last = btrfs_subpage_clear_and_test_dirty(fs_info, folio, start, len); 437 if (last) 438 folio_clear_dirty_for_io(folio); 439 } 440 441 void btrfs_subpage_set_writeback(const struct btrfs_fs_info *fs_info, 442 struct folio *folio, u64 start, u32 len) 443 { 444 struct btrfs_folio_state *bfs = folio_get_private(folio); 445 unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, 446 writeback, start, len); 447 unsigned long flags; 448 bool keep_write; 449 450 spin_lock_irqsave(&bfs->lock, flags); 451 bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); 452 453 /* 454 * Don't clear the TOWRITE tag when starting writeback on a still-dirty 455 * folio. Doing so can cause WB_SYNC_ALL writepages() to overlook it, 456 * assume writeback is complete, and exit too early — violating sync 457 * ordering guarantees. 458 */ 459 keep_write = folio_test_dirty(folio); 460 if (!folio_test_writeback(folio)) 461 __folio_start_writeback(folio, keep_write); 462 spin_unlock_irqrestore(&bfs->lock, flags); 463 } 464 465 void btrfs_subpage_clear_writeback(const struct btrfs_fs_info *fs_info, 466 struct folio *folio, u64 start, u32 len) 467 { 468 struct btrfs_folio_state *bfs = folio_get_private(folio); 469 unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, 470 writeback, start, len); 471 unsigned long flags; 472 473 spin_lock_irqsave(&bfs->lock, flags); 474 bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); 475 if (subpage_test_bitmap_all_zero(fs_info, folio, writeback)) { 476 ASSERT(folio_test_writeback(folio)); 477 folio_end_writeback(folio); 478 } 479 spin_unlock_irqrestore(&bfs->lock, flags); 480 } 481 482 void btrfs_subpage_set_ordered(const struct btrfs_fs_info *fs_info, 483 struct folio *folio, u64 start, u32 len) 484 { 485 struct btrfs_folio_state *bfs = folio_get_private(folio); 486 unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, 487 ordered, start, len); 488 unsigned long flags; 489 490 spin_lock_irqsave(&bfs->lock, flags); 491 bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); 492 folio_set_ordered(folio); 493 spin_unlock_irqrestore(&bfs->lock, flags); 494 } 495 496 void btrfs_subpage_clear_ordered(const struct btrfs_fs_info *fs_info, 497 struct folio *folio, u64 start, u32 len) 498 { 499 struct btrfs_folio_state *bfs = folio_get_private(folio); 500 unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, 501 ordered, start, len); 502 unsigned long flags; 503 504 spin_lock_irqsave(&bfs->lock, flags); 505 bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); 506 if (subpage_test_bitmap_all_zero(fs_info, folio, ordered)) 507 folio_clear_ordered(folio); 508 spin_unlock_irqrestore(&bfs->lock, flags); 509 } 510 511 void btrfs_subpage_set_checked(const struct btrfs_fs_info *fs_info, 512 struct folio *folio, u64 start, u32 len) 513 { 514 struct btrfs_folio_state *bfs = folio_get_private(folio); 515 unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, 516 checked, start, len); 517 unsigned long flags; 518 519 spin_lock_irqsave(&bfs->lock, flags); 520 bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); 521 if (subpage_test_bitmap_all_set(fs_info, folio, checked)) 522 folio_set_checked(folio); 523 spin_unlock_irqrestore(&bfs->lock, flags); 524 } 525 526 void btrfs_subpage_clear_checked(const struct btrfs_fs_info *fs_info, 527 struct folio *folio, u64 start, u32 len) 528 { 529 struct btrfs_folio_state *bfs = folio_get_private(folio); 530 unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, 531 checked, start, len); 532 unsigned long flags; 533 534 spin_lock_irqsave(&bfs->lock, flags); 535 bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); 536 folio_clear_checked(folio); 537 spin_unlock_irqrestore(&bfs->lock, flags); 538 } 539 540 /* 541 * Unlike set/clear which is dependent on each page status, for test all bits 542 * are tested in the same way. 543 */ 544 #define IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(name) \ 545 bool btrfs_subpage_test_##name(const struct btrfs_fs_info *fs_info, \ 546 struct folio *folio, u64 start, u32 len) \ 547 { \ 548 struct btrfs_folio_state *bfs = folio_get_private(folio); \ 549 unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, \ 550 name, start, len); \ 551 unsigned long flags; \ 552 bool ret; \ 553 \ 554 spin_lock_irqsave(&bfs->lock, flags); \ 555 ret = bitmap_test_range_all_set(bfs->bitmaps, start_bit, \ 556 len >> fs_info->sectorsize_bits); \ 557 spin_unlock_irqrestore(&bfs->lock, flags); \ 558 return ret; \ 559 } 560 IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(uptodate); 561 IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(dirty); 562 IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(writeback); 563 IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(ordered); 564 IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(checked); 565 566 /* 567 * Note that, in selftests (extent-io-tests), we can have empty fs_info passed 568 * in. We only test sectorsize == PAGE_SIZE cases so far, thus we can fall 569 * back to regular sectorsize branch. 570 */ 571 #define IMPLEMENT_BTRFS_PAGE_OPS(name, folio_set_func, \ 572 folio_clear_func, folio_test_func) \ 573 void btrfs_folio_set_##name(const struct btrfs_fs_info *fs_info, \ 574 struct folio *folio, u64 start, u32 len) \ 575 { \ 576 if (unlikely(!fs_info) || \ 577 !btrfs_is_subpage(fs_info, folio)) { \ 578 folio_set_func(folio); \ 579 return; \ 580 } \ 581 btrfs_subpage_set_##name(fs_info, folio, start, len); \ 582 } \ 583 void btrfs_folio_clear_##name(const struct btrfs_fs_info *fs_info, \ 584 struct folio *folio, u64 start, u32 len) \ 585 { \ 586 if (unlikely(!fs_info) || \ 587 !btrfs_is_subpage(fs_info, folio)) { \ 588 folio_clear_func(folio); \ 589 return; \ 590 } \ 591 btrfs_subpage_clear_##name(fs_info, folio, start, len); \ 592 } \ 593 bool btrfs_folio_test_##name(const struct btrfs_fs_info *fs_info, \ 594 struct folio *folio, u64 start, u32 len) \ 595 { \ 596 if (unlikely(!fs_info) || \ 597 !btrfs_is_subpage(fs_info, folio)) \ 598 return folio_test_func(folio); \ 599 return btrfs_subpage_test_##name(fs_info, folio, start, len); \ 600 } \ 601 void btrfs_folio_clamp_set_##name(const struct btrfs_fs_info *fs_info, \ 602 struct folio *folio, u64 start, u32 len) \ 603 { \ 604 if (unlikely(!fs_info) || \ 605 !btrfs_is_subpage(fs_info, folio)) { \ 606 folio_set_func(folio); \ 607 return; \ 608 } \ 609 btrfs_subpage_clamp_range(folio, &start, &len); \ 610 btrfs_subpage_set_##name(fs_info, folio, start, len); \ 611 } \ 612 void btrfs_folio_clamp_clear_##name(const struct btrfs_fs_info *fs_info, \ 613 struct folio *folio, u64 start, u32 len) \ 614 { \ 615 if (unlikely(!fs_info) || \ 616 !btrfs_is_subpage(fs_info, folio)) { \ 617 folio_clear_func(folio); \ 618 return; \ 619 } \ 620 btrfs_subpage_clamp_range(folio, &start, &len); \ 621 btrfs_subpage_clear_##name(fs_info, folio, start, len); \ 622 } \ 623 bool btrfs_folio_clamp_test_##name(const struct btrfs_fs_info *fs_info, \ 624 struct folio *folio, u64 start, u32 len) \ 625 { \ 626 if (unlikely(!fs_info) || \ 627 !btrfs_is_subpage(fs_info, folio)) \ 628 return folio_test_func(folio); \ 629 btrfs_subpage_clamp_range(folio, &start, &len); \ 630 return btrfs_subpage_test_##name(fs_info, folio, start, len); \ 631 } \ 632 void btrfs_meta_folio_set_##name(struct folio *folio, const struct extent_buffer *eb) \ 633 { \ 634 if (!btrfs_meta_is_subpage(eb->fs_info)) { \ 635 folio_set_func(folio); \ 636 return; \ 637 } \ 638 btrfs_subpage_set_##name(eb->fs_info, folio, eb->start, eb->len); \ 639 } \ 640 void btrfs_meta_folio_clear_##name(struct folio *folio, const struct extent_buffer *eb) \ 641 { \ 642 if (!btrfs_meta_is_subpage(eb->fs_info)) { \ 643 folio_clear_func(folio); \ 644 return; \ 645 } \ 646 btrfs_subpage_clear_##name(eb->fs_info, folio, eb->start, eb->len); \ 647 } \ 648 bool btrfs_meta_folio_test_##name(struct folio *folio, const struct extent_buffer *eb) \ 649 { \ 650 if (!btrfs_meta_is_subpage(eb->fs_info)) \ 651 return folio_test_func(folio); \ 652 return btrfs_subpage_test_##name(eb->fs_info, folio, eb->start, eb->len); \ 653 } 654 IMPLEMENT_BTRFS_PAGE_OPS(uptodate, folio_mark_uptodate, folio_clear_uptodate, 655 folio_test_uptodate); 656 IMPLEMENT_BTRFS_PAGE_OPS(dirty, folio_mark_dirty, folio_clear_dirty_for_io, 657 folio_test_dirty); 658 IMPLEMENT_BTRFS_PAGE_OPS(writeback, folio_start_writeback, folio_end_writeback, 659 folio_test_writeback); 660 IMPLEMENT_BTRFS_PAGE_OPS(ordered, folio_set_ordered, folio_clear_ordered, 661 folio_test_ordered); 662 IMPLEMENT_BTRFS_PAGE_OPS(checked, folio_set_checked, folio_clear_checked, 663 folio_test_checked); 664 665 #define GET_SUBPAGE_BITMAP(fs_info, folio, name, dst) \ 666 { \ 667 const unsigned int __bpf = btrfs_blocks_per_folio(fs_info, folio); \ 668 const struct btrfs_folio_state *__bfs = folio_get_private(folio); \ 669 \ 670 ASSERT(__bpf <= BITS_PER_LONG); \ 671 *dst = bitmap_read(__bfs->bitmaps, \ 672 __bpf * btrfs_bitmap_nr_##name, __bpf); \ 673 } 674 675 #define SUBPAGE_DUMP_BITMAP(fs_info, folio, name, start, len) \ 676 { \ 677 unsigned long bitmap; \ 678 const unsigned int __bpf = btrfs_blocks_per_folio(fs_info, folio); \ 679 \ 680 GET_SUBPAGE_BITMAP(fs_info, folio, name, &bitmap); \ 681 btrfs_warn(fs_info, \ 682 "dumping bitmap start=%llu len=%u folio=%llu " #name "_bitmap=%*pbl", \ 683 start, len, folio_pos(folio), __bpf, &bitmap); \ 684 } 685 686 /* 687 * Make sure not only the page dirty bit is cleared, but also subpage dirty bit 688 * is cleared. 689 */ 690 void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info, 691 struct folio *folio, u64 start, u32 len) 692 { 693 struct btrfs_folio_state *bfs; 694 unsigned int start_bit; 695 unsigned int nbits; 696 unsigned long flags; 697 698 if (!IS_ENABLED(CONFIG_BTRFS_ASSERT)) 699 return; 700 701 if (!btrfs_is_subpage(fs_info, folio)) { 702 ASSERT(!folio_test_dirty(folio)); 703 return; 704 } 705 706 start_bit = subpage_calc_start_bit(fs_info, folio, dirty, start, len); 707 nbits = len >> fs_info->sectorsize_bits; 708 bfs = folio_get_private(folio); 709 ASSERT(bfs); 710 spin_lock_irqsave(&bfs->lock, flags); 711 if (unlikely(!bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits))) { 712 SUBPAGE_DUMP_BITMAP(fs_info, folio, dirty, start, len); 713 ASSERT(bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits)); 714 } 715 ASSERT(bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits)); 716 spin_unlock_irqrestore(&bfs->lock, flags); 717 } 718 719 /* 720 * This is for folio already locked by plain lock_page()/folio_lock(), which 721 * doesn't have any subpage awareness. 722 * 723 * This populates the involved subpage ranges so that subpage helpers can 724 * properly unlock them. 725 */ 726 void btrfs_folio_set_lock(const struct btrfs_fs_info *fs_info, 727 struct folio *folio, u64 start, u32 len) 728 { 729 struct btrfs_folio_state *bfs; 730 unsigned long flags; 731 unsigned int start_bit; 732 unsigned int nbits; 733 int ret; 734 735 ASSERT(folio_test_locked(folio)); 736 if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio)) 737 return; 738 739 bfs = folio_get_private(folio); 740 start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len); 741 nbits = len >> fs_info->sectorsize_bits; 742 spin_lock_irqsave(&bfs->lock, flags); 743 /* Target range should not yet be locked. */ 744 if (unlikely(!bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits))) { 745 SUBPAGE_DUMP_BITMAP(fs_info, folio, locked, start, len); 746 ASSERT(bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits)); 747 } 748 bitmap_set(bfs->bitmaps, start_bit, nbits); 749 ret = atomic_add_return(nbits, &bfs->nr_locked); 750 ASSERT(ret <= btrfs_blocks_per_folio(fs_info, folio)); 751 spin_unlock_irqrestore(&bfs->lock, flags); 752 } 753 754 /* 755 * Clear the dirty flag for the folio. 756 * 757 * If the affected folio is no longer dirty, return true. Otherwise return false. 758 */ 759 bool btrfs_meta_folio_clear_and_test_dirty(struct folio *folio, const struct extent_buffer *eb) 760 { 761 bool last; 762 763 if (!btrfs_meta_is_subpage(eb->fs_info)) { 764 folio_clear_dirty_for_io(folio); 765 return true; 766 } 767 768 last = btrfs_subpage_clear_and_test_dirty(eb->fs_info, folio, eb->start, eb->len); 769 if (last) { 770 folio_clear_dirty_for_io(folio); 771 return true; 772 } 773 return false; 774 } 775 776 void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info, 777 struct folio *folio, u64 start, u32 len) 778 { 779 struct btrfs_folio_state *bfs; 780 const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio); 781 unsigned long uptodate_bitmap; 782 unsigned long dirty_bitmap; 783 unsigned long writeback_bitmap; 784 unsigned long ordered_bitmap; 785 unsigned long checked_bitmap; 786 unsigned long locked_bitmap; 787 unsigned long flags; 788 789 ASSERT(folio_test_private(folio) && folio_get_private(folio)); 790 ASSERT(blocks_per_folio > 1); 791 bfs = folio_get_private(folio); 792 793 spin_lock_irqsave(&bfs->lock, flags); 794 GET_SUBPAGE_BITMAP(fs_info, folio, uptodate, &uptodate_bitmap); 795 GET_SUBPAGE_BITMAP(fs_info, folio, dirty, &dirty_bitmap); 796 GET_SUBPAGE_BITMAP(fs_info, folio, writeback, &writeback_bitmap); 797 GET_SUBPAGE_BITMAP(fs_info, folio, ordered, &ordered_bitmap); 798 GET_SUBPAGE_BITMAP(fs_info, folio, checked, &checked_bitmap); 799 GET_SUBPAGE_BITMAP(fs_info, folio, locked, &locked_bitmap); 800 spin_unlock_irqrestore(&bfs->lock, flags); 801 802 dump_page(folio_page(folio, 0), "btrfs folio state dump"); 803 btrfs_warn(fs_info, 804 "start=%llu len=%u page=%llu, bitmaps uptodate=%*pbl dirty=%*pbl locked=%*pbl writeback=%*pbl ordered=%*pbl checked=%*pbl", 805 start, len, folio_pos(folio), 806 blocks_per_folio, &uptodate_bitmap, 807 blocks_per_folio, &dirty_bitmap, 808 blocks_per_folio, &locked_bitmap, 809 blocks_per_folio, &writeback_bitmap, 810 blocks_per_folio, &ordered_bitmap, 811 blocks_per_folio, &checked_bitmap); 812 } 813 814 void btrfs_get_subpage_dirty_bitmap(struct btrfs_fs_info *fs_info, 815 struct folio *folio, 816 unsigned long *ret_bitmap) 817 { 818 struct btrfs_folio_state *bfs; 819 unsigned long flags; 820 821 ASSERT(folio_test_private(folio) && folio_get_private(folio)); 822 ASSERT(btrfs_blocks_per_folio(fs_info, folio) > 1); 823 bfs = folio_get_private(folio); 824 825 spin_lock_irqsave(&bfs->lock, flags); 826 GET_SUBPAGE_BITMAP(fs_info, folio, dirty, ret_bitmap); 827 spin_unlock_irqrestore(&bfs->lock, flags); 828 } 829