// SPDX-License-Identifier: GPL-2.0

#include <linux/slab.h>
#include "messages.h"
#include "ctree.h"
#include "subpage.h"
#include "btrfs_inode.h"

/*
 * Subpage (sectorsize < PAGE_SIZE) support overview:
 *
 * Limitations:
 *
 * - Only support 64K page size for now
 *   This is to make metadata handling easier, as a 64K page would ensure
 *   every supported nodesize fits inside one page, thus we don't need to
 *   handle cases where a tree block crosses several pages.
 *
 * - Only metadata read-write for now
 *   The data read-write part is in development.
 *
 * - Metadata can't cross 64K page boundary
 *   btrfs-progs and kernel have done that for a while, thus only ancient
 *   filesystems could have such a problem.  For such a case, do a graceful
 *   rejection.
 *
 * Special behavior:
 *
 * - Metadata
 *   Metadata read is fully supported.
 *   Meaning that reading one tree block will only trigger the read for the
 *   needed range; other unrelated ranges in the same page will not be touched.
 *
 *   Metadata write support is partial.
 *   The writeback is still for the full page, but we will only submit
 *   the dirty extent buffers in the page.
 *
 *   This means, if we have a metadata page like this:
 *
 *   Page offset
 *   0         16K         32K         48K         64K
 *   |/////////|           |///////////|
 *        \- Tree block A        \- Tree block B
 *
 *   Even if we just want to writeback tree block A, we will also writeback
 *   tree block B if it's also dirty.
 *
 *   This may cause extra metadata writeback, which results in more COW.
 *
 * Implementation:
 *
 * - Common
 *   Both metadata and data will use a new structure, btrfs_subpage, to
 *   record the status of each sector inside a page.  This provides the extra
 *   granularity needed.
 *
 * - Metadata
 *   Since we have multiple tree blocks inside one page, we can't rely on page
 *   locking anymore, or we will have greatly reduced concurrency or even
 *   deadlocks (hold one tree lock while trying to lock another tree lock in
 *   the same page).
 *
 *   Thus for metadata locking, subpage support relies on io_tree locking only.
 *   This means a slightly higher tree locking latency.
 */

#if PAGE_SIZE > SZ_4K
bool btrfs_is_subpage(const struct btrfs_fs_info *fs_info, struct address_space *mapping)
{
	if (fs_info->sectorsize >= PAGE_SIZE)
		return false;

	/*
	 * Only data pages (either through DIO or compression) can have no
	 * mapping.  And if page->mapping->host is a data inode, it's subpage,
	 * as we have already ruled out the sectorsize >= PAGE_SIZE case.
	 */
	if (!mapping || !mapping->host || is_data_inode(BTRFS_I(mapping->host)))
		return true;

	/*
	 * Now the only remaining case is metadata, which only goes through
	 * the subpage routine if nodesize < PAGE_SIZE.
	 */
	if (fs_info->nodesize < PAGE_SIZE)
		return true;
	return false;
}
#endif
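/*
 * Illustrative sketch (not a real call site): a subpage user attaches the
 * extra bookkeeping structure to a locked folio after adding it to the page
 * cache and detaches it when the folio private is torn down, e.g.:
 *
 *	ret = btrfs_attach_subpage(fs_info, folio, BTRFS_SUBPAGE_DATA);
 *	if (ret < 0)
 *		return ret;
 *	...
 *	btrfs_detach_subpage(fs_info, folio);
 *
 * Both helpers below are no-ops for the regular sectorsize == PAGE_SIZE case.
 */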
int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
			 struct folio *folio, enum btrfs_subpage_type type)
{
	struct btrfs_subpage *subpage;

	/*
	 * We have cases like a dummy extent buffer page, which is not mapped
	 * and doesn't need to be locked.
	 */
	if (folio->mapping)
		ASSERT(folio_test_locked(folio));

	/* Either not subpage, or the folio already has private attached. */
	if (!btrfs_is_subpage(fs_info, folio->mapping) || folio_test_private(folio))
		return 0;

	subpage = btrfs_alloc_subpage(fs_info, type);
	if (IS_ERR(subpage))
		return PTR_ERR(subpage);

	folio_attach_private(folio, subpage);
	return 0;
}

void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info, struct folio *folio)
{
	struct btrfs_subpage *subpage;

	/* Either not subpage, or the folio doesn't have private attached. */
	if (!btrfs_is_subpage(fs_info, folio->mapping) || !folio_test_private(folio))
		return;

	subpage = folio_detach_private(folio);
	ASSERT(subpage);
	btrfs_free_subpage(subpage);
}

struct btrfs_subpage *btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info,
					  enum btrfs_subpage_type type)
{
	struct btrfs_subpage *ret;
	unsigned int real_size;

	ASSERT(fs_info->sectorsize < PAGE_SIZE);

	real_size = struct_size(ret, bitmaps,
			BITS_TO_LONGS(btrfs_bitmap_nr_max * fs_info->sectors_per_page));
	ret = kzalloc(real_size, GFP_NOFS);
	if (!ret)
		return ERR_PTR(-ENOMEM);

	spin_lock_init(&ret->lock);
	if (type == BTRFS_SUBPAGE_METADATA) {
		atomic_set(&ret->eb_refs, 0);
	} else {
		atomic_set(&ret->readers, 0);
		atomic_set(&ret->writers, 0);
	}
	return ret;
}

void btrfs_free_subpage(struct btrfs_subpage *subpage)
{
	kfree(subpage);
}

/*
 * Increase the eb_refs of current subpage.
 *
 * This is important for eb allocation, to prevent race with last eb freeing
 * of the same page.
 * With the eb_refs increased before the eb is inserted into the radix tree,
 * detach_extent_buffer_page() won't detach the folio private while we're
 * still allocating the extent buffer.
 */
void btrfs_folio_inc_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio)
{
	struct btrfs_subpage *subpage;

	if (!btrfs_is_subpage(fs_info, folio->mapping))
		return;

	ASSERT(folio_test_private(folio) && folio->mapping);
	lockdep_assert_held(&folio->mapping->i_private_lock);

	subpage = folio_get_private(folio);
	atomic_inc(&subpage->eb_refs);
}

void btrfs_folio_dec_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio)
{
	struct btrfs_subpage *subpage;

	if (!btrfs_is_subpage(fs_info, folio->mapping))
		return;

	ASSERT(folio_test_private(folio) && folio->mapping);
	lockdep_assert_held(&folio->mapping->i_private_lock);

	subpage = folio_get_private(folio);
	ASSERT(atomic_read(&subpage->eb_refs));
	atomic_dec(&subpage->eb_refs);
}
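/*
 * Worked example for the per-sector bitmaps (illustrative only, assuming a
 * 4K sectorsize on a 64K page, i.e. sectors_per_page == 16):
 *
 * All sub-bitmaps (uptodate, dirty, writeback, ordered, checked, locked)
 * live back to back inside subpage->bitmaps, each spanning sectors_per_page
 * bits.  For a range starting at offset 32K inside the folio, the
 * subpage_calc_start_bit() helper below evaluates, for the dirty bitmap, to:
 *
 *	offset_in_page(32K) >> sectorsize_bits = 8
 *	8 + sectors_per_page * btrfs_bitmap_nr_dirty
 *
 * i.e. the 9th bit of the dirty sub-bitmap.
 */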
208 */ 209 if (folio->mapping) 210 ASSERT(folio_pos(folio) <= start && 211 start + len <= folio_pos(folio) + PAGE_SIZE); 212 } 213 214 #define subpage_calc_start_bit(fs_info, folio, name, start, len) \ 215 ({ \ 216 unsigned int __start_bit; \ 217 \ 218 btrfs_subpage_assert(fs_info, folio, start, len); \ 219 __start_bit = offset_in_page(start) >> fs_info->sectorsize_bits; \ 220 __start_bit += fs_info->sectors_per_page * btrfs_bitmap_nr_##name; \ 221 __start_bit; \ 222 }) 223 224 void btrfs_subpage_start_reader(const struct btrfs_fs_info *fs_info, 225 struct folio *folio, u64 start, u32 len) 226 { 227 struct btrfs_subpage *subpage = folio_get_private(folio); 228 const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len); 229 const int nbits = len >> fs_info->sectorsize_bits; 230 unsigned long flags; 231 232 233 btrfs_subpage_assert(fs_info, folio, start, len); 234 235 spin_lock_irqsave(&subpage->lock, flags); 236 /* 237 * Even though it's just for reading the page, no one should have 238 * locked the subpage range. 239 */ 240 ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits)); 241 bitmap_set(subpage->bitmaps, start_bit, nbits); 242 atomic_add(nbits, &subpage->readers); 243 spin_unlock_irqrestore(&subpage->lock, flags); 244 } 245 246 void btrfs_subpage_end_reader(const struct btrfs_fs_info *fs_info, 247 struct folio *folio, u64 start, u32 len) 248 { 249 struct btrfs_subpage *subpage = folio_get_private(folio); 250 const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len); 251 const int nbits = len >> fs_info->sectorsize_bits; 252 unsigned long flags; 253 bool is_data; 254 bool last; 255 256 btrfs_subpage_assert(fs_info, folio, start, len); 257 is_data = is_data_inode(BTRFS_I(folio->mapping->host)); 258 259 spin_lock_irqsave(&subpage->lock, flags); 260 261 /* The range should have already been locked. */ 262 ASSERT(bitmap_test_range_all_set(subpage->bitmaps, start_bit, nbits)); 263 ASSERT(atomic_read(&subpage->readers) >= nbits); 264 265 bitmap_clear(subpage->bitmaps, start_bit, nbits); 266 last = atomic_sub_and_test(nbits, &subpage->readers); 267 268 /* 269 * For data we need to unlock the page if the last read has finished. 270 * 271 * And please don't replace @last with atomic_sub_and_test() call 272 * inside if () condition. 273 * As we want the atomic_sub_and_test() to be always executed. 274 */ 275 if (is_data && last) 276 folio_unlock(folio); 277 spin_unlock_irqrestore(&subpage->lock, flags); 278 } 279 280 static void btrfs_subpage_clamp_range(struct folio *folio, u64 *start, u32 *len) 281 { 282 u64 orig_start = *start; 283 u32 orig_len = *len; 284 285 *start = max_t(u64, folio_pos(folio), orig_start); 286 /* 287 * For certain call sites like btrfs_drop_pages(), we may have pages 288 * beyond the target range. In that case, just set @len to 0, subpage 289 * helpers can handle @len == 0 without any problem. 
290 */ 291 if (folio_pos(folio) >= orig_start + orig_len) 292 *len = 0; 293 else 294 *len = min_t(u64, folio_pos(folio) + PAGE_SIZE, 295 orig_start + orig_len) - *start; 296 } 297 298 static void btrfs_subpage_start_writer(const struct btrfs_fs_info *fs_info, 299 struct folio *folio, u64 start, u32 len) 300 { 301 struct btrfs_subpage *subpage = folio_get_private(folio); 302 const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len); 303 const int nbits = (len >> fs_info->sectorsize_bits); 304 unsigned long flags; 305 int ret; 306 307 btrfs_subpage_assert(fs_info, folio, start, len); 308 309 spin_lock_irqsave(&subpage->lock, flags); 310 ASSERT(atomic_read(&subpage->readers) == 0); 311 ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits)); 312 bitmap_set(subpage->bitmaps, start_bit, nbits); 313 ret = atomic_add_return(nbits, &subpage->writers); 314 ASSERT(ret == nbits); 315 spin_unlock_irqrestore(&subpage->lock, flags); 316 } 317 318 static bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_info, 319 struct folio *folio, u64 start, u32 len) 320 { 321 struct btrfs_subpage *subpage = folio_get_private(folio); 322 const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len); 323 const int nbits = (len >> fs_info->sectorsize_bits); 324 unsigned long flags; 325 unsigned int cleared = 0; 326 int bit = start_bit; 327 bool last; 328 329 btrfs_subpage_assert(fs_info, folio, start, len); 330 331 spin_lock_irqsave(&subpage->lock, flags); 332 /* 333 * We have call sites passing @lock_page into 334 * extent_clear_unlock_delalloc() for compression path. 335 * 336 * This @locked_page is locked by plain lock_page(), thus its 337 * subpage::writers is 0. Handle them in a special way. 338 */ 339 if (atomic_read(&subpage->writers) == 0) { 340 spin_unlock_irqrestore(&subpage->lock, flags); 341 return true; 342 } 343 344 for_each_set_bit_from(bit, subpage->bitmaps, start_bit + nbits) { 345 clear_bit(bit, subpage->bitmaps); 346 cleared++; 347 } 348 ASSERT(atomic_read(&subpage->writers) >= cleared); 349 last = atomic_sub_and_test(cleared, &subpage->writers); 350 spin_unlock_irqrestore(&subpage->lock, flags); 351 return last; 352 } 353 354 /* 355 * Lock a folio for delalloc page writeback. 356 * 357 * Return -EAGAIN if the page is not properly initialized. 358 * Return 0 with the page locked, and writer counter updated. 359 * 360 * Even with 0 returned, the page still need extra check to make sure 361 * it's really the correct page, as the caller is using 362 * filemap_get_folios_contig(), which can race with page invalidating. 363 */ 364 int btrfs_folio_start_writer_lock(const struct btrfs_fs_info *fs_info, 365 struct folio *folio, u64 start, u32 len) 366 { 367 if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio->mapping)) { 368 folio_lock(folio); 369 return 0; 370 } 371 folio_lock(folio); 372 if (!folio_test_private(folio) || !folio_get_private(folio)) { 373 folio_unlock(folio); 374 return -EAGAIN; 375 } 376 btrfs_subpage_clamp_range(folio, &start, &len); 377 btrfs_subpage_start_writer(fs_info, folio, start, len); 378 return 0; 379 } 380 381 /* 382 * Handle different locked folios: 383 * 384 * - Non-subpage folio 385 * Just unlock it. 386 * 387 * - folio locked but without any subpage locked 388 * This happens either before writepage_delalloc() or the delalloc range is 389 * already handled by previous folio. 390 * We can simple unlock it. 391 * 392 * - folio locked with subpage range locked. 
/*
 * Handle different locked folios:
 *
 * - Non-subpage folio
 *   Just unlock it.
 *
 * - folio locked but without any subpage range locked
 *   This happens either before writepage_delalloc() or the delalloc range is
 *   already handled by a previous folio.
 *   We can simply unlock it.
 *
 * - folio locked with subpage range locked
 *   We go through the locked sectors inside the range, clear their bits in
 *   the locked bitmap, decrease the writer count, and unlock the folio if
 *   that was the last locked range.
 */
void btrfs_folio_end_writer_lock(const struct btrfs_fs_info *fs_info,
				 struct folio *folio, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = folio_get_private(folio);

	ASSERT(folio_test_locked(folio));

	if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio->mapping)) {
		folio_unlock(folio);
		return;
	}

	/*
	 * For the subpage case, there are two types of locked folios: with
	 * or without a writer count.
	 *
	 * Since we own the page lock, no one else could touch subpage::writers
	 * and we are safe to do several atomic operations without spinlock.
	 */
	if (atomic_read(&subpage->writers) == 0) {
		/* No writers, locked by plain lock_page(). */
		folio_unlock(folio);
		return;
	}

	btrfs_subpage_clamp_range(folio, &start, &len);
	if (btrfs_subpage_end_and_test_writer(fs_info, folio, start, len))
		folio_unlock(folio);
}

void btrfs_folio_end_writer_lock_bitmap(const struct btrfs_fs_info *fs_info,
					struct folio *folio, unsigned long bitmap)
{
	struct btrfs_subpage *subpage = folio_get_private(folio);
	const int start_bit = fs_info->sectors_per_page * btrfs_bitmap_nr_locked;
	unsigned long flags;
	bool last = false;
	int cleared = 0;
	int bit;

	if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio->mapping)) {
		folio_unlock(folio);
		return;
	}

	if (atomic_read(&subpage->writers) == 0) {
		/* No writers, locked by plain lock_page(). */
		folio_unlock(folio);
		return;
	}

	spin_lock_irqsave(&subpage->lock, flags);
	for_each_set_bit(bit, &bitmap, fs_info->sectors_per_page) {
		if (test_and_clear_bit(bit + start_bit, subpage->bitmaps))
			cleared++;
	}
	ASSERT(atomic_read(&subpage->writers) >= cleared);
	last = atomic_sub_and_test(cleared, &subpage->writers);
	spin_unlock_irqrestore(&subpage->lock, flags);
	if (last)
		folio_unlock(folio);
}

#define subpage_test_bitmap_all_set(fs_info, subpage, name)		\
	bitmap_test_range_all_set(subpage->bitmaps,			\
			fs_info->sectors_per_page * btrfs_bitmap_nr_##name, \
			fs_info->sectors_per_page)

#define subpage_test_bitmap_all_zero(fs_info, subpage, name)		\
	bitmap_test_range_all_zero(subpage->bitmaps,			\
			fs_info->sectors_per_page * btrfs_bitmap_nr_##name, \
			fs_info->sectors_per_page)
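/*
 * Example of the "whole folio" rule used below (illustrative, assuming 16
 * sectors per page): the folio-level uptodate flag is only set once
 * btrfs_subpage_set_uptodate() has covered every sector of the folio, while
 * btrfs_subpage_clear_uptodate() clears the folio flag as soon as any sector
 * loses its uptodate bit.
 */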
void btrfs_subpage_set_uptodate(const struct btrfs_fs_info *fs_info,
				struct folio *folio, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = folio_get_private(folio);
	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
							uptodate, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	if (subpage_test_bitmap_all_set(fs_info, subpage, uptodate))
		folio_mark_uptodate(folio);
	spin_unlock_irqrestore(&subpage->lock, flags);
}

void btrfs_subpage_clear_uptodate(const struct btrfs_fs_info *fs_info,
				  struct folio *folio, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = folio_get_private(folio);
	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
							uptodate, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	folio_clear_uptodate(folio);
	spin_unlock_irqrestore(&subpage->lock, flags);
}

void btrfs_subpage_set_dirty(const struct btrfs_fs_info *fs_info,
			     struct folio *folio, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = folio_get_private(folio);
	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
							dirty, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	spin_unlock_irqrestore(&subpage->lock, flags);
	folio_mark_dirty(folio);
}

/*
 * Extra clear_and_test function for the subpage dirty bitmap.
 *
 * Return true if the cleared range contained the last dirty bits in the
 * dirty bitmap.
 * Return false otherwise.
 *
 * NOTE: Callers should manually clear the page dirty flag for the true case,
 * as we have extra handling for tree blocks.
 */
bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info,
					struct folio *folio, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = folio_get_private(folio);
	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
							dirty, start, len);
	unsigned long flags;
	bool last = false;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	if (subpage_test_bitmap_all_zero(fs_info, subpage, dirty))
		last = true;
	spin_unlock_irqrestore(&subpage->lock, flags);
	return last;
}

void btrfs_subpage_clear_dirty(const struct btrfs_fs_info *fs_info,
			       struct folio *folio, u64 start, u32 len)
{
	bool last;

	last = btrfs_subpage_clear_and_test_dirty(fs_info, folio, start, len);
	if (last)
		folio_clear_dirty_for_io(folio);
}

void btrfs_subpage_set_writeback(const struct btrfs_fs_info *fs_info,
				 struct folio *folio, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = folio_get_private(folio);
	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
							writeback, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	if (!folio_test_writeback(folio))
		folio_start_writeback(folio);
	spin_unlock_irqrestore(&subpage->lock, flags);
}

void btrfs_subpage_clear_writeback(const struct btrfs_fs_info *fs_info,
				   struct folio *folio, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = folio_get_private(folio);
	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
							writeback, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	if (subpage_test_bitmap_all_zero(fs_info, subpage, writeback)) {
		ASSERT(folio_test_writeback(folio));
		folio_end_writeback(folio);
	}
	spin_unlock_irqrestore(&subpage->lock, flags);
}
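/*
 * Descriptive note for the two writeback helpers above: the folio-level
 * writeback flag is started when the first sector of the folio enters
 * writeback and is only ended once every sector has cleared its writeback
 * bit again.
 */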
void btrfs_subpage_set_ordered(const struct btrfs_fs_info *fs_info,
			       struct folio *folio, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = folio_get_private(folio);
	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
							ordered, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	folio_set_ordered(folio);
	spin_unlock_irqrestore(&subpage->lock, flags);
}

void btrfs_subpage_clear_ordered(const struct btrfs_fs_info *fs_info,
				 struct folio *folio, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = folio_get_private(folio);
	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
							ordered, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	if (subpage_test_bitmap_all_zero(fs_info, subpage, ordered))
		folio_clear_ordered(folio);
	spin_unlock_irqrestore(&subpage->lock, flags);
}

void btrfs_subpage_set_checked(const struct btrfs_fs_info *fs_info,
			       struct folio *folio, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = folio_get_private(folio);
	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
							checked, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	if (subpage_test_bitmap_all_set(fs_info, subpage, checked))
		folio_set_checked(folio);
	spin_unlock_irqrestore(&subpage->lock, flags);
}

void btrfs_subpage_clear_checked(const struct btrfs_fs_info *fs_info,
				 struct folio *folio, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = folio_get_private(folio);
	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
							checked, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	folio_clear_checked(folio);
	spin_unlock_irqrestore(&subpage->lock, flags);
}

/*
 * Unlike set/clear which is dependent on each page status, for test all bits
 * are tested in the same way.
 */
#define IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(name)				\
bool btrfs_subpage_test_##name(const struct btrfs_fs_info *fs_info,	\
			       struct folio *folio, u64 start, u32 len)	\
{									\
	struct btrfs_subpage *subpage = folio_get_private(folio);	\
	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,	\
						name, start, len);	\
	unsigned long flags;						\
	bool ret;							\
									\
	spin_lock_irqsave(&subpage->lock, flags);			\
	ret = bitmap_test_range_all_set(subpage->bitmaps, start_bit,	\
				len >> fs_info->sectorsize_bits);	\
	spin_unlock_irqrestore(&subpage->lock, flags);			\
	return ret;							\
}
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(uptodate);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(dirty);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(writeback);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(ordered);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(checked);
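/*
 * For reference, IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(dirty) above expands to
 *
 *	bool btrfs_subpage_test_dirty(const struct btrfs_fs_info *fs_info,
 *				      struct folio *folio, u64 start, u32 len);
 *
 * which returns true only if every sector in [start, start + len) has its
 * bit set in the dirty sub-bitmap.
 */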
670 */ 671 #define IMPLEMENT_BTRFS_PAGE_OPS(name, folio_set_func, \ 672 folio_clear_func, folio_test_func) \ 673 void btrfs_folio_set_##name(const struct btrfs_fs_info *fs_info, \ 674 struct folio *folio, u64 start, u32 len) \ 675 { \ 676 if (unlikely(!fs_info) || \ 677 !btrfs_is_subpage(fs_info, folio->mapping)) { \ 678 folio_set_func(folio); \ 679 return; \ 680 } \ 681 btrfs_subpage_set_##name(fs_info, folio, start, len); \ 682 } \ 683 void btrfs_folio_clear_##name(const struct btrfs_fs_info *fs_info, \ 684 struct folio *folio, u64 start, u32 len) \ 685 { \ 686 if (unlikely(!fs_info) || \ 687 !btrfs_is_subpage(fs_info, folio->mapping)) { \ 688 folio_clear_func(folio); \ 689 return; \ 690 } \ 691 btrfs_subpage_clear_##name(fs_info, folio, start, len); \ 692 } \ 693 bool btrfs_folio_test_##name(const struct btrfs_fs_info *fs_info, \ 694 struct folio *folio, u64 start, u32 len) \ 695 { \ 696 if (unlikely(!fs_info) || \ 697 !btrfs_is_subpage(fs_info, folio->mapping)) \ 698 return folio_test_func(folio); \ 699 return btrfs_subpage_test_##name(fs_info, folio, start, len); \ 700 } \ 701 void btrfs_folio_clamp_set_##name(const struct btrfs_fs_info *fs_info, \ 702 struct folio *folio, u64 start, u32 len) \ 703 { \ 704 if (unlikely(!fs_info) || \ 705 !btrfs_is_subpage(fs_info, folio->mapping)) { \ 706 folio_set_func(folio); \ 707 return; \ 708 } \ 709 btrfs_subpage_clamp_range(folio, &start, &len); \ 710 btrfs_subpage_set_##name(fs_info, folio, start, len); \ 711 } \ 712 void btrfs_folio_clamp_clear_##name(const struct btrfs_fs_info *fs_info, \ 713 struct folio *folio, u64 start, u32 len) \ 714 { \ 715 if (unlikely(!fs_info) || \ 716 !btrfs_is_subpage(fs_info, folio->mapping)) { \ 717 folio_clear_func(folio); \ 718 return; \ 719 } \ 720 btrfs_subpage_clamp_range(folio, &start, &len); \ 721 btrfs_subpage_clear_##name(fs_info, folio, start, len); \ 722 } \ 723 bool btrfs_folio_clamp_test_##name(const struct btrfs_fs_info *fs_info, \ 724 struct folio *folio, u64 start, u32 len) \ 725 { \ 726 if (unlikely(!fs_info) || \ 727 !btrfs_is_subpage(fs_info, folio->mapping)) \ 728 return folio_test_func(folio); \ 729 btrfs_subpage_clamp_range(folio, &start, &len); \ 730 return btrfs_subpage_test_##name(fs_info, folio, start, len); \ 731 } 732 IMPLEMENT_BTRFS_PAGE_OPS(uptodate, folio_mark_uptodate, folio_clear_uptodate, 733 folio_test_uptodate); 734 IMPLEMENT_BTRFS_PAGE_OPS(dirty, folio_mark_dirty, folio_clear_dirty_for_io, 735 folio_test_dirty); 736 IMPLEMENT_BTRFS_PAGE_OPS(writeback, folio_start_writeback, folio_end_writeback, 737 folio_test_writeback); 738 IMPLEMENT_BTRFS_PAGE_OPS(ordered, folio_set_ordered, folio_clear_ordered, 739 folio_test_ordered); 740 IMPLEMENT_BTRFS_PAGE_OPS(checked, folio_set_checked, folio_clear_checked, 741 folio_test_checked); 742 743 /* 744 * Make sure not only the page dirty bit is cleared, but also subpage dirty bit 745 * is cleared. 
746 */ 747 void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info, 748 struct folio *folio, u64 start, u32 len) 749 { 750 struct btrfs_subpage *subpage; 751 unsigned int start_bit; 752 unsigned int nbits; 753 unsigned long flags; 754 755 if (!IS_ENABLED(CONFIG_BTRFS_ASSERT)) 756 return; 757 758 if (!btrfs_is_subpage(fs_info, folio->mapping)) { 759 ASSERT(!folio_test_dirty(folio)); 760 return; 761 } 762 763 start_bit = subpage_calc_start_bit(fs_info, folio, dirty, start, len); 764 nbits = len >> fs_info->sectorsize_bits; 765 subpage = folio_get_private(folio); 766 ASSERT(subpage); 767 spin_lock_irqsave(&subpage->lock, flags); 768 ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits)); 769 spin_unlock_irqrestore(&subpage->lock, flags); 770 } 771 772 /* 773 * This is for folio already locked by plain lock_page()/folio_lock(), which 774 * doesn't have any subpage awareness. 775 * 776 * This populates the involved subpage ranges so that subpage helpers can 777 * properly unlock them. 778 */ 779 void btrfs_folio_set_writer_lock(const struct btrfs_fs_info *fs_info, 780 struct folio *folio, u64 start, u32 len) 781 { 782 struct btrfs_subpage *subpage; 783 unsigned long flags; 784 unsigned int start_bit; 785 unsigned int nbits; 786 int ret; 787 788 ASSERT(folio_test_locked(folio)); 789 if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio->mapping)) 790 return; 791 792 subpage = folio_get_private(folio); 793 start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len); 794 nbits = len >> fs_info->sectorsize_bits; 795 spin_lock_irqsave(&subpage->lock, flags); 796 /* Target range should not yet be locked. */ 797 ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits)); 798 bitmap_set(subpage->bitmaps, start_bit, nbits); 799 ret = atomic_add_return(nbits, &subpage->writers); 800 ASSERT(ret <= fs_info->sectors_per_page); 801 spin_unlock_irqrestore(&subpage->lock, flags); 802 } 803 804 /* 805 * Find any subpage writer locked range inside @folio, starting at file offset 806 * @search_start. The caller should ensure the folio is locked. 807 * 808 * Return true and update @found_start_ret and @found_len_ret to the first 809 * writer locked range. 810 * Return false if there is no writer locked range. 811 */ 812 bool btrfs_subpage_find_writer_locked(const struct btrfs_fs_info *fs_info, 813 struct folio *folio, u64 search_start, 814 u64 *found_start_ret, u32 *found_len_ret) 815 { 816 struct btrfs_subpage *subpage = folio_get_private(folio); 817 const u32 sectors_per_page = fs_info->sectors_per_page; 818 const unsigned int len = PAGE_SIZE - offset_in_page(search_start); 819 const unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, 820 locked, search_start, len); 821 const unsigned int locked_bitmap_start = sectors_per_page * btrfs_bitmap_nr_locked; 822 const unsigned int locked_bitmap_end = locked_bitmap_start + sectors_per_page; 823 unsigned long flags; 824 int first_zero; 825 int first_set; 826 bool found = false; 827 828 ASSERT(folio_test_locked(folio)); 829 spin_lock_irqsave(&subpage->lock, flags); 830 first_set = find_next_bit(subpage->bitmaps, locked_bitmap_end, start_bit); 831 if (first_set >= locked_bitmap_end) 832 goto out; 833 834 found = true; 835 836 *found_start_ret = folio_pos(folio) + 837 ((first_set - locked_bitmap_start) << fs_info->sectorsize_bits); 838 /* 839 * Since @first_set is ensured to be smaller than locked_bitmap_end 840 * here, @found_start_ret should be inside the folio. 
841 */ 842 ASSERT(*found_start_ret < folio_pos(folio) + PAGE_SIZE); 843 844 first_zero = find_next_zero_bit(subpage->bitmaps, locked_bitmap_end, first_set); 845 *found_len_ret = (first_zero - first_set) << fs_info->sectorsize_bits; 846 out: 847 spin_unlock_irqrestore(&subpage->lock, flags); 848 return found; 849 } 850 851 #define GET_SUBPAGE_BITMAP(subpage, fs_info, name, dst) \ 852 { \ 853 const int sectors_per_page = fs_info->sectors_per_page; \ 854 \ 855 ASSERT(sectors_per_page < BITS_PER_LONG); \ 856 *dst = bitmap_read(subpage->bitmaps, \ 857 sectors_per_page * btrfs_bitmap_nr_##name, \ 858 sectors_per_page); \ 859 } 860 861 void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info, 862 struct folio *folio, u64 start, u32 len) 863 { 864 struct btrfs_subpage *subpage; 865 const u32 sectors_per_page = fs_info->sectors_per_page; 866 unsigned long uptodate_bitmap; 867 unsigned long dirty_bitmap; 868 unsigned long writeback_bitmap; 869 unsigned long ordered_bitmap; 870 unsigned long checked_bitmap; 871 unsigned long flags; 872 873 ASSERT(folio_test_private(folio) && folio_get_private(folio)); 874 ASSERT(sectors_per_page > 1); 875 subpage = folio_get_private(folio); 876 877 spin_lock_irqsave(&subpage->lock, flags); 878 GET_SUBPAGE_BITMAP(subpage, fs_info, uptodate, &uptodate_bitmap); 879 GET_SUBPAGE_BITMAP(subpage, fs_info, dirty, &dirty_bitmap); 880 GET_SUBPAGE_BITMAP(subpage, fs_info, writeback, &writeback_bitmap); 881 GET_SUBPAGE_BITMAP(subpage, fs_info, ordered, &ordered_bitmap); 882 GET_SUBPAGE_BITMAP(subpage, fs_info, checked, &checked_bitmap); 883 GET_SUBPAGE_BITMAP(subpage, fs_info, locked, &checked_bitmap); 884 spin_unlock_irqrestore(&subpage->lock, flags); 885 886 dump_page(folio_page(folio, 0), "btrfs subpage dump"); 887 btrfs_warn(fs_info, 888 "start=%llu len=%u page=%llu, bitmaps uptodate=%*pbl dirty=%*pbl writeback=%*pbl ordered=%*pbl checked=%*pbl", 889 start, len, folio_pos(folio), 890 sectors_per_page, &uptodate_bitmap, 891 sectors_per_page, &dirty_bitmap, 892 sectors_per_page, &writeback_bitmap, 893 sectors_per_page, &ordered_bitmap, 894 sectors_per_page, &checked_bitmap); 895 } 896 897 void btrfs_get_subpage_dirty_bitmap(struct btrfs_fs_info *fs_info, 898 struct folio *folio, 899 unsigned long *ret_bitmap) 900 { 901 struct btrfs_subpage *subpage; 902 unsigned long flags; 903 904 ASSERT(folio_test_private(folio) && folio_get_private(folio)); 905 ASSERT(fs_info->sectors_per_page > 1); 906 subpage = folio_get_private(folio); 907 908 spin_lock_irqsave(&subpage->lock, flags); 909 GET_SUBPAGE_BITMAP(subpage, fs_info, dirty, ret_bitmap); 910 spin_unlock_irqrestore(&subpage->lock, flags); 911 } 912