1 // SPDX-License-Identifier: GPL-2.0 2 3 #include <linux/slab.h> 4 #include "messages.h" 5 #include "ctree.h" 6 #include "subpage.h" 7 #include "btrfs_inode.h" 8 9 /* 10 * Subpage (sectorsize < PAGE_SIZE) support overview: 11 * 12 * Limitations: 13 * 14 * - Only support 64K page size for now 15 * This is to make metadata handling easier, as 64K page would ensure 16 * all nodesize would fit inside one page, thus we don't need to handle 17 * cases where a tree block crosses several pages. 18 * 19 * - Only metadata read-write for now 20 * The data read-write part is in development. 21 * 22 * - Metadata can't cross 64K page boundary 23 * btrfs-progs and kernel have done that for a while, thus only ancient 24 * filesystems could have such problem. For such case, do a graceful 25 * rejection. 26 * 27 * Special behavior: 28 * 29 * - Metadata 30 * Metadata read is fully supported. 31 * Meaning when reading one tree block will only trigger the read for the 32 * needed range, other unrelated range in the same page will not be touched. 33 * 34 * Metadata write support is partial. 35 * The writeback is still for the full page, but we will only submit 36 * the dirty extent buffers in the page. 37 * 38 * This means, if we have a metadata page like this: 39 * 40 * Page offset 41 * 0 16K 32K 48K 64K 42 * |/////////| |///////////| 43 * \- Tree block A \- Tree block B 44 * 45 * Even if we just want to writeback tree block A, we will also writeback 46 * tree block B if it's also dirty. 47 * 48 * This may cause extra metadata writeback which results more COW. 49 * 50 * Implementation: 51 * 52 * - Common 53 * Both metadata and data will use a new structure, btrfs_subpage, to 54 * record the status of each sector inside a page. This provides the extra 55 * granularity needed. 56 * 57 * - Metadata 58 * Since we have multiple tree blocks inside one page, we can't rely on page 59 * locking anymore, or we will have greatly reduced concurrency or even 60 * deadlocks (hold one tree lock while trying to lock another tree lock in 61 * the same page). 62 * 63 * Thus for metadata locking, subpage support relies on io_tree locking only. 64 * This means a slightly higher tree locking latency. 65 */ 66 67 bool btrfs_is_subpage(const struct btrfs_fs_info *fs_info, struct address_space *mapping) 68 { 69 if (fs_info->sectorsize >= PAGE_SIZE) 70 return false; 71 72 /* 73 * Only data pages (either through DIO or compression) can have no 74 * mapping. And if page->mapping->host is data inode, it's subpage. 75 * As we have ruled our sectorsize >= PAGE_SIZE case already. 76 */ 77 if (!mapping || !mapping->host || is_data_inode(BTRFS_I(mapping->host))) 78 return true; 79 80 /* 81 * Now the only remaining case is metadata, which we only go subpage 82 * routine if nodesize < PAGE_SIZE. 83 */ 84 if (fs_info->nodesize < PAGE_SIZE) 85 return true; 86 return false; 87 } 88 89 void btrfs_init_subpage_info(struct btrfs_subpage_info *subpage_info, u32 sectorsize) 90 { 91 unsigned int cur = 0; 92 unsigned int nr_bits; 93 94 ASSERT(IS_ALIGNED(PAGE_SIZE, sectorsize)); 95 96 nr_bits = PAGE_SIZE / sectorsize; 97 subpage_info->bitmap_nr_bits = nr_bits; 98 99 subpage_info->uptodate_offset = cur; 100 cur += nr_bits; 101 102 subpage_info->dirty_offset = cur; 103 cur += nr_bits; 104 105 subpage_info->writeback_offset = cur; 106 cur += nr_bits; 107 108 subpage_info->ordered_offset = cur; 109 cur += nr_bits; 110 111 subpage_info->checked_offset = cur; 112 cur += nr_bits; 113 114 subpage_info->locked_offset = cur; 115 cur += nr_bits; 116 117 subpage_info->total_nr_bits = cur; 118 } 119 120 int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info, 121 struct folio *folio, enum btrfs_subpage_type type) 122 { 123 struct btrfs_subpage *subpage; 124 125 /* 126 * We have cases like a dummy extent buffer page, which is not mapped 127 * and doesn't need to be locked. 128 */ 129 if (folio->mapping) 130 ASSERT(folio_test_locked(folio)); 131 132 /* Either not subpage, or the folio already has private attached. */ 133 if (!btrfs_is_subpage(fs_info, folio->mapping) || folio_test_private(folio)) 134 return 0; 135 136 subpage = btrfs_alloc_subpage(fs_info, type); 137 if (IS_ERR(subpage)) 138 return PTR_ERR(subpage); 139 140 folio_attach_private(folio, subpage); 141 return 0; 142 } 143 144 void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info, struct folio *folio) 145 { 146 struct btrfs_subpage *subpage; 147 148 /* Either not subpage, or the folio already has private attached. */ 149 if (!btrfs_is_subpage(fs_info, folio->mapping) || !folio_test_private(folio)) 150 return; 151 152 subpage = folio_detach_private(folio); 153 ASSERT(subpage); 154 btrfs_free_subpage(subpage); 155 } 156 157 struct btrfs_subpage *btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info, 158 enum btrfs_subpage_type type) 159 { 160 struct btrfs_subpage *ret; 161 unsigned int real_size; 162 163 ASSERT(fs_info->sectorsize < PAGE_SIZE); 164 165 real_size = struct_size(ret, bitmaps, 166 BITS_TO_LONGS(fs_info->subpage_info->total_nr_bits)); 167 ret = kzalloc(real_size, GFP_NOFS); 168 if (!ret) 169 return ERR_PTR(-ENOMEM); 170 171 spin_lock_init(&ret->lock); 172 if (type == BTRFS_SUBPAGE_METADATA) { 173 atomic_set(&ret->eb_refs, 0); 174 } else { 175 atomic_set(&ret->readers, 0); 176 atomic_set(&ret->writers, 0); 177 } 178 return ret; 179 } 180 181 void btrfs_free_subpage(struct btrfs_subpage *subpage) 182 { 183 kfree(subpage); 184 } 185 186 /* 187 * Increase the eb_refs of current subpage. 188 * 189 * This is important for eb allocation, to prevent race with last eb freeing 190 * of the same page. 191 * With the eb_refs increased before the eb inserted into radix tree, 192 * detach_extent_buffer_page() won't detach the folio private while we're still 193 * allocating the extent buffer. 194 */ 195 void btrfs_folio_inc_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio) 196 { 197 struct btrfs_subpage *subpage; 198 199 if (!btrfs_is_subpage(fs_info, folio->mapping)) 200 return; 201 202 ASSERT(folio_test_private(folio) && folio->mapping); 203 lockdep_assert_held(&folio->mapping->i_private_lock); 204 205 subpage = folio_get_private(folio); 206 atomic_inc(&subpage->eb_refs); 207 } 208 209 void btrfs_folio_dec_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio) 210 { 211 struct btrfs_subpage *subpage; 212 213 if (!btrfs_is_subpage(fs_info, folio->mapping)) 214 return; 215 216 ASSERT(folio_test_private(folio) && folio->mapping); 217 lockdep_assert_held(&folio->mapping->i_private_lock); 218 219 subpage = folio_get_private(folio); 220 ASSERT(atomic_read(&subpage->eb_refs)); 221 atomic_dec(&subpage->eb_refs); 222 } 223 224 static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info, 225 struct folio *folio, u64 start, u32 len) 226 { 227 /* For subpage support, the folio must be single page. */ 228 ASSERT(folio_order(folio) == 0); 229 230 /* Basic checks */ 231 ASSERT(folio_test_private(folio) && folio_get_private(folio)); 232 ASSERT(IS_ALIGNED(start, fs_info->sectorsize) && 233 IS_ALIGNED(len, fs_info->sectorsize)); 234 /* 235 * The range check only works for mapped page, we can still have 236 * unmapped page like dummy extent buffer pages. 237 */ 238 if (folio->mapping) 239 ASSERT(folio_pos(folio) <= start && 240 start + len <= folio_pos(folio) + PAGE_SIZE); 241 } 242 243 #define subpage_calc_start_bit(fs_info, folio, name, start, len) \ 244 ({ \ 245 unsigned int __start_bit; \ 246 \ 247 btrfs_subpage_assert(fs_info, folio, start, len); \ 248 __start_bit = offset_in_page(start) >> fs_info->sectorsize_bits; \ 249 __start_bit += fs_info->subpage_info->name##_offset; \ 250 __start_bit; \ 251 }) 252 253 void btrfs_subpage_start_reader(const struct btrfs_fs_info *fs_info, 254 struct folio *folio, u64 start, u32 len) 255 { 256 struct btrfs_subpage *subpage = folio_get_private(folio); 257 const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len); 258 const int nbits = len >> fs_info->sectorsize_bits; 259 unsigned long flags; 260 261 262 btrfs_subpage_assert(fs_info, folio, start, len); 263 264 spin_lock_irqsave(&subpage->lock, flags); 265 /* 266 * Even though it's just for reading the page, no one should have 267 * locked the subpage range. 268 */ 269 ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits)); 270 bitmap_set(subpage->bitmaps, start_bit, nbits); 271 atomic_add(nbits, &subpage->readers); 272 spin_unlock_irqrestore(&subpage->lock, flags); 273 } 274 275 void btrfs_subpage_end_reader(const struct btrfs_fs_info *fs_info, 276 struct folio *folio, u64 start, u32 len) 277 { 278 struct btrfs_subpage *subpage = folio_get_private(folio); 279 const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len); 280 const int nbits = len >> fs_info->sectorsize_bits; 281 unsigned long flags; 282 bool is_data; 283 bool last; 284 285 btrfs_subpage_assert(fs_info, folio, start, len); 286 is_data = is_data_inode(BTRFS_I(folio->mapping->host)); 287 288 spin_lock_irqsave(&subpage->lock, flags); 289 290 /* The range should have already been locked. */ 291 ASSERT(bitmap_test_range_all_set(subpage->bitmaps, start_bit, nbits)); 292 ASSERT(atomic_read(&subpage->readers) >= nbits); 293 294 bitmap_clear(subpage->bitmaps, start_bit, nbits); 295 last = atomic_sub_and_test(nbits, &subpage->readers); 296 297 /* 298 * For data we need to unlock the page if the last read has finished. 299 * 300 * And please don't replace @last with atomic_sub_and_test() call 301 * inside if () condition. 302 * As we want the atomic_sub_and_test() to be always executed. 303 */ 304 if (is_data && last) 305 folio_unlock(folio); 306 spin_unlock_irqrestore(&subpage->lock, flags); 307 } 308 309 static void btrfs_subpage_clamp_range(struct folio *folio, u64 *start, u32 *len) 310 { 311 u64 orig_start = *start; 312 u32 orig_len = *len; 313 314 *start = max_t(u64, folio_pos(folio), orig_start); 315 /* 316 * For certain call sites like btrfs_drop_pages(), we may have pages 317 * beyond the target range. In that case, just set @len to 0, subpage 318 * helpers can handle @len == 0 without any problem. 319 */ 320 if (folio_pos(folio) >= orig_start + orig_len) 321 *len = 0; 322 else 323 *len = min_t(u64, folio_pos(folio) + PAGE_SIZE, 324 orig_start + orig_len) - *start; 325 } 326 327 static void btrfs_subpage_start_writer(const struct btrfs_fs_info *fs_info, 328 struct folio *folio, u64 start, u32 len) 329 { 330 struct btrfs_subpage *subpage = folio_get_private(folio); 331 const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len); 332 const int nbits = (len >> fs_info->sectorsize_bits); 333 unsigned long flags; 334 int ret; 335 336 btrfs_subpage_assert(fs_info, folio, start, len); 337 338 spin_lock_irqsave(&subpage->lock, flags); 339 ASSERT(atomic_read(&subpage->readers) == 0); 340 ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits)); 341 bitmap_set(subpage->bitmaps, start_bit, nbits); 342 ret = atomic_add_return(nbits, &subpage->writers); 343 ASSERT(ret == nbits); 344 spin_unlock_irqrestore(&subpage->lock, flags); 345 } 346 347 static bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_info, 348 struct folio *folio, u64 start, u32 len) 349 { 350 struct btrfs_subpage *subpage = folio_get_private(folio); 351 const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len); 352 const int nbits = (len >> fs_info->sectorsize_bits); 353 unsigned long flags; 354 bool last; 355 356 btrfs_subpage_assert(fs_info, folio, start, len); 357 358 spin_lock_irqsave(&subpage->lock, flags); 359 /* 360 * We have call sites passing @lock_page into 361 * extent_clear_unlock_delalloc() for compression path. 362 * 363 * This @locked_page is locked by plain lock_page(), thus its 364 * subpage::writers is 0. Handle them in a special way. 365 */ 366 if (atomic_read(&subpage->writers) == 0) { 367 spin_unlock_irqrestore(&subpage->lock, flags); 368 return true; 369 } 370 371 ASSERT(atomic_read(&subpage->writers) >= nbits); 372 /* The target range should have been locked. */ 373 ASSERT(bitmap_test_range_all_set(subpage->bitmaps, start_bit, nbits)); 374 bitmap_clear(subpage->bitmaps, start_bit, nbits); 375 last = atomic_sub_and_test(nbits, &subpage->writers); 376 spin_unlock_irqrestore(&subpage->lock, flags); 377 return last; 378 } 379 380 /* 381 * Lock a folio for delalloc page writeback. 382 * 383 * Return -EAGAIN if the page is not properly initialized. 384 * Return 0 with the page locked, and writer counter updated. 385 * 386 * Even with 0 returned, the page still need extra check to make sure 387 * it's really the correct page, as the caller is using 388 * filemap_get_folios_contig(), which can race with page invalidating. 389 */ 390 int btrfs_folio_start_writer_lock(const struct btrfs_fs_info *fs_info, 391 struct folio *folio, u64 start, u32 len) 392 { 393 if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio->mapping)) { 394 folio_lock(folio); 395 return 0; 396 } 397 folio_lock(folio); 398 if (!folio_test_private(folio) || !folio_get_private(folio)) { 399 folio_unlock(folio); 400 return -EAGAIN; 401 } 402 btrfs_subpage_clamp_range(folio, &start, &len); 403 btrfs_subpage_start_writer(fs_info, folio, start, len); 404 return 0; 405 } 406 407 void btrfs_folio_end_writer_lock(const struct btrfs_fs_info *fs_info, 408 struct folio *folio, u64 start, u32 len) 409 { 410 if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio->mapping)) { 411 folio_unlock(folio); 412 return; 413 } 414 btrfs_subpage_clamp_range(folio, &start, &len); 415 if (btrfs_subpage_end_and_test_writer(fs_info, folio, start, len)) 416 folio_unlock(folio); 417 } 418 419 #define subpage_test_bitmap_all_set(fs_info, subpage, name) \ 420 bitmap_test_range_all_set(subpage->bitmaps, \ 421 fs_info->subpage_info->name##_offset, \ 422 fs_info->subpage_info->bitmap_nr_bits) 423 424 #define subpage_test_bitmap_all_zero(fs_info, subpage, name) \ 425 bitmap_test_range_all_zero(subpage->bitmaps, \ 426 fs_info->subpage_info->name##_offset, \ 427 fs_info->subpage_info->bitmap_nr_bits) 428 429 void btrfs_subpage_set_uptodate(const struct btrfs_fs_info *fs_info, 430 struct folio *folio, u64 start, u32 len) 431 { 432 struct btrfs_subpage *subpage = folio_get_private(folio); 433 unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, 434 uptodate, start, len); 435 unsigned long flags; 436 437 spin_lock_irqsave(&subpage->lock, flags); 438 bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); 439 if (subpage_test_bitmap_all_set(fs_info, subpage, uptodate)) 440 folio_mark_uptodate(folio); 441 spin_unlock_irqrestore(&subpage->lock, flags); 442 } 443 444 void btrfs_subpage_clear_uptodate(const struct btrfs_fs_info *fs_info, 445 struct folio *folio, u64 start, u32 len) 446 { 447 struct btrfs_subpage *subpage = folio_get_private(folio); 448 unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, 449 uptodate, start, len); 450 unsigned long flags; 451 452 spin_lock_irqsave(&subpage->lock, flags); 453 bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); 454 folio_clear_uptodate(folio); 455 spin_unlock_irqrestore(&subpage->lock, flags); 456 } 457 458 void btrfs_subpage_set_dirty(const struct btrfs_fs_info *fs_info, 459 struct folio *folio, u64 start, u32 len) 460 { 461 struct btrfs_subpage *subpage = folio_get_private(folio); 462 unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, 463 dirty, start, len); 464 unsigned long flags; 465 466 spin_lock_irqsave(&subpage->lock, flags); 467 bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); 468 spin_unlock_irqrestore(&subpage->lock, flags); 469 folio_mark_dirty(folio); 470 } 471 472 /* 473 * Extra clear_and_test function for subpage dirty bitmap. 474 * 475 * Return true if we're the last bits in the dirty_bitmap and clear the 476 * dirty_bitmap. 477 * Return false otherwise. 478 * 479 * NOTE: Callers should manually clear page dirty for true case, as we have 480 * extra handling for tree blocks. 481 */ 482 bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info, 483 struct folio *folio, u64 start, u32 len) 484 { 485 struct btrfs_subpage *subpage = folio_get_private(folio); 486 unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, 487 dirty, start, len); 488 unsigned long flags; 489 bool last = false; 490 491 spin_lock_irqsave(&subpage->lock, flags); 492 bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); 493 if (subpage_test_bitmap_all_zero(fs_info, subpage, dirty)) 494 last = true; 495 spin_unlock_irqrestore(&subpage->lock, flags); 496 return last; 497 } 498 499 void btrfs_subpage_clear_dirty(const struct btrfs_fs_info *fs_info, 500 struct folio *folio, u64 start, u32 len) 501 { 502 bool last; 503 504 last = btrfs_subpage_clear_and_test_dirty(fs_info, folio, start, len); 505 if (last) 506 folio_clear_dirty_for_io(folio); 507 } 508 509 void btrfs_subpage_set_writeback(const struct btrfs_fs_info *fs_info, 510 struct folio *folio, u64 start, u32 len) 511 { 512 struct btrfs_subpage *subpage = folio_get_private(folio); 513 unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, 514 writeback, start, len); 515 unsigned long flags; 516 517 spin_lock_irqsave(&subpage->lock, flags); 518 bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); 519 if (!folio_test_writeback(folio)) 520 folio_start_writeback(folio); 521 spin_unlock_irqrestore(&subpage->lock, flags); 522 } 523 524 void btrfs_subpage_clear_writeback(const struct btrfs_fs_info *fs_info, 525 struct folio *folio, u64 start, u32 len) 526 { 527 struct btrfs_subpage *subpage = folio_get_private(folio); 528 unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, 529 writeback, start, len); 530 unsigned long flags; 531 532 spin_lock_irqsave(&subpage->lock, flags); 533 bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); 534 if (subpage_test_bitmap_all_zero(fs_info, subpage, writeback)) { 535 ASSERT(folio_test_writeback(folio)); 536 folio_end_writeback(folio); 537 } 538 spin_unlock_irqrestore(&subpage->lock, flags); 539 } 540 541 void btrfs_subpage_set_ordered(const struct btrfs_fs_info *fs_info, 542 struct folio *folio, u64 start, u32 len) 543 { 544 struct btrfs_subpage *subpage = folio_get_private(folio); 545 unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, 546 ordered, start, len); 547 unsigned long flags; 548 549 spin_lock_irqsave(&subpage->lock, flags); 550 bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); 551 folio_set_ordered(folio); 552 spin_unlock_irqrestore(&subpage->lock, flags); 553 } 554 555 void btrfs_subpage_clear_ordered(const struct btrfs_fs_info *fs_info, 556 struct folio *folio, u64 start, u32 len) 557 { 558 struct btrfs_subpage *subpage = folio_get_private(folio); 559 unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, 560 ordered, start, len); 561 unsigned long flags; 562 563 spin_lock_irqsave(&subpage->lock, flags); 564 bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); 565 if (subpage_test_bitmap_all_zero(fs_info, subpage, ordered)) 566 folio_clear_ordered(folio); 567 spin_unlock_irqrestore(&subpage->lock, flags); 568 } 569 570 void btrfs_subpage_set_checked(const struct btrfs_fs_info *fs_info, 571 struct folio *folio, u64 start, u32 len) 572 { 573 struct btrfs_subpage *subpage = folio_get_private(folio); 574 unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, 575 checked, start, len); 576 unsigned long flags; 577 578 spin_lock_irqsave(&subpage->lock, flags); 579 bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); 580 if (subpage_test_bitmap_all_set(fs_info, subpage, checked)) 581 folio_set_checked(folio); 582 spin_unlock_irqrestore(&subpage->lock, flags); 583 } 584 585 void btrfs_subpage_clear_checked(const struct btrfs_fs_info *fs_info, 586 struct folio *folio, u64 start, u32 len) 587 { 588 struct btrfs_subpage *subpage = folio_get_private(folio); 589 unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, 590 checked, start, len); 591 unsigned long flags; 592 593 spin_lock_irqsave(&subpage->lock, flags); 594 bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits); 595 folio_clear_checked(folio); 596 spin_unlock_irqrestore(&subpage->lock, flags); 597 } 598 599 /* 600 * Unlike set/clear which is dependent on each page status, for test all bits 601 * are tested in the same way. 602 */ 603 #define IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(name) \ 604 bool btrfs_subpage_test_##name(const struct btrfs_fs_info *fs_info, \ 605 struct folio *folio, u64 start, u32 len) \ 606 { \ 607 struct btrfs_subpage *subpage = folio_get_private(folio); \ 608 unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, \ 609 name, start, len); \ 610 unsigned long flags; \ 611 bool ret; \ 612 \ 613 spin_lock_irqsave(&subpage->lock, flags); \ 614 ret = bitmap_test_range_all_set(subpage->bitmaps, start_bit, \ 615 len >> fs_info->sectorsize_bits); \ 616 spin_unlock_irqrestore(&subpage->lock, flags); \ 617 return ret; \ 618 } 619 IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(uptodate); 620 IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(dirty); 621 IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(writeback); 622 IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(ordered); 623 IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(checked); 624 625 /* 626 * Note that, in selftests (extent-io-tests), we can have empty fs_info passed 627 * in. We only test sectorsize == PAGE_SIZE cases so far, thus we can fall 628 * back to regular sectorsize branch. 629 */ 630 #define IMPLEMENT_BTRFS_PAGE_OPS(name, folio_set_func, \ 631 folio_clear_func, folio_test_func) \ 632 void btrfs_folio_set_##name(const struct btrfs_fs_info *fs_info, \ 633 struct folio *folio, u64 start, u32 len) \ 634 { \ 635 if (unlikely(!fs_info) || \ 636 !btrfs_is_subpage(fs_info, folio->mapping)) { \ 637 folio_set_func(folio); \ 638 return; \ 639 } \ 640 btrfs_subpage_set_##name(fs_info, folio, start, len); \ 641 } \ 642 void btrfs_folio_clear_##name(const struct btrfs_fs_info *fs_info, \ 643 struct folio *folio, u64 start, u32 len) \ 644 { \ 645 if (unlikely(!fs_info) || \ 646 !btrfs_is_subpage(fs_info, folio->mapping)) { \ 647 folio_clear_func(folio); \ 648 return; \ 649 } \ 650 btrfs_subpage_clear_##name(fs_info, folio, start, len); \ 651 } \ 652 bool btrfs_folio_test_##name(const struct btrfs_fs_info *fs_info, \ 653 struct folio *folio, u64 start, u32 len) \ 654 { \ 655 if (unlikely(!fs_info) || \ 656 !btrfs_is_subpage(fs_info, folio->mapping)) \ 657 return folio_test_func(folio); \ 658 return btrfs_subpage_test_##name(fs_info, folio, start, len); \ 659 } \ 660 void btrfs_folio_clamp_set_##name(const struct btrfs_fs_info *fs_info, \ 661 struct folio *folio, u64 start, u32 len) \ 662 { \ 663 if (unlikely(!fs_info) || \ 664 !btrfs_is_subpage(fs_info, folio->mapping)) { \ 665 folio_set_func(folio); \ 666 return; \ 667 } \ 668 btrfs_subpage_clamp_range(folio, &start, &len); \ 669 btrfs_subpage_set_##name(fs_info, folio, start, len); \ 670 } \ 671 void btrfs_folio_clamp_clear_##name(const struct btrfs_fs_info *fs_info, \ 672 struct folio *folio, u64 start, u32 len) \ 673 { \ 674 if (unlikely(!fs_info) || \ 675 !btrfs_is_subpage(fs_info, folio->mapping)) { \ 676 folio_clear_func(folio); \ 677 return; \ 678 } \ 679 btrfs_subpage_clamp_range(folio, &start, &len); \ 680 btrfs_subpage_clear_##name(fs_info, folio, start, len); \ 681 } \ 682 bool btrfs_folio_clamp_test_##name(const struct btrfs_fs_info *fs_info, \ 683 struct folio *folio, u64 start, u32 len) \ 684 { \ 685 if (unlikely(!fs_info) || \ 686 !btrfs_is_subpage(fs_info, folio->mapping)) \ 687 return folio_test_func(folio); \ 688 btrfs_subpage_clamp_range(folio, &start, &len); \ 689 return btrfs_subpage_test_##name(fs_info, folio, start, len); \ 690 } 691 IMPLEMENT_BTRFS_PAGE_OPS(uptodate, folio_mark_uptodate, folio_clear_uptodate, 692 folio_test_uptodate); 693 IMPLEMENT_BTRFS_PAGE_OPS(dirty, folio_mark_dirty, folio_clear_dirty_for_io, 694 folio_test_dirty); 695 IMPLEMENT_BTRFS_PAGE_OPS(writeback, folio_start_writeback, folio_end_writeback, 696 folio_test_writeback); 697 IMPLEMENT_BTRFS_PAGE_OPS(ordered, folio_set_ordered, folio_clear_ordered, 698 folio_test_ordered); 699 IMPLEMENT_BTRFS_PAGE_OPS(checked, folio_set_checked, folio_clear_checked, 700 folio_test_checked); 701 702 /* 703 * Make sure not only the page dirty bit is cleared, but also subpage dirty bit 704 * is cleared. 705 */ 706 void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info, 707 struct folio *folio, u64 start, u32 len) 708 { 709 struct btrfs_subpage *subpage; 710 unsigned int start_bit; 711 unsigned int nbits; 712 unsigned long flags; 713 714 if (!IS_ENABLED(CONFIG_BTRFS_ASSERT)) 715 return; 716 717 if (!btrfs_is_subpage(fs_info, folio->mapping)) { 718 ASSERT(!folio_test_dirty(folio)); 719 return; 720 } 721 722 start_bit = subpage_calc_start_bit(fs_info, folio, dirty, start, len); 723 nbits = len >> fs_info->sectorsize_bits; 724 subpage = folio_get_private(folio); 725 ASSERT(subpage); 726 spin_lock_irqsave(&subpage->lock, flags); 727 ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits)); 728 spin_unlock_irqrestore(&subpage->lock, flags); 729 } 730 731 /* 732 * Handle different locked pages with different page sizes: 733 * 734 * - Page locked by plain lock_page() 735 * It should not have any subpage::writers count. 736 * Can be unlocked by unlock_page(). 737 * This is the most common locked page for __extent_writepage() called 738 * inside extent_write_cache_pages(). 739 * Rarer cases include the @locked_page from extent_write_locked_range(). 740 * 741 * - Page locked by lock_delalloc_pages() 742 * There is only one caller, all pages except @locked_page for 743 * extent_write_locked_range(). 744 * In this case, we have to call subpage helper to handle the case. 745 */ 746 void btrfs_folio_unlock_writer(struct btrfs_fs_info *fs_info, 747 struct folio *folio, u64 start, u32 len) 748 { 749 struct btrfs_subpage *subpage; 750 751 ASSERT(folio_test_locked(folio)); 752 /* For non-subpage case, we just unlock the page */ 753 if (!btrfs_is_subpage(fs_info, folio->mapping)) { 754 folio_unlock(folio); 755 return; 756 } 757 758 ASSERT(folio_test_private(folio) && folio_get_private(folio)); 759 subpage = folio_get_private(folio); 760 761 /* 762 * For subpage case, there are two types of locked page. With or 763 * without writers number. 764 * 765 * Since we own the page lock, no one else could touch subpage::writers 766 * and we are safe to do several atomic operations without spinlock. 767 */ 768 if (atomic_read(&subpage->writers) == 0) { 769 /* No writers, locked by plain lock_page() */ 770 folio_unlock(folio); 771 return; 772 } 773 774 /* Have writers, use proper subpage helper to end it */ 775 btrfs_folio_end_writer_lock(fs_info, folio, start, len); 776 } 777 778 /* 779 * This is for folio already locked by plain lock_page()/folio_lock(), which 780 * doesn't have any subpage awareness. 781 * 782 * This populates the involved subpage ranges so that subpage helpers can 783 * properly unlock them. 784 */ 785 void btrfs_folio_set_writer_lock(const struct btrfs_fs_info *fs_info, 786 struct folio *folio, u64 start, u32 len) 787 { 788 struct btrfs_subpage *subpage; 789 unsigned long flags; 790 unsigned int start_bit; 791 unsigned int nbits; 792 int ret; 793 794 ASSERT(folio_test_locked(folio)); 795 if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio->mapping)) 796 return; 797 798 subpage = folio_get_private(folio); 799 start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len); 800 nbits = len >> fs_info->sectorsize_bits; 801 spin_lock_irqsave(&subpage->lock, flags); 802 /* Target range should not yet be locked. */ 803 ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits)); 804 bitmap_set(subpage->bitmaps, start_bit, nbits); 805 ret = atomic_add_return(nbits, &subpage->writers); 806 ASSERT(ret <= fs_info->subpage_info->bitmap_nr_bits); 807 spin_unlock_irqrestore(&subpage->lock, flags); 808 } 809 810 /* 811 * Find any subpage writer locked range inside @folio, starting at file offset 812 * @search_start. The caller should ensure the folio is locked. 813 * 814 * Return true and update @found_start_ret and @found_len_ret to the first 815 * writer locked range. 816 * Return false if there is no writer locked range. 817 */ 818 bool btrfs_subpage_find_writer_locked(const struct btrfs_fs_info *fs_info, 819 struct folio *folio, u64 search_start, 820 u64 *found_start_ret, u32 *found_len_ret) 821 { 822 struct btrfs_subpage_info *subpage_info = fs_info->subpage_info; 823 struct btrfs_subpage *subpage = folio_get_private(folio); 824 const unsigned int len = PAGE_SIZE - offset_in_page(search_start); 825 const unsigned int start_bit = subpage_calc_start_bit(fs_info, folio, 826 locked, search_start, len); 827 const unsigned int locked_bitmap_start = subpage_info->locked_offset; 828 const unsigned int locked_bitmap_end = locked_bitmap_start + 829 subpage_info->bitmap_nr_bits; 830 unsigned long flags; 831 int first_zero; 832 int first_set; 833 bool found = false; 834 835 ASSERT(folio_test_locked(folio)); 836 spin_lock_irqsave(&subpage->lock, flags); 837 first_set = find_next_bit(subpage->bitmaps, locked_bitmap_end, start_bit); 838 if (first_set >= locked_bitmap_end) 839 goto out; 840 841 found = true; 842 843 *found_start_ret = folio_pos(folio) + 844 ((first_set - locked_bitmap_start) << fs_info->sectorsize_bits); 845 /* 846 * Since @first_set is ensured to be smaller than locked_bitmap_end 847 * here, @found_start_ret should be inside the folio. 848 */ 849 ASSERT(*found_start_ret < folio_pos(folio) + PAGE_SIZE); 850 851 first_zero = find_next_zero_bit(subpage->bitmaps, locked_bitmap_end, first_set); 852 *found_len_ret = (first_zero - first_set) << fs_info->sectorsize_bits; 853 out: 854 spin_unlock_irqrestore(&subpage->lock, flags); 855 return found; 856 } 857 858 /* 859 * Unlike btrfs_folio_end_writer_lock() which unlocks a specified subpage range, 860 * this ends all writer locked ranges of a page. 861 * 862 * This is for the locked page of __extent_writepage(), as the locked page 863 * can contain several locked subpage ranges. 864 */ 865 void btrfs_folio_end_all_writers(const struct btrfs_fs_info *fs_info, struct folio *folio) 866 { 867 struct btrfs_subpage *subpage = folio_get_private(folio); 868 u64 folio_start = folio_pos(folio); 869 u64 cur = folio_start; 870 871 ASSERT(folio_test_locked(folio)); 872 if (!btrfs_is_subpage(fs_info, folio->mapping)) { 873 folio_unlock(folio); 874 return; 875 } 876 877 /* The page has no new delalloc range locked on it. Just plain unlock. */ 878 if (atomic_read(&subpage->writers) == 0) { 879 folio_unlock(folio); 880 return; 881 } 882 while (cur < folio_start + PAGE_SIZE) { 883 u64 found_start; 884 u32 found_len; 885 bool found; 886 bool last; 887 888 found = btrfs_subpage_find_writer_locked(fs_info, folio, cur, 889 &found_start, &found_len); 890 if (!found) 891 break; 892 last = btrfs_subpage_end_and_test_writer(fs_info, folio, 893 found_start, found_len); 894 if (last) { 895 folio_unlock(folio); 896 break; 897 } 898 cur = found_start + found_len; 899 } 900 } 901 902 #define GET_SUBPAGE_BITMAP(subpage, subpage_info, name, dst) \ 903 bitmap_cut(dst, subpage->bitmaps, 0, \ 904 subpage_info->name##_offset, subpage_info->bitmap_nr_bits) 905 906 void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info, 907 struct folio *folio, u64 start, u32 len) 908 { 909 struct btrfs_subpage_info *subpage_info = fs_info->subpage_info; 910 struct btrfs_subpage *subpage; 911 unsigned long uptodate_bitmap; 912 unsigned long dirty_bitmap; 913 unsigned long writeback_bitmap; 914 unsigned long ordered_bitmap; 915 unsigned long checked_bitmap; 916 unsigned long flags; 917 918 ASSERT(folio_test_private(folio) && folio_get_private(folio)); 919 ASSERT(subpage_info); 920 subpage = folio_get_private(folio); 921 922 spin_lock_irqsave(&subpage->lock, flags); 923 GET_SUBPAGE_BITMAP(subpage, subpage_info, uptodate, &uptodate_bitmap); 924 GET_SUBPAGE_BITMAP(subpage, subpage_info, dirty, &dirty_bitmap); 925 GET_SUBPAGE_BITMAP(subpage, subpage_info, writeback, &writeback_bitmap); 926 GET_SUBPAGE_BITMAP(subpage, subpage_info, ordered, &ordered_bitmap); 927 GET_SUBPAGE_BITMAP(subpage, subpage_info, checked, &checked_bitmap); 928 GET_SUBPAGE_BITMAP(subpage, subpage_info, locked, &checked_bitmap); 929 spin_unlock_irqrestore(&subpage->lock, flags); 930 931 dump_page(folio_page(folio, 0), "btrfs subpage dump"); 932 btrfs_warn(fs_info, 933 "start=%llu len=%u page=%llu, bitmaps uptodate=%*pbl dirty=%*pbl writeback=%*pbl ordered=%*pbl checked=%*pbl", 934 start, len, folio_pos(folio), 935 subpage_info->bitmap_nr_bits, &uptodate_bitmap, 936 subpage_info->bitmap_nr_bits, &dirty_bitmap, 937 subpage_info->bitmap_nr_bits, &writeback_bitmap, 938 subpage_info->bitmap_nr_bits, &ordered_bitmap, 939 subpage_info->bitmap_nr_bits, &checked_bitmap); 940 } 941