xref: /linux/fs/btrfs/subpage.c (revision 056a5087d87ead77dedbe9cf5bde53b7cd4b4651)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 #include <linux/slab.h>
4 #include "messages.h"
5 #include "subpage.h"
6 #include "btrfs_inode.h"
7 
8 /*
9  * Subpage (block size < folio size) support overview:
10  *
11  * Limitations:
12  *
13  * - Metadata must be fully aligned to node size
14  *   So when nodesize <= page size, the metadata can never cross folio boundaries.
15  *
16  * - Only support blocks per folio <= min(BTRFS_MAX_FOLIO_SIZE / fs block size,
17  *					  BTRFS_MAX_BLOCKS_PER_FOLIO)
18  *   This is to ensure we can afford an on-stack bitmap, without the need to allocate
19  *   bitmap memory at runtime.
20  *
21  * Implementation:
22  *
23  * - Common
24  *   Both metadata and data will use a new structure, btrfs_folio_state, to
25  *   record the status of each sector inside a page.  This provides the extra
26  *   granularity needed.
27  *
28  * - Metadata
29  *   Since we have multiple tree blocks inside one page, we can't rely on page
30  *   locking anymore, or we will have greatly reduced concurrency or even
31  *   deadlocks (hold one tree lock while trying to lock another tree lock in
32  *   the same page).
33  *
34  *   Thus for metadata locking, subpage support relies on io_tree locking only.
35  *   This means a slightly higher tree locking latency.
36  */
37 
38 int btrfs_attach_folio_state(const struct btrfs_fs_info *fs_info,
39 			     struct folio *folio, enum btrfs_folio_type type)
40 {
41 	struct btrfs_folio_state *bfs;
42 
43 	/* For metadata we don't support large folio yet. */
44 	if (type == BTRFS_SUBPAGE_METADATA)
45 		ASSERT(!folio_test_large(folio));
46 
47 	/*
48 	 * We have cases like a dummy extent buffer page, which is not mapped
49 	 * and doesn't need to be locked.
50 	 */
51 	if (folio->mapping)
52 		ASSERT(folio_test_locked(folio));
53 
54 	/* Either not subpage, or the folio already has private attached. */
55 	if (folio_test_private(folio))
56 		return 0;
57 	if (type == BTRFS_SUBPAGE_METADATA && !btrfs_meta_is_subpage(fs_info))
58 		return 0;
59 	if (type == BTRFS_SUBPAGE_DATA && !btrfs_is_subpage(fs_info, folio))
60 		return 0;
61 
62 	bfs = btrfs_alloc_folio_state(fs_info, folio_size(folio), type);
63 	if (IS_ERR(bfs))
64 		return PTR_ERR(bfs);
65 
66 	folio_attach_private(folio, bfs);
67 	return 0;
68 }
69 
70 void btrfs_detach_folio_state(const struct btrfs_fs_info *fs_info, struct folio *folio,
71 			      enum btrfs_folio_type type)
72 {
73 	struct btrfs_folio_state *bfs;
74 
75 	/* Either not subpage, or the folio already has private attached. */
76 	if (!folio_test_private(folio))
77 		return;
78 	if (type == BTRFS_SUBPAGE_METADATA && !btrfs_meta_is_subpage(fs_info))
79 		return;
80 	if (type == BTRFS_SUBPAGE_DATA && !btrfs_is_subpage(fs_info, folio))
81 		return;
82 
83 	bfs = folio_detach_private(folio);
84 	ASSERT(bfs);
85 	btrfs_free_folio_state(bfs);
86 }
87 
88 struct btrfs_folio_state *btrfs_alloc_folio_state(const struct btrfs_fs_info *fs_info,
89 						  size_t fsize, enum btrfs_folio_type type)
90 {
91 	struct btrfs_folio_state *ret;
92 	unsigned int real_size;
93 
94 	ASSERT(fs_info->sectorsize < fsize);
95 
96 	real_size = struct_size(ret, bitmaps,
97 			BITS_TO_LONGS(btrfs_bitmap_nr_max *
98 				      (fsize >> fs_info->sectorsize_bits)));
99 	ret = kzalloc(real_size, GFP_NOFS);
100 	if (!ret)
101 		return ERR_PTR(-ENOMEM);
102 
103 	spin_lock_init(&ret->lock);
104 	if (type == BTRFS_SUBPAGE_METADATA)
105 		atomic_set(&ret->eb_refs, 0);
106 	else
107 		atomic_set(&ret->nr_locked, 0);
108 	return ret;
109 }
110 
111 /*
112  * Increase the eb_refs of current subpage.
113  *
114  * This is important for eb allocation, to prevent race with last eb freeing
115  * of the same page.
116  * With the eb_refs increased before the eb inserted into radix tree,
117  * detach_extent_buffer_page() won't detach the folio private while we're still
118  * allocating the extent buffer.
119  */
120 void btrfs_folio_inc_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio)
121 {
122 	struct btrfs_folio_state *bfs;
123 
124 	if (!btrfs_meta_is_subpage(fs_info))
125 		return;
126 
127 	ASSERT(folio_test_private(folio) && folio->mapping);
128 	lockdep_assert_held(&folio->mapping->i_private_lock);
129 
130 	bfs = folio_get_private(folio);
131 	atomic_inc(&bfs->eb_refs);
132 }
133 
134 void btrfs_folio_dec_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio)
135 {
136 	struct btrfs_folio_state *bfs;
137 
138 	if (!btrfs_meta_is_subpage(fs_info))
139 		return;
140 
141 	ASSERT(folio_test_private(folio) && folio->mapping);
142 	lockdep_assert_held(&folio->mapping->i_private_lock);
143 
144 	bfs = folio_get_private(folio);
145 	ASSERT(atomic_read(&bfs->eb_refs));
146 	atomic_dec(&bfs->eb_refs);
147 }
148 
149 static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info,
150 				 struct folio *folio, u64 start, u32 len)
151 {
152 	/* Basic checks */
153 	ASSERT(folio_test_private(folio) && folio_get_private(folio));
154 	ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
155 	       IS_ALIGNED(len, fs_info->sectorsize), "start=%llu len=%u", start, len);
156 	/*
157 	 * The range check only works for mapped page, we can still have
158 	 * unmapped page like dummy extent buffer pages.
159 	 */
160 	if (folio->mapping)
161 		ASSERT(folio_pos(folio) <= start &&
162 		       start + len <= folio_next_pos(folio),
163 		       "start=%llu len=%u folio_pos=%llu folio_size=%zu",
164 		       start, len, folio_pos(folio), folio_size(folio));
165 }
166 
/*
 * Evaluate to the index, inside btrfs_folio_state::bitmaps, of the bit
 * tracking the block at @start in the bitmap called @name.
 *
 * The range is validated with btrfs_subpage_assert() first.  The index is the
 * block offset of @start inside the folio plus the offset of the @name bitmap
 * (blocks per folio * btrfs_bitmap_nr_<name>).
 */
#define subpage_calc_start_bit(fs_info, folio, name, start, len)	\
({									\
	const unsigned int __nr_blocks = btrfs_blocks_per_folio(fs_info, folio); \
	unsigned int __bit_nr;						\
									\
	btrfs_subpage_assert(fs_info, folio, start, len);		\
	__bit_nr = __nr_blocks * btrfs_bitmap_nr_##name +		\
		   (offset_in_folio(folio, start) >> fs_info->sectorsize_bits); \
	__bit_nr;							\
})
177 
178 static void btrfs_subpage_clamp_range(struct folio *folio, u64 *start, u32 *len)
179 {
180 	u64 orig_start = *start;
181 	u32 orig_len = *len;
182 
183 	*start = max_t(u64, folio_pos(folio), orig_start);
184 	/*
185 	 * For certain call sites like btrfs_drop_pages(), we may have pages
186 	 * beyond the target range. In that case, just set @len to 0, subpage
187 	 * helpers can handle @len == 0 without any problem.
188 	 */
189 	if (folio_pos(folio) >= orig_start + orig_len)
190 		*len = 0;
191 	else
192 		*len = min_t(u64, folio_next_pos(folio), orig_start + orig_len) - *start;
193 }
194 
195 static bool btrfs_subpage_end_and_test_lock(const struct btrfs_fs_info *fs_info,
196 					    struct folio *folio, u64 start, u32 len)
197 {
198 	struct btrfs_folio_state *bfs = folio_get_private(folio);
199 	const int nbits = (len >> fs_info->sectorsize_bits);
200 	unsigned long flags;
201 	bool last;
202 
203 	btrfs_subpage_assert(fs_info, folio, start, len);
204 
205 	spin_lock_irqsave(&bfs->lock, flags);
206 	/*
207 	 * We have call sites passing @lock_page into
208 	 * extent_clear_unlock_delalloc() for compression path.
209 	 *
210 	 * This @locked_page is locked by plain lock_page(), thus its
211 	 * subpage::locked is 0.  Handle them in a special way.
212 	 */
213 	if (atomic_read(&bfs->nr_locked) == 0) {
214 		spin_unlock_irqrestore(&bfs->lock, flags);
215 		return true;
216 	}
217 	ASSERT(atomic_read(&bfs->nr_locked) >= nbits,
218 	       "atomic_read(&bfs->nr_locked)=%d nbits=%d",
219 	       atomic_read(&bfs->nr_locked), nbits);
220 	last = atomic_sub_and_test(nbits, &bfs->nr_locked);
221 	spin_unlock_irqrestore(&bfs->lock, flags);
222 	return last;
223 }
224 
225 /*
226  * Handle different locked folios:
227  *
228  * - Non-subpage folio
229  *   Just unlock it.
230  *
231  * - folio locked but without any subpage locked
232  *   This happens either before writepage_delalloc() or the delalloc range is
233  *   already handled by previous folio.
234  *   We can simple unlock it.
235  *
236  * - folio locked with subpage range locked.
237  *   We go through the locked sectors inside the range and clear their locked
238  *   bitmap, reduce the writer lock number, and unlock the page if that's
239  *   the last locked range.
240  */
241 void btrfs_folio_end_lock(const struct btrfs_fs_info *fs_info,
242 			  struct folio *folio, u64 start, u32 len)
243 {
244 	struct btrfs_folio_state *bfs = folio_get_private(folio);
245 
246 	ASSERT(folio_test_locked(folio));
247 
248 	if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio)) {
249 		folio_unlock(folio);
250 		return;
251 	}
252 
253 	/*
254 	 * For subpage case, there are two types of locked page.  With or
255 	 * without locked number.
256 	 *
257 	 * Since we own the page lock, no one else could touch subpage::locked
258 	 * and we are safe to do several atomic operations without spinlock.
259 	 */
260 	if (atomic_read(&bfs->nr_locked) == 0) {
261 		/* No subpage lock, locked by plain lock_page(). */
262 		folio_unlock(folio);
263 		return;
264 	}
265 
266 	btrfs_subpage_clamp_range(folio, &start, &len);
267 	if (btrfs_subpage_end_and_test_lock(fs_info, folio, start, len))
268 		folio_unlock(folio);
269 }
270 
271 void btrfs_folio_end_lock_bitmap(const struct btrfs_fs_info *fs_info,
272 				 struct folio *folio, unsigned long *bitmap)
273 {
274 	struct btrfs_folio_state *bfs = folio_get_private(folio);
275 	const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio);
276 	const unsigned int nbits = bitmap_weight(bitmap, blocks_per_folio);
277 	unsigned long flags;
278 	bool last = false;
279 
280 	if (!btrfs_is_subpage(fs_info, folio)) {
281 		folio_unlock(folio);
282 		return;
283 	}
284 
285 	if (atomic_read(&bfs->nr_locked) == 0) {
286 		/* No subpage lock, locked by plain lock_page(). */
287 		folio_unlock(folio);
288 		return;
289 	}
290 
291 	spin_lock_irqsave(&bfs->lock, flags);
292 	ASSERT(atomic_read(&bfs->nr_locked) >= nbits,
293 	       "atomic_read(&bfs->nr_locked)=%d nbits=%d",
294 	       atomic_read(&bfs->nr_locked), nbits);
295 	last = atomic_sub_and_test(nbits, &bfs->nr_locked);
296 	spin_unlock_irqrestore(&bfs->lock, flags);
297 	if (last)
298 		folio_unlock(folio);
299 }
300 
/* Evaluate to true if every bit of the whole @name bitmap of @folio is set. */
#define subpage_test_bitmap_all_set(fs_info, folio, name)		\
({									\
	const unsigned int __nr_blocks = btrfs_blocks_per_folio(fs_info, folio); \
	struct btrfs_folio_state *__state = folio_get_private(folio);	\
									\
	bitmap_test_range_all_set(__state->bitmaps,			\
				  __nr_blocks * btrfs_bitmap_nr_##name,	\
				  __nr_blocks);				\
})
309 
/* Evaluate to true if every bit of the whole @name bitmap of @folio is clear. */
#define subpage_test_bitmap_all_zero(fs_info, folio, name)		\
({									\
	const unsigned int __nr_blocks = btrfs_blocks_per_folio(fs_info, folio); \
	struct btrfs_folio_state *__state = folio_get_private(folio);	\
									\
	bitmap_test_range_all_zero(__state->bitmaps,			\
				   __nr_blocks * btrfs_bitmap_nr_##name, \
				   __nr_blocks);			\
})
318 
319 void btrfs_subpage_set_uptodate(const struct btrfs_fs_info *fs_info,
320 				struct folio *folio, u64 start, u32 len)
321 {
322 	struct btrfs_folio_state *bfs = folio_get_private(folio);
323 	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
324 							uptodate, start, len);
325 	unsigned long flags;
326 
327 	spin_lock_irqsave(&bfs->lock, flags);
328 	bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
329 	if (subpage_test_bitmap_all_set(fs_info, folio, uptodate))
330 		folio_mark_uptodate(folio);
331 	spin_unlock_irqrestore(&bfs->lock, flags);
332 }
333 
334 void btrfs_subpage_clear_uptodate(const struct btrfs_fs_info *fs_info,
335 				  struct folio *folio, u64 start, u32 len)
336 {
337 	struct btrfs_folio_state *bfs = folio_get_private(folio);
338 	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
339 							uptodate, start, len);
340 	unsigned long flags;
341 
342 	spin_lock_irqsave(&bfs->lock, flags);
343 	bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
344 	folio_clear_uptodate(folio);
345 	spin_unlock_irqrestore(&bfs->lock, flags);
346 }
347 
348 void btrfs_subpage_set_dirty(const struct btrfs_fs_info *fs_info,
349 			     struct folio *folio, u64 start, u32 len)
350 {
351 	struct btrfs_folio_state *bfs = folio_get_private(folio);
352 	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
353 							dirty, start, len);
354 	unsigned long flags;
355 
356 	spin_lock_irqsave(&bfs->lock, flags);
357 	bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
358 	spin_unlock_irqrestore(&bfs->lock, flags);
359 	folio_mark_dirty(folio);
360 }
361 
362 /*
363  * Extra clear_and_test function for subpage dirty bitmap.
364  *
365  * Return true if we're the last bits in the dirty_bitmap and clear the
366  * dirty_bitmap.
367  * Return false otherwise.
368  *
369  * NOTE: Callers should manually clear page dirty for true case, as we have
370  * extra handling for tree blocks.
371  */
372 bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info,
373 					struct folio *folio, u64 start, u32 len)
374 {
375 	struct btrfs_folio_state *bfs = folio_get_private(folio);
376 	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
377 							dirty, start, len);
378 	unsigned long flags;
379 	bool last = false;
380 
381 	spin_lock_irqsave(&bfs->lock, flags);
382 	bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
383 	if (subpage_test_bitmap_all_zero(fs_info, folio, dirty))
384 		last = true;
385 	spin_unlock_irqrestore(&bfs->lock, flags);
386 	return last;
387 }
388 
389 void btrfs_subpage_clear_dirty(const struct btrfs_fs_info *fs_info,
390 			       struct folio *folio, u64 start, u32 len)
391 {
392 	bool last;
393 
394 	last = btrfs_subpage_clear_and_test_dirty(fs_info, folio, start, len);
395 	if (last)
396 		folio_clear_dirty_for_io(folio);
397 }
398 
399 void btrfs_subpage_set_writeback(const struct btrfs_fs_info *fs_info,
400 				 struct folio *folio, u64 start, u32 len)
401 {
402 	struct btrfs_folio_state *bfs = folio_get_private(folio);
403 	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
404 							writeback, start, len);
405 	unsigned long flags;
406 	bool keep_write;
407 
408 	spin_lock_irqsave(&bfs->lock, flags);
409 	bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
410 
411 	/*
412 	 * Don't clear the TOWRITE tag when starting writeback on a still-dirty
413 	 * folio. Doing so can cause WB_SYNC_ALL writepages() to overlook it,
414 	 * assume writeback is complete, and exit too early — violating sync
415 	 * ordering guarantees.
416 	 */
417 	keep_write = folio_test_dirty(folio);
418 	if (!folio_test_writeback(folio))
419 		__folio_start_writeback(folio, keep_write);
420 	spin_unlock_irqrestore(&bfs->lock, flags);
421 }
422 
423 void btrfs_subpage_clear_writeback(const struct btrfs_fs_info *fs_info,
424 				   struct folio *folio, u64 start, u32 len)
425 {
426 	struct btrfs_folio_state *bfs = folio_get_private(folio);
427 	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
428 							writeback, start, len);
429 	unsigned long flags;
430 
431 	spin_lock_irqsave(&bfs->lock, flags);
432 	bitmap_clear(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
433 	if (subpage_test_bitmap_all_zero(fs_info, folio, writeback)) {
434 		ASSERT(folio_test_writeback(folio));
435 		folio_end_writeback(folio);
436 	}
437 	spin_unlock_irqrestore(&bfs->lock, flags);
438 }
439 
440 /*
441  * Unlike set/clear which is dependent on each page status, for test all bits
442  * are tested in the same way.
443  */
444 #define IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(name)				\
445 bool btrfs_subpage_test_##name(const struct btrfs_fs_info *fs_info,	\
446 			       struct folio *folio, u64 start, u32 len)	\
447 {									\
448 	struct btrfs_folio_state *bfs = folio_get_private(folio);	\
449 	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,	\
450 						name, start, len);	\
451 	unsigned long flags;						\
452 	bool ret;							\
453 									\
454 	spin_lock_irqsave(&bfs->lock, flags);			\
455 	ret = bitmap_test_range_all_set(bfs->bitmaps, start_bit,	\
456 				len >> fs_info->sectorsize_bits);	\
457 	spin_unlock_irqrestore(&bfs->lock, flags);			\
458 	return ret;							\
459 }
460 IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(uptodate);
461 IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(dirty);
462 IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(writeback);
463 
464 /*
465  * Note that, in selftests (extent-io-tests), we can have empty fs_info passed
466  * in.  We only test sectorsize == PAGE_SIZE cases so far, thus we can fall
467  * back to regular sectorsize branch.
468  */
469 #define IMPLEMENT_BTRFS_PAGE_OPS(name, folio_set_func,			\
470 				 folio_clear_func, folio_test_func)	\
471 void btrfs_folio_set_##name(const struct btrfs_fs_info *fs_info,	\
472 			    struct folio *folio, u64 start, u32 len)	\
473 {									\
474 	if (unlikely(!fs_info) ||					\
475 	    !btrfs_is_subpage(fs_info, folio)) {			\
476 		folio_set_func(folio);					\
477 		return;							\
478 	}								\
479 	btrfs_subpage_set_##name(fs_info, folio, start, len);		\
480 }									\
481 void btrfs_folio_clear_##name(const struct btrfs_fs_info *fs_info,	\
482 			      struct folio *folio, u64 start, u32 len)	\
483 {									\
484 	if (unlikely(!fs_info) ||					\
485 	    !btrfs_is_subpage(fs_info, folio)) {			\
486 		folio_clear_func(folio);				\
487 		return;							\
488 	}								\
489 	btrfs_subpage_clear_##name(fs_info, folio, start, len);		\
490 }									\
491 bool btrfs_folio_test_##name(const struct btrfs_fs_info *fs_info,	\
492 			     struct folio *folio, u64 start, u32 len)	\
493 {									\
494 	if (unlikely(!fs_info) ||					\
495 	    !btrfs_is_subpage(fs_info, folio))				\
496 		return folio_test_func(folio);				\
497 	return btrfs_subpage_test_##name(fs_info, folio, start, len);	\
498 }									\
499 void btrfs_folio_clamp_set_##name(const struct btrfs_fs_info *fs_info,	\
500 				  struct folio *folio, u64 start, u32 len) \
501 {									\
502 	if (unlikely(!fs_info) ||					\
503 	    !btrfs_is_subpage(fs_info, folio)) {			\
504 		folio_set_func(folio);					\
505 		return;							\
506 	}								\
507 	btrfs_subpage_clamp_range(folio, &start, &len);			\
508 	btrfs_subpage_set_##name(fs_info, folio, start, len);		\
509 }									\
510 void btrfs_folio_clamp_clear_##name(const struct btrfs_fs_info *fs_info, \
511 				    struct folio *folio, u64 start, u32 len) \
512 {									\
513 	if (unlikely(!fs_info) ||					\
514 	    !btrfs_is_subpage(fs_info, folio)) {			\
515 		folio_clear_func(folio);				\
516 		return;							\
517 	}								\
518 	btrfs_subpage_clamp_range(folio, &start, &len);			\
519 	btrfs_subpage_clear_##name(fs_info, folio, start, len);		\
520 }									\
521 bool btrfs_folio_clamp_test_##name(const struct btrfs_fs_info *fs_info,	\
522 				   struct folio *folio, u64 start, u32 len) \
523 {									\
524 	if (unlikely(!fs_info) ||					\
525 	    !btrfs_is_subpage(fs_info, folio))				\
526 		return folio_test_func(folio);				\
527 	btrfs_subpage_clamp_range(folio, &start, &len);			\
528 	return btrfs_subpage_test_##name(fs_info, folio, start, len);	\
529 }									\
530 void btrfs_meta_folio_set_##name(struct folio *folio, const struct extent_buffer *eb) \
531 {									\
532 	if (!btrfs_meta_is_subpage(eb->fs_info)) {			\
533 		folio_set_func(folio);					\
534 		return;							\
535 	}								\
536 	btrfs_subpage_set_##name(eb->fs_info, folio, eb->start, eb->len); \
537 }									\
538 void btrfs_meta_folio_clear_##name(struct folio *folio, const struct extent_buffer *eb) \
539 {									\
540 	if (!btrfs_meta_is_subpage(eb->fs_info)) {			\
541 		folio_clear_func(folio);				\
542 		return;							\
543 	}								\
544 	btrfs_subpage_clear_##name(eb->fs_info, folio, eb->start, eb->len); \
545 }									\
546 bool btrfs_meta_folio_test_##name(struct folio *folio, const struct extent_buffer *eb) \
547 {									\
548 	if (!btrfs_meta_is_subpage(eb->fs_info))			\
549 		return folio_test_func(folio);				\
550 	return btrfs_subpage_test_##name(eb->fs_info, folio, eb->start, eb->len); \
551 }
552 IMPLEMENT_BTRFS_PAGE_OPS(uptodate, folio_mark_uptodate, folio_clear_uptodate,
553 			 folio_test_uptodate);
554 IMPLEMENT_BTRFS_PAGE_OPS(dirty, folio_mark_dirty, folio_clear_dirty_for_io,
555 			 folio_test_dirty);
556 IMPLEMENT_BTRFS_PAGE_OPS(writeback, folio_start_writeback, folio_end_writeback,
557 			 folio_test_writeback);
558 
559 #define DEFINE_GET_SUBPAGE_BITMAP(name)						\
560 static inline unsigned long get_bitmap_value_##name(				\
561 					const struct btrfs_fs_info *fs_info,	\
562 					struct folio *folio)			\
563 {										\
564 	const unsigned int __bpf = btrfs_blocks_per_folio(fs_info, folio);	\
565 	const struct btrfs_folio_state *__bfs = folio_get_private(folio);	\
566 	unsigned long value;							\
567 										\
568 	ASSERT(__bpf <= BITS_PER_LONG);						\
569 	value = bitmap_read(__bfs->bitmaps, __bpf * btrfs_bitmap_nr_##name,	\
570 			     __bpf);						\
571 	return value;								\
572 }										\
573 static inline const unsigned long *get_bitmap_pointer_##name(			\
574 					const struct btrfs_fs_info *fs_info,	\
575 					struct folio *folio)			\
576 {										\
577 	const unsigned int __bpf = btrfs_blocks_per_folio(fs_info, folio);	\
578 	struct btrfs_folio_state *__bfs = folio_get_private(folio);		\
579 	unsigned long *pointer;							\
580 										\
581 	ASSERT(__bpf >= BITS_PER_LONG);						\
582 	ASSERT(IS_ALIGNED(__bpf, BITS_PER_LONG));				\
583 	pointer = __bfs->bitmaps + (BIT_WORD(__bpf) * btrfs_bitmap_nr_##name);	\
584 	return pointer;								\
585 }
586 
587 DEFINE_GET_SUBPAGE_BITMAP(uptodate);
588 DEFINE_GET_SUBPAGE_BITMAP(dirty);
589 DEFINE_GET_SUBPAGE_BITMAP(writeback);
590 
/*
 * Print the @name bitmap of @folio with btrfs_warn(), together with the range
 * [@start, @start + @len) and the folio position.
 *
 * The bitmap is read by value when it fits into one long, and through a
 * pointer into the folio state otherwise.  No locking is done here.
 *
 * Wrapped in do { } while (0) so that the macro behaves like a single
 * statement, including inside an unbraced if/else.
 */
#define SUBPAGE_DUMP_BITMAP(fs_info, folio, name, start, len)			\
do {										\
	const unsigned int __bpf = btrfs_blocks_per_folio(fs_info, folio);	\
										\
	if (__bpf <= BITS_PER_LONG) {						\
		unsigned long __bitmap = get_bitmap_value_##name(fs_info, folio); \
										\
		btrfs_warn(fs_info,						\
	"dumping bitmap start=%llu len=%u folio=%llu " #name "_bitmap=%*pbl",	\
		   start, len, folio_pos(folio), __bpf, &__bitmap);		\
	} else {								\
		btrfs_warn(fs_info,						\
	"dumping bitmap start=%llu len=%u folio=%llu " #name "_bitmap=%*pbl",	\
		   start, len, folio_pos(folio), __bpf,				\
		   get_bitmap_pointer_##name(fs_info, folio));			\
	}									\
} while (0)
608 
609 /*
610  * Make sure not only the page dirty bit is cleared, but also subpage dirty bit
611  * is cleared.
612  */
613 void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info,
614 				  struct folio *folio, u64 start, u32 len)
615 {
616 	struct btrfs_folio_state *bfs;
617 	unsigned int start_bit;
618 	unsigned int nbits;
619 	unsigned long flags;
620 
621 	if (!IS_ENABLED(CONFIG_BTRFS_ASSERT))
622 		return;
623 
624 	if (!btrfs_is_subpage(fs_info, folio)) {
625 		ASSERT(!folio_test_dirty(folio));
626 		return;
627 	}
628 
629 	start_bit = subpage_calc_start_bit(fs_info, folio, dirty, start, len);
630 	nbits = len >> fs_info->sectorsize_bits;
631 	bfs = folio_get_private(folio);
632 	ASSERT(bfs);
633 	spin_lock_irqsave(&bfs->lock, flags);
634 	if (unlikely(!bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits))) {
635 		SUBPAGE_DUMP_BITMAP(fs_info, folio, dirty, start, len);
636 		ASSERT(bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits));
637 	}
638 	ASSERT(bitmap_test_range_all_zero(bfs->bitmaps, start_bit, nbits));
639 	spin_unlock_irqrestore(&bfs->lock, flags);
640 }
641 
642 /*
643  * This is for folio already locked by plain lock_page()/folio_lock(), which
644  * doesn't have any subpage awareness.
645  *
646  * This populates the involved subpage ranges so that subpage helpers can
647  * properly unlock them.
648  */
649 void btrfs_folio_set_lock(const struct btrfs_fs_info *fs_info,
650 			  struct folio *folio, u64 start, u32 len)
651 {
652 	struct btrfs_folio_state *bfs;
653 	unsigned long flags;
654 	unsigned int nbits;
655 	int ret;
656 
657 	ASSERT(folio_test_locked(folio));
658 	if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio))
659 		return;
660 
661 	bfs = folio_get_private(folio);
662 	nbits = len >> fs_info->sectorsize_bits;
663 	spin_lock_irqsave(&bfs->lock, flags);
664 	ret = atomic_add_return(nbits, &bfs->nr_locked);
665 	ASSERT(ret <= btrfs_blocks_per_folio(fs_info, folio));
666 	spin_unlock_irqrestore(&bfs->lock, flags);
667 }
668 
669 /*
670  * Clear the dirty flag for the folio.
671  *
672  * If the affected folio is no longer dirty, return true. Otherwise return false.
673  */
674 bool btrfs_meta_folio_clear_and_test_dirty(struct folio *folio, const struct extent_buffer *eb)
675 {
676 	bool last;
677 
678 	if (!btrfs_meta_is_subpage(eb->fs_info)) {
679 		folio_clear_dirty_for_io(folio);
680 		return true;
681 	}
682 
683 	last = btrfs_subpage_clear_and_test_dirty(eb->fs_info, folio, eb->start, eb->len);
684 	if (last) {
685 		folio_clear_dirty_for_io(folio);
686 		return true;
687 	}
688 	return false;
689 }
690 
691 void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info,
692 				      struct folio *folio, u64 start, u32 len)
693 {
694 	struct btrfs_folio_state *bfs;
695 	const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio);
696 	unsigned long flags;
697 
698 	ASSERT(folio_test_private(folio) && folio_get_private(folio));
699 	ASSERT(blocks_per_folio > 1);
700 	bfs = folio_get_private(folio);
701 
702 	dump_page(folio_page(folio, 0), "btrfs folio state dump");
703 
704 	if (blocks_per_folio <= BITS_PER_LONG) {
705 		unsigned long uptodate;
706 		unsigned long dirty;
707 		unsigned long writeback;
708 
709 		spin_lock_irqsave(&bfs->lock, flags);
710 		uptodate = get_bitmap_value_uptodate(fs_info, folio);
711 		dirty = get_bitmap_value_dirty(fs_info, folio);
712 		writeback = get_bitmap_value_writeback(fs_info, folio);
713 
714 		spin_unlock_irqrestore(&bfs->lock, flags);
715 
716 		btrfs_warn(fs_info,
717 "start=%llu len=%u page=%llu, bitmaps uptodate=%*pbl dirty=%*pbl writeback=%*pbl",
718 			    start, len, folio_pos(folio),
719 			    blocks_per_folio, &uptodate,
720 			    blocks_per_folio, &dirty,
721 			    blocks_per_folio, &writeback);
722 		return;
723 	}
724 
725 	spin_lock_irqsave(&bfs->lock, flags);
726 	btrfs_warn(fs_info,
727 "start=%llu len=%u page=%llu, bitmaps uptodate=%*pbl dirty=%*pbl writeback=%*pbl",
728 		    start, len, folio_pos(folio),
729 		    blocks_per_folio, get_bitmap_pointer_uptodate(fs_info, folio),
730 		    blocks_per_folio, get_bitmap_pointer_dirty(fs_info, folio),
731 		    blocks_per_folio, get_bitmap_pointer_writeback(fs_info, folio));
732 	spin_unlock_irqrestore(&bfs->lock, flags);
733 }
734 
735 void btrfs_copy_subpage_dirty_bitmap(struct btrfs_fs_info *fs_info,
736 				     struct folio *folio,
737 				     unsigned long *dst)
738 {
739 	struct btrfs_folio_state *bfs;
740 	const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio);
741 	unsigned long flags;
742 	unsigned long value;
743 
744 	if (blocks_per_folio == 1) {
745 		value = 1;
746 		bitmap_copy(dst, &value, 1);
747 		return;
748 	}
749 
750 	ASSERT(folio_test_private(folio) && folio_get_private(folio));
751 	ASSERT(blocks_per_folio > 1);
752 	bfs = folio_get_private(folio);
753 
754 	if (blocks_per_folio <= BITS_PER_LONG) {
755 		spin_lock_irqsave(&bfs->lock, flags);
756 		value = bitmap_read(bfs->bitmaps, btrfs_bitmap_nr_dirty * blocks_per_folio,
757 				    blocks_per_folio);
758 		spin_unlock_irqrestore(&bfs->lock, flags);
759 		bitmap_copy(dst, &value, blocks_per_folio);
760 		return;
761 	}
762 	spin_lock_irqsave(&bfs->lock, flags);
763 	bitmap_copy(dst, get_bitmap_pointer_dirty(fs_info, folio),
764 		    blocks_per_folio);
765 	spin_unlock_irqrestore(&bfs->lock, flags);
766 }
767