xref: /linux/fs/bcachefs/fs-io-pagecache.c (revision d163d60258c755845cbc9cfe0e45fca71e649488)
1 // SPDX-License-Identifier: GPL-2.0
2 #ifndef NO_BCACHEFS_FS
3 
4 #include "bcachefs.h"
5 #include "btree_iter.h"
6 #include "extents.h"
7 #include "fs-io.h"
8 #include "fs-io-pagecache.h"
9 #include "subvolume.h"
10 
11 #include <linux/pagevec.h>
12 #include <linux/writeback.h>
13 
/*
 * Get a contiguous run of folios covering [start, end) from the pagecache,
 * appending them to @fs.
 *
 * Stops at the first gap (folio not present and not creatable), the first
 * error, or @end.  New folios are only created (FGP_CREAT honored) within
 * the first 1MB past @start; beyond that we only pick up folios already in
 * cache.
 *
 * Returns: 0 if at least one folio was obtained; otherwise the error from
 * darray_make_room_gfp(), or -ENOMEM if FGP_CREAT was set and no folio
 * could be obtained.
 */
int bch2_filemap_get_contig_folios_d(struct address_space *mapping,
				     loff_t start, u64 end,
				     fgf_t fgp_flags, gfp_t gfp,
				     folios *fs)
{
	struct folio *f;
	u64 pos = start;
	int ret = 0;

	while (pos < end) {
		/* Don't create new folios more than 1MB past @start: */
		if ((u64) pos >= (u64) start + (1ULL << 20))
			fgp_flags &= ~FGP_CREAT;

		/* Restrict darray allocation flags to those within GFP_KERNEL: */
		ret = darray_make_room_gfp(fs, 1, gfp & GFP_KERNEL);
		if (ret)
			break;

		f = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, fgp_flags, gfp);
		if (IS_ERR_OR_NULL(f))
			break;

		/* After the first folio, each new folio must be contiguous: */
		BUG_ON(fs->nr && folio_pos(f) != pos);

		pos = folio_end_pos(f);
		/* Cannot fail: room was made above */
		darray_push(fs, f);
	}

	if (!fs->nr && !ret && (fgp_flags & FGP_CREAT))
		ret = -ENOMEM;

	return fs->nr ? 0 : ret;
}
46 
47 /* pagecache_block must be held */
48 int bch2_write_invalidate_inode_pages_range(struct address_space *mapping,
49 					    loff_t start, loff_t end)
50 {
51 	int ret;
52 
53 	/*
54 	 * XXX: the way this is currently implemented, we can spin if a process
55 	 * is continually redirtying a specific page
56 	 */
57 	do {
58 		if (!mapping->nrpages)
59 			return 0;
60 
61 		ret = filemap_write_and_wait_range(mapping, start, end);
62 		if (ret)
63 			break;
64 
65 		if (!mapping->nrpages)
66 			return 0;
67 
68 		ret = invalidate_inode_pages2_range(mapping,
69 				start >> PAGE_SHIFT,
70 				end >> PAGE_SHIFT);
71 	} while (ret == -EBUSY);
72 
73 	return ret;
74 }
75 
76 #if 0
77 /* Useful for debug tracing: */
78 static const char * const bch2_folio_sector_states[] = {
79 #define x(n)	#n,
80 	BCH_FOLIO_SECTOR_STATE()
81 #undef x
82 	NULL
83 };
84 #endif
85 
86 static inline enum bch_folio_sector_state
87 folio_sector_dirty(enum bch_folio_sector_state state)
88 {
89 	switch (state) {
90 	case SECTOR_unallocated:
91 		return SECTOR_dirty;
92 	case SECTOR_reserved:
93 		return SECTOR_dirty_reserved;
94 	default:
95 		return state;
96 	}
97 }
98 
99 static inline enum bch_folio_sector_state
100 folio_sector_undirty(enum bch_folio_sector_state state)
101 {
102 	switch (state) {
103 	case SECTOR_dirty:
104 		return SECTOR_unallocated;
105 	case SECTOR_dirty_reserved:
106 		return SECTOR_reserved;
107 	default:
108 		return state;
109 	}
110 }
111 
112 static inline enum bch_folio_sector_state
113 folio_sector_reserve(enum bch_folio_sector_state state)
114 {
115 	switch (state) {
116 	case SECTOR_unallocated:
117 		return SECTOR_reserved;
118 	case SECTOR_dirty:
119 		return SECTOR_dirty_reserved;
120 	default:
121 		return state;
122 	}
123 }
124 
125 /* for newly allocated folios: */
126 struct bch_folio *__bch2_folio_create(struct folio *folio, gfp_t gfp)
127 {
128 	struct bch_folio *s;
129 
130 	s = kzalloc(sizeof(*s) +
131 		    sizeof(struct bch_folio_sector) *
132 		    folio_sectors(folio), gfp);
133 	if (!s)
134 		return NULL;
135 
136 	spin_lock_init(&s->lock);
137 	folio_attach_private(folio, s);
138 	return s;
139 }
140 
141 struct bch_folio *bch2_folio_create(struct folio *folio, gfp_t gfp)
142 {
143 	return bch2_folio(folio) ?: __bch2_folio_create(folio, gfp);
144 }
145 
146 static unsigned bkey_to_sector_state(struct bkey_s_c k)
147 {
148 	if (bkey_extent_is_reservation(k))
149 		return SECTOR_reserved;
150 	if (bkey_extent_is_allocation(k.k))
151 		return SECTOR_allocated;
152 	return SECTOR_unallocated;
153 }
154 
/*
 * Set the replica count and allocation state for a range of sectors within
 * a folio's bch_folio (which must already be attached - bch2_folio() is
 * dereferenced unchecked).
 *
 * @folio:	folio whose per-sector state is updated
 * @pg_offset:	first sector within the folio to update
 * @pg_len:	number of sectors to update
 * @nr_ptrs:	replica count to record for each sector
 * @state:	SECTOR_* state to set for each sector
 */
static void __bch2_folio_set(struct folio *folio,
			     unsigned pg_offset, unsigned pg_len,
			     unsigned nr_ptrs, unsigned state)
{
	struct bch_folio *s = bch2_folio(folio);
	unsigned i, sectors = folio_sectors(folio);

	BUG_ON(pg_offset >= sectors);
	BUG_ON(pg_offset + pg_len > sectors);

	spin_lock(&s->lock);

	for (i = pg_offset; i < pg_offset + pg_len; i++) {
		s->s[i].nr_replicas	= nr_ptrs;
		bch2_folio_sector_set(folio, s, i, state);
	}

	/* If the range ran to the end of the folio, its state is complete: */
	if (i == sectors)
		s->uptodate = true;

	spin_unlock(&s->lock);
}
177 
/*
 * Initialize bch_folio state (allocated/unallocated, nr_replicas) from the
 * extents btree:
 *
 * Returns 0 on success (including when all folios were already uptodate),
 * -ENOMEM, or an error from the btree transaction.
 */
int bch2_folio_set(struct bch_fs *c, subvol_inum inum,
		   struct folio **fs, unsigned nr_folios)
{
	struct btree_trans *trans;
	struct btree_iter iter;
	struct bkey_s_c k;
	struct bch_folio *s;
	u64 offset = folio_sector(fs[0]);	/* resume point across transaction restarts */
	unsigned folio_idx;
	u32 snapshot;
	bool need_set = false;
	int ret;

	/* Attach bch_folio state, and check whether any folio still needs init: */
	for (folio_idx = 0; folio_idx < nr_folios; folio_idx++) {
		s = bch2_folio_create(fs[folio_idx], GFP_KERNEL);
		if (!s)
			return -ENOMEM;

		need_set |= !s->uptodate;
	}

	if (!need_set)
		return 0;

	folio_idx = 0;
	trans = bch2_trans_get(c);
retry:
	bch2_trans_begin(trans);

	ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
	if (ret)
		goto err;

	/*
	 * Walk extents overlapping the folios; BTREE_ITER_slots means holes
	 * are returned as keys too, so every sector gets a state:
	 */
	for_each_btree_key_norestart(trans, iter, BTREE_ID_extents,
			   SPOS(inum.inum, offset, snapshot),
			   BTREE_ITER_slots, k, ret) {
		unsigned nr_ptrs = bch2_bkey_nr_ptrs_fully_allocated(k);
		unsigned state = bkey_to_sector_state(k);

		/* Apply this extent's state to every folio it overlaps: */
		while (folio_idx < nr_folios) {
			struct folio *folio = fs[folio_idx];
			u64 folio_start	= folio_sector(folio);
			u64 folio_end	= folio_end_sector(folio);
			/* Intersection of extent and folio, in sectors relative to folio: */
			unsigned folio_offset = max(bkey_start_offset(k.k), folio_start) -
				folio_start;
			unsigned folio_len = min(k.k->p.offset, folio_end) -
				folio_offset - folio_start;

			BUG_ON(k.k->p.offset < folio_start);
			BUG_ON(bkey_start_offset(k.k) > folio_end);

			if (!bch2_folio(folio)->uptodate)
				__bch2_folio_set(folio, folio_offset, folio_len, nr_ptrs, state);

			/* Extent ends within this folio - advance to next extent: */
			if (k.k->p.offset < folio_end)
				break;
			folio_idx++;
		}

		if (folio_idx == nr_folios)
			break;
	}

	/* Record how far we got, so a transaction restart resumes here: */
	offset = iter.pos.offset;
	bch2_trans_iter_exit(trans, &iter);
err:
	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
		goto retry;
	bch2_trans_put(trans);

	return ret;
}
254 
/*
 * Initialize per-sector bch_folio state for every folio in @bio from extent
 * key @k (state and replica count), as __bch2_folio_set() does for a single
 * folio.
 */
void bch2_bio_page_state_set(struct bio *bio, struct bkey_s_c k)
{
	struct bvec_iter iter;
	struct folio_vec fv;
	/*
	 * NOTE(review): reflink_v (indirect extent) keys record 0 replicas
	 * here - presumably the replica count isn't meaningful from this key
	 * alone; confirm against the reflink read path.
	 */
	unsigned nr_ptrs = k.k->type == KEY_TYPE_reflink_v
		? 0 : bch2_bkey_nr_ptrs_fully_allocated(k);
	unsigned state = bkey_to_sector_state(k);

	bio_for_each_folio(fv, bio, iter)
		__bch2_folio_set(fv.fv_folio,
				 fv.fv_offset >> 9,
				 fv.fv_len >> 9,
				 nr_ptrs, state);
}
269 
/*
 * Zero the cached nr_replicas count for every pagecache sector in
 * [start, end) - @start/@end are in sectors, not bytes.  Folios without
 * bch_folio state attached are skipped.
 */
void bch2_mark_pagecache_unallocated(struct bch_inode_info *inode,
				     u64 start, u64 end)
{
	pgoff_t index = start >> PAGE_SECTORS_SHIFT;
	pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT;
	struct folio_batch fbatch;
	unsigned i, j;

	if (end <= start)
		return;

	folio_batch_init(&fbatch);

	while (filemap_get_folios(inode->v.i_mapping,
				  &index, end_index, &fbatch)) {
		for (i = 0; i < folio_batch_count(&fbatch); i++) {
			struct folio *folio = fbatch.folios[i];
			u64 folio_start = folio_sector(folio);
			u64 folio_end = folio_end_sector(folio);
			/* Intersection of [start, end) with this folio, in sectors: */
			unsigned folio_offset = max(start, folio_start) - folio_start;
			unsigned folio_len = min(end, folio_end) - folio_offset - folio_start;
			struct bch_folio *s;

			BUG_ON(end <= folio_start);

			folio_lock(folio);
			s = bch2_folio(folio);

			if (s) {
				spin_lock(&s->lock);
				for (j = folio_offset; j < folio_offset + folio_len; j++)
					s->s[j].nr_replicas = 0;
				spin_unlock(&s->lock);
			}

			folio_unlock(folio);
		}
		folio_batch_release(&fbatch);
		cond_resched();
	}
}
311 
312 int bch2_mark_pagecache_reserved(struct bch_inode_info *inode,
313 				 u64 *start, u64 end,
314 				 bool nonblocking)
315 {
316 	struct bch_fs *c = inode->v.i_sb->s_fs_info;
317 	pgoff_t index = *start >> PAGE_SECTORS_SHIFT;
318 	pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT;
319 	struct folio_batch fbatch;
320 	s64 i_sectors_delta = 0;
321 	int ret = 0;
322 
323 	if (end <= *start)
324 		return 0;
325 
326 	folio_batch_init(&fbatch);
327 
328 	while (filemap_get_folios(inode->v.i_mapping,
329 				  &index, end_index, &fbatch)) {
330 		for (unsigned i = 0; i < folio_batch_count(&fbatch); i++) {
331 			struct folio *folio = fbatch.folios[i];
332 
333 			if (!nonblocking)
334 				folio_lock(folio);
335 			else if (!folio_trylock(folio)) {
336 				folio_batch_release(&fbatch);
337 				ret = -EAGAIN;
338 				break;
339 			}
340 
341 			u64 folio_start = folio_sector(folio);
342 			u64 folio_end = folio_end_sector(folio);
343 
344 			BUG_ON(end <= folio_start);
345 
346 			*start = min(end, folio_end);
347 
348 			struct bch_folio *s = bch2_folio(folio);
349 			if (s) {
350 				unsigned folio_offset = max(*start, folio_start) - folio_start;
351 				unsigned folio_len = min(end, folio_end) - folio_offset - folio_start;
352 
353 				spin_lock(&s->lock);
354 				for (unsigned j = folio_offset; j < folio_offset + folio_len; j++) {
355 					i_sectors_delta -= s->s[j].state == SECTOR_dirty;
356 					bch2_folio_sector_set(folio, s, j,
357 						folio_sector_reserve(s->s[j].state));
358 				}
359 				spin_unlock(&s->lock);
360 			}
361 
362 			folio_unlock(folio);
363 		}
364 		folio_batch_release(&fbatch);
365 		cond_resched();
366 	}
367 
368 	bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
369 	return ret;
370 }
371 
372 static inline unsigned sectors_to_reserve(struct bch_folio_sector *s,
373 					  unsigned nr_replicas)
374 {
375 	return max(0, (int) nr_replicas -
376 		   s->nr_replicas -
377 		   s->replicas_reserved);
378 }
379 
380 int bch2_get_folio_disk_reservation(struct bch_fs *c,
381 				struct bch_inode_info *inode,
382 				struct folio *folio, bool check_enospc)
383 {
384 	struct bch_folio *s = bch2_folio_create(folio, 0);
385 	unsigned nr_replicas = inode_nr_replicas(c, inode);
386 	struct disk_reservation disk_res = { 0 };
387 	unsigned i, sectors = folio_sectors(folio), disk_res_sectors = 0;
388 	int ret;
389 
390 	if (!s)
391 		return -ENOMEM;
392 
393 	for (i = 0; i < sectors; i++)
394 		disk_res_sectors += sectors_to_reserve(&s->s[i], nr_replicas);
395 
396 	if (!disk_res_sectors)
397 		return 0;
398 
399 	ret = bch2_disk_reservation_get(c, &disk_res,
400 					disk_res_sectors, 1,
401 					!check_enospc
402 					? BCH_DISK_RESERVATION_NOFAIL
403 					: 0);
404 	if (unlikely(ret))
405 		return ret;
406 
407 	for (i = 0; i < sectors; i++)
408 		s->s[i].replicas_reserved +=
409 			sectors_to_reserve(&s->s[i], nr_replicas);
410 
411 	return 0;
412 }
413 
/* Release both halves (disk and quota) of a folio write reservation: */
void bch2_folio_reservation_put(struct bch_fs *c,
			struct bch_inode_info *inode,
			struct bch2_folio_reservation *res)
{
	bch2_disk_reservation_put(c, &res->disk);
	bch2_quota_reservation_put(c, inode, &res->quota);
}
421 
/*
 * Reserve disk and quota space for writing to @folio over the byte range
 * [offset, offset + len), rounded out to the filesystem block size.
 *
 * Disk space is reserved for sectors short of res->disk.nr_replicas
 * replicas; quota is reserved for currently-unallocated sectors.  On
 * failure neither reservation is retained.  Requires the folio's sector
 * state to be initialized (s->uptodate).
 */
int bch2_folio_reservation_get(struct bch_fs *c,
			struct bch_inode_info *inode,
			struct folio *folio,
			struct bch2_folio_reservation *res,
			unsigned offset, unsigned len)
{
	struct bch_folio *s = bch2_folio_create(folio, 0);
	unsigned i, disk_sectors = 0, quota_sectors = 0;
	int ret;

	if (!s)
		return -ENOMEM;

	BUG_ON(!s->uptodate);

	/* Walk the sectors of the blocks touched by [offset, offset + len): */
	for (i = round_down(offset, block_bytes(c)) >> 9;
	     i < round_up(offset + len, block_bytes(c)) >> 9;
	     i++) {
		disk_sectors += sectors_to_reserve(&s->s[i],
						res->disk.nr_replicas);
		quota_sectors += s->s[i].state == SECTOR_unallocated;
	}

	if (disk_sectors) {
		ret = bch2_disk_reservation_add(c, &res->disk, disk_sectors, 0);
		if (unlikely(ret))
			return ret;
	}

	if (quota_sectors) {
		ret = bch2_quota_reservation_add(c, inode, &res->quota,
						 quota_sectors, true);
		if (unlikely(ret)) {
			/*
			 * Quota failed after disk succeeded: back out only
			 * the disk sectors we just added, via a temporary
			 * reservation:
			 */
			struct disk_reservation tmp = {
				.sectors = disk_sectors
			};

			bch2_disk_reservation_put(c, &tmp);
			res->disk.sectors -= disk_sectors;
			return ret;
		}
	}

	return 0;
}
467 
/*
 * Tear down a folio's bch_folio state as it leaves the pagecache: release
 * the disk reservation held by its sectors, undirty the sector states
 * (accounting the change via i_sectors), and free the attached bch_folio.
 */
static void bch2_clear_folio_bits(struct folio *folio)
{
	struct bch_inode_info *inode = to_bch_ei(folio->mapping->host);
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	struct bch_folio *s = bch2_folio(folio);
	struct disk_reservation disk_res = { 0 };
	int i, sectors = folio_sectors(folio), dirty_sectors = 0;

	if (!s)
		return;

	EBUG_ON(!folio_test_locked(folio));
	EBUG_ON(folio_test_writeback(folio));

	for (i = 0; i < sectors; i++) {
		/* Collect each sector's reservation so it can be released in one go: */
		disk_res.sectors += s->s[i].replicas_reserved;
		s->s[i].replicas_reserved = 0;

		/* dirty_sectors ends up <= 0: we're only removing dirty sectors */
		dirty_sectors -= s->s[i].state == SECTOR_dirty;
		bch2_folio_sector_set(folio, s, i, folio_sector_undirty(s->s[i].state));
	}

	bch2_disk_reservation_put(c, &disk_res);

	bch2_i_sectors_acct(c, inode, NULL, dirty_sectors);

	bch2_folio_release(folio);
}
496 
/*
 * Mark the byte range [offset, offset + len) of @folio dirty, rounded out
 * to the filesystem block size: moves disk reservation from @res into the
 * individual sectors, accounts newly-dirtied (previously unallocated)
 * sectors via i_sectors, and finally dirties the folio in the pagecache.
 *
 * Requires the folio's sector state to be initialized (s->uptodate).
 */
void bch2_set_folio_dirty(struct bch_fs *c,
			  struct bch_inode_info *inode,
			  struct folio *folio,
			  struct bch2_folio_reservation *res,
			  unsigned offset, unsigned len)
{
	struct bch_folio *s = bch2_folio(folio);
	unsigned i, dirty_sectors = 0;

	/* Dirtying past i_size (rounded to a block) would be a caller bug: */
	WARN_ON((u64) folio_pos(folio) + offset + len >
		round_up((u64) i_size_read(&inode->v), block_bytes(c)));

	BUG_ON(!s->uptodate);

	spin_lock(&s->lock);

	for (i = round_down(offset, block_bytes(c)) >> 9;
	     i < round_up(offset + len, block_bytes(c)) >> 9;
	     i++) {
		unsigned sectors = sectors_to_reserve(&s->s[i],
						res->disk.nr_replicas);

		/*
		 * This can happen if we race with the error path in
		 * bch2_writepage_io_done():
		 */
		sectors = min_t(unsigned, sectors, res->disk.sectors);

		/* Transfer reservation from @res to this sector: */
		s->s[i].replicas_reserved += sectors;
		res->disk.sectors -= sectors;

		/* Only unallocated -> dirty transitions add to i_sectors: */
		dirty_sectors += s->s[i].state == SECTOR_unallocated;

		bch2_folio_sector_set(folio, s, i, folio_sector_dirty(s->s[i].state));
	}

	spin_unlock(&s->lock);

	bch2_i_sectors_acct(c, inode, &res->quota, dirty_sectors);

	if (!folio_test_dirty(folio))
		filemap_dirty_folio(inode->v.i_mapping, folio);
}
540 
/*
 * Page fault handler: takes the inode's pagecache_add lock around
 * filemap_fault(), with deadlock avoidance against a task holding a
 * pagecache_block lock (presumably the dio write path - see
 * faults_disabled_mapping()).
 */
vm_fault_t bch2_page_fault(struct vm_fault *vmf)
{
	struct file *file = vmf->vma->vm_file;
	struct address_space *mapping = file->f_mapping;
	struct address_space *fdm = faults_disabled_mapping();
	struct bch_inode_info *inode = file_bch_inode(file);
	vm_fault_t ret;

	/* Recursive fault on the mapping we have faults disabled for - fail: */
	if (fdm == mapping)
		return VM_FAULT_SIGBUS;

	/*
	 * Lock ordering: locks are taken in mapping-address order.  If we'd
	 * be taking them out of order, cycle the other task's lock through
	 * ours and tell it its lock was dropped:
	 */
	if (fdm > mapping) {
		struct bch_inode_info *fdm_host = to_bch_ei(fdm->host);

		/* Uncontended - no ordering problem: */
		if (bch2_pagecache_add_tryget(inode))
			goto got_lock;

		bch2_pagecache_block_put(fdm_host);

		bch2_pagecache_add_get(inode);
		bch2_pagecache_add_put(inode);

		bch2_pagecache_block_get(fdm_host);

		/* Signal that lock has been dropped: */
		set_fdm_dropped_locks();
		return VM_FAULT_SIGBUS;
	}

	bch2_pagecache_add_get(inode);
got_lock:
	ret = filemap_fault(vmf);
	bch2_pagecache_add_put(inode);

	return ret;
}
578 
/*
 * mkwrite fault handler: reserve disk/quota space for the whole folio and
 * mark it dirty before letting the write proceed, so writeback can't fail
 * with -ENOSPC later.
 */
vm_fault_t bch2_page_mkwrite(struct vm_fault *vmf)
{
	struct folio *folio = page_folio(vmf->page);
	struct file *file = vmf->vma->vm_file;
	struct bch_inode_info *inode = file_bch_inode(file);
	struct address_space *mapping = file->f_mapping;
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	struct bch2_folio_reservation res;
	unsigned len;
	loff_t isize;
	vm_fault_t ret;

	bch2_folio_reservation_init(c, inode, &res);

	sb_start_pagefault(inode->v.i_sb);
	file_update_time(file);

	/*
	 * Not strictly necessary, but helps avoid dio writes livelocking in
	 * bch2_write_invalidate_inode_pages_range() - can drop this if/when we get
	 * a bch2_write_invalidate_inode_pages_range() that works without dropping
	 * page lock before invalidating page
	 */
	bch2_pagecache_add_get(inode);

	folio_lock(folio);
	isize = i_size_read(&inode->v);

	/* Raced with truncation or invalidation - let the VM retry: */
	if (folio->mapping != mapping || folio_pos(folio) >= isize) {
		folio_unlock(folio);
		ret = VM_FAULT_NOPAGE;
		goto out;
	}

	/* Only reserve up to i_size, not the whole folio: */
	len = min_t(loff_t, folio_size(folio), isize - folio_pos(folio));

	/* No space (or other error) - the write cannot be allowed: */
	if (bch2_folio_set(c, inode_inum(inode), &folio, 1) ?:
	    bch2_folio_reservation_get(c, inode, folio, &res, 0, len)) {
		folio_unlock(folio);
		ret = VM_FAULT_SIGBUS;
		goto out;
	}

	bch2_set_folio_dirty(c, inode, folio, &res, 0, len);
	bch2_folio_reservation_put(c, inode, &res);

	folio_wait_stable(folio);
	ret = VM_FAULT_LOCKED;
out:
	bch2_pagecache_add_put(inode);
	sb_end_pagefault(inode->v.i_sb);

	return ret;
}
633 
634 void bch2_invalidate_folio(struct folio *folio, size_t offset, size_t length)
635 {
636 	if (offset || length < folio_size(folio))
637 		return;
638 
639 	bch2_clear_folio_bits(folio);
640 }
641 
642 bool bch2_release_folio(struct folio *folio, gfp_t gfp_mask)
643 {
644 	if (folio_test_dirty(folio) || folio_test_writeback(folio))
645 		return false;
646 
647 	bch2_clear_folio_bits(folio);
648 	return true;
649 }
650 
651 /* fseek: */
652 
653 static int folio_data_offset(struct folio *folio, loff_t pos,
654 			     unsigned min_replicas)
655 {
656 	struct bch_folio *s = bch2_folio(folio);
657 	unsigned i, sectors = folio_sectors(folio);
658 
659 	if (s)
660 		for (i = folio_pos_to_s(folio, pos); i < sectors; i++)
661 			if (s->s[i].state >= SECTOR_dirty &&
662 			    s->s[i].nr_replicas + s->s[i].replicas_reserved >= min_replicas)
663 				return i << SECTOR_SHIFT;
664 
665 	return -1;
666 }
667 
/*
 * Find the first byte of data in the pagecache in [start_offset,
 * end_offset): a sector with state >= SECTOR_dirty and at least
 * @min_replicas replicas.  Returns end_offset if none found, or -EAGAIN if
 * @nonblock and a folio lock couldn't be taken without blocking.
 */
loff_t bch2_seek_pagecache_data(struct inode *vinode,
				loff_t start_offset,
				loff_t end_offset,
				unsigned min_replicas,
				bool nonblock)
{
	struct folio_batch fbatch;
	pgoff_t start_index	= start_offset >> PAGE_SHIFT;
	pgoff_t end_index	= end_offset >> PAGE_SHIFT;
	pgoff_t index		= start_index;
	unsigned i;
	loff_t ret;
	int offset;

	folio_batch_init(&fbatch);

	while (filemap_get_folios(vinode->i_mapping,
				  &index, end_index, &fbatch)) {
		for (i = 0; i < folio_batch_count(&fbatch); i++) {
			struct folio *folio = fbatch.folios[i];

			if (!nonblock) {
				folio_lock(folio);
			} else if (!folio_trylock(folio)) {
				folio_batch_release(&fbatch);
				return -EAGAIN;
			}

			/* Start the scan mid-folio if start_offset lands inside it: */
			offset = folio_data_offset(folio,
					max(folio_pos(folio), start_offset),
					min_replicas);
			if (offset >= 0) {
				/* Found data - clamp to the requested range: */
				ret = clamp(folio_pos(folio) + offset,
					    start_offset, end_offset);
				folio_unlock(folio);
				folio_batch_release(&fbatch);
				return ret;
			}
			folio_unlock(folio);
		}
		folio_batch_release(&fbatch);
		cond_resched();
	}

	return end_offset;
}
714 
/*
 * Search for a hole in a folio.
 *
 * The filemap layer returns -ENOENT if no folio exists, so reuse the same error
 * code to indicate a pagecache hole exists at the returned offset. Otherwise
 * return 0 if the folio is filled with data, or an error code. This function
 * can return -EAGAIN if nonblock is specified.
 */
static int folio_hole_offset(struct address_space *mapping, loff_t *offset,
			      unsigned min_replicas, bool nonblock)
{
	struct folio *folio;
	struct bch_folio *s;
	unsigned i, sectors;
	int ret = -ENOENT;	/* default: treat as a hole */

	folio = __filemap_get_folio(mapping, *offset >> PAGE_SHIFT,
				    FGP_LOCK|(nonblock ? FGP_NOWAIT : 0), 0);
	if (IS_ERR(folio))
		return PTR_ERR(folio);

	/* No bch_folio state attached - sector states unknown, count as hole: */
	s = bch2_folio(folio);
	if (!s)
		goto unlock;

	sectors = folio_sectors(folio);
	for (i = folio_pos_to_s(folio, *offset); i < sectors; i++)
		if (s->s[i].state < SECTOR_dirty ||
		    s->s[i].nr_replicas + s->s[i].replicas_reserved < min_replicas) {
			/* Hole found - report its byte offset: */
			*offset = max(*offset,
				      folio_pos(folio) + (i << SECTOR_SHIFT));
			goto unlock;
		}

	/* Folio entirely data - advance the search past it: */
	*offset = folio_end_pos(folio);
	ret = 0;
unlock:
	folio_unlock(folio);
	folio_put(folio);
	return ret;
}
756 
757 loff_t bch2_seek_pagecache_hole(struct inode *vinode,
758 				loff_t start_offset,
759 				loff_t end_offset,
760 				unsigned min_replicas,
761 				bool nonblock)
762 {
763 	struct address_space *mapping = vinode->i_mapping;
764 	loff_t offset = start_offset;
765 	loff_t ret = 0;
766 
767 	while (!ret && offset < end_offset)
768 		ret = folio_hole_offset(mapping, &offset, min_replicas, nonblock);
769 
770 	if (ret && ret != -ENOENT)
771 		return ret;
772 	return min(offset, end_offset);
773 }
774 
775 int bch2_clamp_data_hole(struct inode *inode,
776 			 u64 *hole_start,
777 			 u64 *hole_end,
778 			 unsigned min_replicas,
779 			 bool nonblock)
780 {
781 	loff_t ret;
782 
783 	ret = bch2_seek_pagecache_hole(inode,
784 		*hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9;
785 	if (ret < 0)
786 		return ret;
787 
788 	*hole_start = ret;
789 
790 	if (*hole_start == *hole_end)
791 		return 0;
792 
793 	ret = bch2_seek_pagecache_data(inode,
794 		*hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9;
795 	if (ret < 0)
796 		return ret;
797 
798 	*hole_end = ret;
799 	return 0;
800 }
801 
802 #endif /* NO_BCACHEFS_FS */
803